EDAC, altera: Fix S10 Double Bit Error Notification
Stratix10 Double Bit Error Address was always read from SDRAM Address
register instead of each device's Address register.
To determine which device had the DBE, cycle through the EDAC devices
comparing the DBE value to the db_irq value. Once found, report the DBE
Address from the device registers as well as the device name.
Finally, notify the system via an SMC call and indicate the panic should
result in a system reboot. Change a run-time check to a Stratix10
compile-time check for a clean SMC notification.
Fixes: d5fc9125566c ("EDAC, altera: Combine Stratix10 and Arria10 probe functions")
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: James Morse <james.morse@arm.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: https://lkml.kernel.org/r/1552490842-25440-1-git-send-email-thor.thayer@linux.intel.com
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 1bcf9ae..5ff2638 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1930,6 +1930,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
goto err_release_group1;
}
+#ifdef CONFIG_ARCH_STRATIX10
+ /* Use IRQ to determine SError origin instead of assigning IRQ */
+ rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to parse DB IRQ index\n");
+ goto err_release_group1;
+ }
+#else
altdev->db_irq = irq_of_parse_and_map(np, 1);
if (!altdev->db_irq) {
edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n");
@@ -1943,6 +1952,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
goto err_release_group1;
}
+#endif
rc = edac_device_add_device(dci);
if (rc) {
@@ -2005,6 +2015,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = {
/************** Stratix 10 EDAC Double Bit Error Handler ************/
#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m)
+#ifdef CONFIG_ARCH_STRATIX10
+/* panic routine issues reboot on non-zero panic_timeout */
+extern int panic_timeout;
+
/*
* The double bit error is handled through SError which is fatal. This is
* called as a panic notifier to printout ECC error info as part of the panic.
@@ -2018,17 +2032,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this,
regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
&dberror);
regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror);
- if (dberror & S10_DDR0_IRQ_MASK) {
- regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr);
- regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
- err_addr);
- edac_printk(KERN_ERR, EDAC_MC,
- "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
- err_addr);
+ if (dberror & S10_DBE_IRQ_MASK) {
+ struct list_head *position;
+ struct altr_edac_device_dev *ed;
+ struct arm_smccc_res result;
+
+ /* Find the matching DBE in the list of devices */
+ list_for_each(position, &edac->a10_ecc_devices) {
+ ed = list_entry(position, struct altr_edac_device_dev,
+ next);
+ if (!(BIT(ed->db_irq) & dberror))
+ continue;
+
+ writel(ALTR_A10_ECC_DERRPENA,
+ ed->base + ALTR_A10_ECC_INTSTAT_OFST);
+ err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST);
+ regmap_write(edac->ecc_mgr_map,
+ S10_SYSMGR_UE_ADDR_OFST, err_addr);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "EDAC: [Fatal DBE on %s @ 0x%08X]\n",
+ ed->edac_dev_name, err_addr);
+ break;
+ }
+ /* Notify the System through SMC. Reboot delay = 1 second */
+ panic_timeout = 1;
+ arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0,
+ 0, 0, &result);
}
return NOTIFY_DONE;
}
+#endif
/****************** Arria 10 EDAC Probe Function *********************/
static int altr_edac_a10_probe(struct platform_device *pdev)
@@ -2098,16 +2132,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_irq_handler,
edac);
- if (socfpga_is_a10()) {
- edac->db_irq = platform_get_irq(pdev, 1);
- if (edac->db_irq < 0) {
- dev_err(&pdev->dev, "No DBERR IRQ resource\n");
- return edac->db_irq;
- }
- irq_set_chained_handler_and_data(edac->db_irq,
- altr_edac_a10_irq_handler,
- edac);
- } else {
+#ifdef CONFIG_ARCH_STRATIX10
+ {
int dberror, err_addr;
edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
@@ -2130,6 +2156,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
S10_SYSMGR_UE_ADDR_OFST, 0);
}
}
+#else
+ edac->db_irq = platform_get_irq(pdev, 1);
+ if (edac->db_irq < 0) {
+ dev_err(&pdev->dev, "No DBERR IRQ resource\n");
+ return edac->db_irq;
+ }
+ irq_set_chained_handler_and_data(edac->db_irq,
+ altr_edac_a10_irq_handler, edac);
+#endif
for_each_child_of_node(pdev->dev.of_node, child) {
if (!of_device_is_available(child))