| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Ampere Computing SoC's SMpro Error Monitoring Driver |
| * |
| * Copyright (c) 2022, Ampere Computing LLC |
| * |
| */ |
| |
| #include <linux/i2c.h> |
| #include <linux/mod_devicetable.h> |
| #include <linux/module.h> |
| #include <linux/platform_device.h> |
| #include <linux/regmap.h> |
| |
/*
 * SMpro register addresses used by this driver.
 */

/* GPI RAS error status register */
#define GPI_RAS_ERR			0x7E

/* Core and L2C error registers */
#define CORE_CE_ERR_CNT			0x80
#define CORE_CE_ERR_LEN			0x81
#define CORE_CE_ERR_DATA		0x82
#define CORE_UE_ERR_CNT			0x83
#define CORE_UE_ERR_LEN			0x84
#define CORE_UE_ERR_DATA		0x85

/* Memory error registers */
#define MEM_CE_ERR_CNT			0x90
#define MEM_CE_ERR_LEN			0x91
#define MEM_CE_ERR_DATA			0x92
#define MEM_UE_ERR_CNT			0x93
#define MEM_UE_ERR_LEN			0x94
#define MEM_UE_ERR_DATA			0x95

/* RAS internal error/warning registers, listed in address order */
#define ERR_SMPRO_TYPE			0xA0
#define ERR_PMPRO_TYPE			0xA1
#define ERR_SMPRO_INFO_LO		0xA2
#define ERR_SMPRO_INFO_HI		0xA3
#define ERR_SMPRO_DATA_LO		0xA4
#define ERR_SMPRO_DATA_HI		0xA5
#define ERR_PMPRO_INFO_LO		0xA6
#define ERR_PMPRO_INFO_HI		0xA7
#define ERR_PMPRO_DATA_LO		0xA8
#define ERR_PMPRO_DATA_HI		0xA9
#define WARN_SMPRO_INFO_LO		0xAA
#define WARN_SMPRO_INFO_HI		0xAB
#define WARN_PMPRO_INFO_LO		0xAC
#define WARN_PMPRO_INFO_HI		0xAD

/* PCIe error registers */
#define PCIE_CE_ERR_CNT			0xC0
#define PCIE_CE_ERR_LEN			0xC1
#define PCIE_CE_ERR_DATA		0xC2
#define PCIE_UE_ERR_CNT			0xC3
#define PCIE_UE_ERR_LEN			0xC4
#define PCIE_UE_ERR_DATA		0xC5

/* Other error registers */
#define OTHER_CE_ERR_CNT		0xD0
#define OTHER_CE_ERR_LEN		0xD1
#define OTHER_CE_ERR_DATA		0xD2
#define OTHER_UE_ERR_CNT		0xD8
#define OTHER_UE_ERR_LEN		0xD9
#define OTHER_UE_ERR_DATA		0xDA

/* Event data registers */
#define VRD_WARN_FAULT_EVENT_DATA	0x78
#define VRD_HOT_EVENT_DATA		0x79
#define DIMM_HOT_EVENT_DATA		0x7A
| |
/* Size in bytes of the buffer used for one CE/UE error data record */
#define MAX_READ_BLOCK_LENGTH		48

/* Channel numbers selecting the SMpro or PMpro internal error registers */
#define RAS_SMPRO_ERR			0
#define RAS_PMPRO_ERR			1
| |
/*
 * Error classes reported through 48-byte data records. Each value is the
 * index of the class in smpro_error_table; the final enumerator is the
 * number of classes rather than a class itself.
 */
enum RAS_48BYTES_ERR_TYPES {
	CORE_CE_ERR = 0,
	CORE_UE_ERR,
	MEM_CE_ERR,
	MEM_UE_ERR,
	PCIE_CE_ERR,
	PCIE_UE_ERR,
	OTHER_CE_ERR,
	OTHER_UE_ERR,
	NUM_48BYTES_ERR_TYPE,
};
| |
| struct smpro_error_hdr { |
| u8 count; /* Number of the RAS errors */ |
| u8 len; /* Number of data bytes */ |
| u8 data; /* Start of 48-byte data */ |
| u8 max_cnt; /* Max num of errors */ |
| }; |
| |
| /* |
| * Included Address of registers to get Count, Length of data and Data |
| * of the 48 bytes error data |
| */ |
| static struct smpro_error_hdr smpro_error_table[] = { |
| [CORE_CE_ERR] = { |
| .count = CORE_CE_ERR_CNT, |
| .len = CORE_CE_ERR_LEN, |
| .data = CORE_CE_ERR_DATA, |
| .max_cnt = 32 |
| }, |
| [CORE_UE_ERR] = { |
| .count = CORE_UE_ERR_CNT, |
| .len = CORE_UE_ERR_LEN, |
| .data = CORE_UE_ERR_DATA, |
| .max_cnt = 32 |
| }, |
| [MEM_CE_ERR] = { |
| .count = MEM_CE_ERR_CNT, |
| .len = MEM_CE_ERR_LEN, |
| .data = MEM_CE_ERR_DATA, |
| .max_cnt = 16 |
| }, |
| [MEM_UE_ERR] = { |
| .count = MEM_UE_ERR_CNT, |
| .len = MEM_UE_ERR_LEN, |
| .data = MEM_UE_ERR_DATA, |
| .max_cnt = 16 |
| }, |
| [PCIE_CE_ERR] = { |
| .count = PCIE_CE_ERR_CNT, |
| .len = PCIE_CE_ERR_LEN, |
| .data = PCIE_CE_ERR_DATA, |
| .max_cnt = 96 |
| }, |
| [PCIE_UE_ERR] = { |
| .count = PCIE_UE_ERR_CNT, |
| .len = PCIE_UE_ERR_LEN, |
| .data = PCIE_UE_ERR_DATA, |
| .max_cnt = 96 |
| }, |
| [OTHER_CE_ERR] = { |
| .count = OTHER_CE_ERR_CNT, |
| .len = OTHER_CE_ERR_LEN, |
| .data = OTHER_CE_ERR_DATA, |
| .max_cnt = 8 |
| }, |
| [OTHER_UE_ERR] = { |
| .count = OTHER_UE_ERR_CNT, |
| .len = OTHER_UE_ERR_LEN, |
| .data = OTHER_UE_ERR_DATA, |
| .max_cnt = 8 |
| }, |
| }; |
| |
| /* |
| * List of SCP registers which are used to get |
| * one type of RAS Internal errors. |
| */ |
| struct smpro_int_error_hdr { |
| u8 type; |
| u8 info_l; |
| u8 info_h; |
| u8 data_l; |
| u8 data_h; |
| u8 warn_l; |
| u8 warn_h; |
| }; |
| |
| static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = { |
| [RAS_SMPRO_ERR] = { |
| .type = ERR_SMPRO_TYPE, |
| .info_l = ERR_SMPRO_INFO_LO, |
| .info_h = ERR_SMPRO_INFO_HI, |
| .data_l = ERR_SMPRO_DATA_LO, |
| .data_h = ERR_SMPRO_DATA_HI, |
| .warn_l = WARN_SMPRO_INFO_LO, |
| .warn_h = WARN_SMPRO_INFO_HI, |
| }, |
| [RAS_PMPRO_ERR] = { |
| .type = ERR_PMPRO_TYPE, |
| .info_l = ERR_PMPRO_INFO_LO, |
| .info_h = ERR_PMPRO_INFO_HI, |
| .data_l = ERR_PMPRO_DATA_LO, |
| .data_h = ERR_PMPRO_DATA_HI, |
| .warn_l = WARN_PMPRO_INFO_LO, |
| .warn_h = WARN_PMPRO_INFO_HI, |
| }, |
| }; |
| |
/* Per-device driver data */
struct smpro_errmon {
	/* Register map obtained from the parent device at probe time */
	struct regmap *regmap;
};
| |
/*
 * Event kinds. Each value is the index of the event in smpro_event_table;
 * the final enumerator is the number of events.
 */
enum EVENT_TYPES {
	VRD_WARN_FAULT_EVENT = 0,
	VRD_HOT_EVENT,
	DIMM_HOT_EVENT,
	NUM_EVENTS_TYPE,
};
| |
| /* Included Address of event source and data registers */ |
| static u8 smpro_event_table[NUM_EVENTS_TYPE] = { |
| VRD_WARN_FAULT_EVENT_DATA, |
| VRD_HOT_EVENT_DATA, |
| DIMM_HOT_EVENT_DATA, |
| }; |
| |
| static ssize_t smpro_event_data_read(struct device *dev, |
| struct device_attribute *da, char *buf, |
| int channel) |
| { |
| struct smpro_errmon *errmon = dev_get_drvdata(dev); |
| s32 event_data; |
| int ret; |
| |
| ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data); |
| if (ret) |
| return ret; |
| /* Clear event after read */ |
| if (event_data != 0) |
| regmap_write(errmon->regmap, smpro_event_table[channel], event_data); |
| |
| return sysfs_emit(buf, "%04x\n", event_data); |
| } |
| |
| static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da, |
| char *buf, int channel) |
| { |
| struct smpro_errmon *errmon = dev_get_drvdata(dev); |
| struct smpro_error_hdr *err_info; |
| s32 err_count; |
| int ret; |
| |
| err_info = &smpro_error_table[channel]; |
| |
| ret = regmap_read(errmon->regmap, err_info->count, &err_count); |
| if (ret) |
| return ret; |
| |
| /* Bit 8 indicates the overflow status */ |
| return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0); |
| } |
| |
| static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da, |
| char *buf, int channel) |
| { |
| struct smpro_errmon *errmon = dev_get_drvdata(dev); |
| unsigned char err_data[MAX_READ_BLOCK_LENGTH]; |
| struct smpro_error_hdr *err_info; |
| s32 err_count, err_length; |
| int ret; |
| |
| err_info = &smpro_error_table[channel]; |
| |
| ret = regmap_read(errmon->regmap, err_info->count, &err_count); |
| /* Error count is the low byte */ |
| err_count &= 0xff; |
| if (ret || !err_count || err_count > err_info->max_cnt) |
| return ret; |
| |
| ret = regmap_read(errmon->regmap, err_info->len, &err_length); |
| if (ret || err_length <= 0) |
| return ret; |
| |
| if (err_length > MAX_READ_BLOCK_LENGTH) |
| err_length = MAX_READ_BLOCK_LENGTH; |
| |
| memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH); |
| ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length); |
| if (ret < 0) |
| return ret; |
| |
| /* clear the error */ |
| ret = regmap_write(errmon->regmap, err_info->count, 0x100); |
| if (ret) |
| return ret; |
| /* |
| * The output of Core/Memory/PCIe/Others UE/CE errors follows the format |
| * specified in section 5.8.1 CE/UE Error Data record in |
| * Altra SOC BMC Interface specification. |
| */ |
| return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data); |
| } |
| |
| /* |
| * Output format: |
| * <4-byte hex value of error info><4-byte hex value of error extensive data> |
| * Where: |
| * + error info : The error information |
| * + error data : Extensive data (32 bits) |
| * Reference to section 5.10 RAS Internal Error Register Definition in |
| * Altra SOC BMC Interface specification |
| */ |
| static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da, |
| char *buf, int channel) |
| { |
| struct smpro_errmon *errmon = dev_get_drvdata(dev); |
| struct smpro_int_error_hdr *err_info; |
| unsigned int err[4] = { 0 }; |
| unsigned int err_type; |
| unsigned int val; |
| int ret; |
| |
| /* read error status */ |
| ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); |
| if (ret) |
| return ret; |
| |
| if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || |
| (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) |
| return 0; |
| |
| err_info = &list_smpro_int_error_hdr[channel]; |
| ret = regmap_read(errmon->regmap, err_info->type, &val); |
| if (ret) |
| return ret; |
| |
| err_type = (val & BIT(1)) ? BIT(1) : |
| (val & BIT(2)) ? BIT(2) : 0; |
| |
| if (!err_type) |
| return 0; |
| |
| ret = regmap_read(errmon->regmap, err_info->info_l, err + 1); |
| if (ret) |
| return ret; |
| |
| ret = regmap_read(errmon->regmap, err_info->info_h, err); |
| if (ret) |
| return ret; |
| |
| if (err_type & BIT(2)) { |
| /* Error with data type */ |
| ret = regmap_read(errmon->regmap, err_info->data_l, err + 3); |
| if (ret) |
| return ret; |
| |
| ret = regmap_read(errmon->regmap, err_info->data_h, err + 2); |
| if (ret) |
| return ret; |
| } |
| |
| /* clear the read errors */ |
| ret = regmap_write(errmon->regmap, err_info->type, err_type); |
| if (ret) |
| return ret; |
| |
| return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err); |
| } |
| |
| /* |
| * Output format: |
| * <4-byte hex value of warining info> |
| * Reference to section 5.10 RAS Internal Error Register Definition in |
| * Altra SOC BMC Interface specification |
| */ |
| static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da, |
| char *buf, int channel) |
| { |
| struct smpro_errmon *errmon = dev_get_drvdata(dev); |
| struct smpro_int_error_hdr *err_info; |
| unsigned int warn[2] = { 0 }; |
| unsigned int val; |
| int ret; |
| |
| /* read error status */ |
| ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); |
| if (ret) |
| return ret; |
| |
| if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || |
| (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) |
| return 0; |
| |
| err_info = &list_smpro_int_error_hdr[channel]; |
| ret = regmap_read(errmon->regmap, err_info->type, &val); |
| if (ret) |
| return ret; |
| |
| if (!(val & BIT(0))) |
| return 0; |
| |
| ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1); |
| if (ret) |
| return ret; |
| |
| ret = regmap_read(errmon->regmap, err_info->warn_h, warn); |
| if (ret) |
| return ret; |
| |
| /* clear the warning */ |
| ret = regmap_write(errmon->regmap, err_info->type, BIT(0)); |
| if (ret) |
| return ret; |
| |
| return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn); |
| } |
| |
| #define ERROR_OVERFLOW_RO(_error, _index) \ |
| static ssize_t overflow_##_error##_show(struct device *dev, \ |
| struct device_attribute *da, \ |
| char *buf) \ |
| { \ |
| return smpro_overflow_data_read(dev, da, buf, _index); \ |
| } \ |
| static DEVICE_ATTR_RO(overflow_##_error) |
| |
| ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR); |
| ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR); |
| ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR); |
| ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR); |
| ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR); |
| ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR); |
| ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR); |
| ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR); |
| |
| #define ERROR_RO(_error, _index) \ |
| static ssize_t error_##_error##_show(struct device *dev, \ |
| struct device_attribute *da, \ |
| char *buf) \ |
| { \ |
| return smpro_error_data_read(dev, da, buf, _index); \ |
| } \ |
| static DEVICE_ATTR_RO(error_##_error) |
| |
| ERROR_RO(core_ce, CORE_CE_ERR); |
| ERROR_RO(core_ue, CORE_UE_ERR); |
| ERROR_RO(mem_ce, MEM_CE_ERR); |
| ERROR_RO(mem_ue, MEM_UE_ERR); |
| ERROR_RO(pcie_ce, PCIE_CE_ERR); |
| ERROR_RO(pcie_ue, PCIE_UE_ERR); |
| ERROR_RO(other_ce, OTHER_CE_ERR); |
| ERROR_RO(other_ue, OTHER_UE_ERR); |
| |
| static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf) |
| { |
| return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR); |
| } |
| static DEVICE_ATTR_RO(error_smpro); |
| |
| static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) |
| { |
| return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR); |
| } |
| static DEVICE_ATTR_RO(error_pmpro); |
| |
| static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf) |
| { |
| return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR); |
| } |
| static DEVICE_ATTR_RO(warn_smpro); |
| |
| static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) |
| { |
| return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR); |
| } |
| static DEVICE_ATTR_RO(warn_pmpro); |
| |
| #define EVENT_RO(_event, _index) \ |
| static ssize_t event_##_event##_show(struct device *dev, \ |
| struct device_attribute *da, \ |
| char *buf) \ |
| { \ |
| return smpro_event_data_read(dev, da, buf, _index); \ |
| } \ |
| static DEVICE_ATTR_RO(event_##_event) |
| |
| EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT); |
| EVENT_RO(vrd_hot, VRD_HOT_EVENT); |
| EVENT_RO(dimm_hot, DIMM_HOT_EVENT); |
| |
| static struct attribute *smpro_errmon_attrs[] = { |
| &dev_attr_overflow_core_ce.attr, |
| &dev_attr_overflow_core_ue.attr, |
| &dev_attr_overflow_mem_ce.attr, |
| &dev_attr_overflow_mem_ue.attr, |
| &dev_attr_overflow_pcie_ce.attr, |
| &dev_attr_overflow_pcie_ue.attr, |
| &dev_attr_overflow_other_ce.attr, |
| &dev_attr_overflow_other_ue.attr, |
| &dev_attr_error_core_ce.attr, |
| &dev_attr_error_core_ue.attr, |
| &dev_attr_error_mem_ce.attr, |
| &dev_attr_error_mem_ue.attr, |
| &dev_attr_error_pcie_ce.attr, |
| &dev_attr_error_pcie_ue.attr, |
| &dev_attr_error_other_ce.attr, |
| &dev_attr_error_other_ue.attr, |
| &dev_attr_error_smpro.attr, |
| &dev_attr_error_pmpro.attr, |
| &dev_attr_warn_smpro.attr, |
| &dev_attr_warn_pmpro.attr, |
| &dev_attr_event_vrd_warn_fault.attr, |
| &dev_attr_event_vrd_hot.attr, |
| &dev_attr_event_dimm_hot.attr, |
| NULL |
| }; |
| |
| ATTRIBUTE_GROUPS(smpro_errmon); |
| |
| static int smpro_errmon_probe(struct platform_device *pdev) |
| { |
| struct smpro_errmon *errmon; |
| |
| errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL); |
| if (!errmon) |
| return -ENOMEM; |
| |
| platform_set_drvdata(pdev, errmon); |
| |
| errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); |
| if (!errmon->regmap) |
| return -ENODEV; |
| |
| return 0; |
| } |
| |
| static struct platform_driver smpro_errmon_driver = { |
| .probe = smpro_errmon_probe, |
| .driver = { |
| .name = "smpro-errmon", |
| .dev_groups = smpro_errmon_groups, |
| }, |
| }; |
| |
| module_platform_driver(smpro_errmon_driver); |
| |
| MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>"); |
| MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>"); |
| MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>"); |
| MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>"); |
| MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>"); |
| MODULE_DESCRIPTION("Ampere Altra SMpro driver"); |
| MODULE_LICENSE("GPL"); |