| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * AMD Address Translation Library |
| * |
| * umc.c : Unified Memory Controller (UMC) topology helpers |
| * |
| * Copyright (c) 2023, Advanced Micro Devices, Inc. |
| * All Rights Reserved. |
| * |
| * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> |
| */ |
| |
| #include "internal.h" |
| |
| /* |
| * MI300 has a fixed, model-specific mapping between a UMC instance and |
| * its related Data Fabric Coherent Station instance. |
| * |
| * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the |
| * UMC instance within a Node. Use this to find the appropriate Coherent |
| * Station ID. |
| * |
| * Redundant bits were removed from the map below. |
| */ |
| static const u16 umc_coh_st_map[32] = { |
| 0x393, 0x293, 0x193, 0x093, |
| 0x392, 0x292, 0x192, 0x092, |
| 0x391, 0x291, 0x191, 0x091, |
| 0x390, 0x290, 0x190, 0x090, |
| 0x793, 0x693, 0x593, 0x493, |
| 0x792, 0x692, 0x592, 0x492, |
| 0x791, 0x691, 0x591, 0x491, |
| 0x790, 0x690, 0x590, 0x490, |
| }; |
| |
| #define UMC_ID_MI300 GENMASK(23, 12) |
| static u8 get_coh_st_inst_id_mi300(struct atl_err *err) |
| { |
| u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid); |
| u8 i; |
| |
| for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) { |
| if (umc_id == umc_coh_st_map[i]) |
| break; |
| } |
| |
| WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map)); |
| |
| return i; |
| } |
| |
| /* XOR the bits in @val. */ |
| static u16 bitwise_xor_bits(u16 val) |
| { |
| u16 tmp = 0; |
| u8 i; |
| |
| for (i = 0; i < 16; i++) |
| tmp ^= (val >> i) & 0x1; |
| |
| return tmp; |
| } |
| |
| struct xor_bits { |
| bool xor_enable; |
| u16 col_xor; |
| u32 row_xor; |
| }; |
| |
| #define NUM_BANK_BITS 4 |
| |
| static struct { |
| /* UMC::CH::AddrHashBank */ |
| struct xor_bits bank[NUM_BANK_BITS]; |
| |
| /* UMC::CH::AddrHashPC */ |
| struct xor_bits pc; |
| |
| /* UMC::CH::AddrHashPC2 */ |
| u8 bank_xor; |
| } addr_hash; |
| |
| #define MI300_UMC_CH_BASE 0x90000 |
| #define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) |
| #define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) |
| #define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) |
| |
| #define ADDR_HASH_XOR_EN BIT(0) |
| #define ADDR_HASH_COL_XOR GENMASK(13, 1) |
| #define ADDR_HASH_ROW_XOR GENMASK(31, 14) |
| #define ADDR_HASH_BANK_XOR GENMASK(5, 0) |
| |
| /* |
| * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used |
| * for hashing. Do this during module init, since the values will not |
| * change during run time. |
| * |
| * These registers are instantiated for each UMC across each AMD Node. |
| * However, they should be identically programmed due to the fixed hardware |
| * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a |
| * single global structure for simplicity. |
| */ |
| int get_addr_hash_mi300(void) |
| { |
| u32 temp; |
| int ret; |
| u8 i; |
| |
| for (i = 0; i < NUM_BANK_BITS; i++) { |
| ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp); |
| if (ret) |
| return ret; |
| |
| addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); |
| addr_hash.bank[i].col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); |
| addr_hash.bank[i].row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); |
| } |
| |
| ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp); |
| if (ret) |
| return ret; |
| |
| addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); |
| addr_hash.pc.col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); |
| addr_hash.pc.row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); |
| |
| ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp); |
| if (ret) |
| return ret; |
| |
| addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); |
| |
| return 0; |
| } |
| |
| /* |
| * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must |
| * be converted to the intermediate normalized address (NA) before translating to a |
| * system physical address. |
| * |
| * The DRAM address includes bank, row, and column. Also included are bits for |
| * pseudochannel (PC) and stack ID (SID). |
| * |
| * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero |
| * |
| * The MCA address format is as follows: |
| * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} |
| * |
| * The normalized address format is fixed in hardware and is as follows: |
| * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} |
| * |
| * Additionally, the PC and Bank bits may be hashed. This must be accounted for before |
| * reconstructing the normalized address. |
| */ |
| #define MI300_UMC_MCA_COL GENMASK(5, 1) |
| #define MI300_UMC_MCA_BANK GENMASK(9, 6) |
| #define MI300_UMC_MCA_ROW GENMASK(24, 10) |
| #define MI300_UMC_MCA_PC BIT(25) |
| #define MI300_UMC_MCA_SID GENMASK(27, 26) |
| |
| #define MI300_NA_COL_1_0 GENMASK(6, 5) |
| #define MI300_NA_PC BIT(7) |
| #define MI300_NA_COL_3_2 GENMASK(9, 8) |
| #define MI300_NA_BANK_3_2 GENMASK(11, 10) |
| #define MI300_NA_BANK_1_0 GENMASK(13, 12) |
| #define MI300_NA_COL_4 BIT(14) |
| #define MI300_NA_ROW GENMASK(28, 15) |
| #define MI300_NA_SID GENMASK(30, 29) |
| |
| static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) |
| { |
| u16 i, col, row, bank, pc, sid, temp; |
| |
| col = FIELD_GET(MI300_UMC_MCA_COL, addr); |
| bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); |
| row = FIELD_GET(MI300_UMC_MCA_ROW, addr); |
| pc = FIELD_GET(MI300_UMC_MCA_PC, addr); |
| sid = FIELD_GET(MI300_UMC_MCA_SID, addr); |
| |
| /* Calculate hash for each Bank bit. */ |
| for (i = 0; i < NUM_BANK_BITS; i++) { |
| if (!addr_hash.bank[i].xor_enable) |
| continue; |
| |
| temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor); |
| temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor); |
| bank ^= temp << i; |
| } |
| |
| /* Calculate hash for PC bit. */ |
| if (addr_hash.pc.xor_enable) { |
| /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ |
| bank |= sid << 5; |
| |
| temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); |
| temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); |
| temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); |
| pc ^= temp; |
| |
| /* Drop SID bits for the sake of debug printing later. */ |
| bank &= 0x1F; |
| } |
| |
| /* Reconstruct the normalized address starting with NA[4:0] = 0 */ |
| addr = 0; |
| |
| /* NA[6:5] = Column[1:0] */ |
| temp = col & 0x3; |
| addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); |
| |
| /* NA[7] = PC */ |
| addr |= FIELD_PREP(MI300_NA_PC, pc); |
| |
| /* NA[9:8] = Column[3:2] */ |
| temp = (col >> 2) & 0x3; |
| addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); |
| |
| /* NA[11:10] = Bank[3:2] */ |
| temp = (bank >> 2) & 0x3; |
| addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); |
| |
| /* NA[13:12] = Bank[1:0] */ |
| temp = bank & 0x3; |
| addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); |
| |
| /* NA[14] = Column[4] */ |
| temp = (col >> 4) & 0x1; |
| addr |= FIELD_PREP(MI300_NA_COL_4, temp); |
| |
| /* NA[28:15] = Row[13:0] */ |
| addr |= FIELD_PREP(MI300_NA_ROW, row); |
| |
| /* NA[30:29] = SID[1:0] */ |
| addr |= FIELD_PREP(MI300_NA_SID, sid); |
| |
| pr_debug("Addr=0x%016lx", addr); |
| pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); |
| |
| return addr; |
| } |
| |
| /* |
| * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire |
| * all memory within that DRAM row. This applies to the memory with a DRAM |
| * bank. |
| * |
| * To find the memory addresses, loop through permutations of the DRAM column |
| * bits and find the System Physical address of each. The column bits are used |
| * to calculate the intermediate Normalized address, so all permutations should |
| * be checked. |
| * |
| * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. |
| */ |
| #define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) |
| static void retire_row_mi300(struct atl_err *a_err) |
| { |
| unsigned long addr; |
| struct page *p; |
| u8 col; |
| |
| for (col = 0; col < MI300_NUM_COL; col++) { |
| a_err->addr &= ~MI300_UMC_MCA_COL; |
| a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col); |
| |
| addr = amd_convert_umc_mca_addr_to_sys_addr(a_err); |
| if (IS_ERR_VALUE(addr)) |
| continue; |
| |
| addr = PHYS_PFN(addr); |
| |
| /* |
| * Skip invalid or already poisoned pages to avoid unnecessary |
| * error messages from memory_failure(). |
| */ |
| p = pfn_to_online_page(addr); |
| if (!p) |
| continue; |
| |
| if (PageHWPoison(p)) |
| continue; |
| |
| memory_failure(addr, 0); |
| } |
| } |
| |
| void amd_retire_dram_row(struct atl_err *a_err) |
| { |
| if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) |
| return retire_row_mi300(a_err); |
| } |
| EXPORT_SYMBOL_GPL(amd_retire_dram_row); |
| |
| static unsigned long get_addr(unsigned long addr) |
| { |
| if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) |
| return convert_dram_to_norm_addr_mi300(addr); |
| |
| return addr; |
| } |
| |
| #define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) |
| static u8 get_die_id(struct atl_err *err) |
| { |
| /* |
| * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this |
| * needs to be divided by 4 to get the internal Die ID. |
| */ |
| if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) { |
| u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid); |
| |
| return node_id >> 2; |
| } |
| |
| /* |
| * For CPUs, this is the AMD Node ID modulo the number |
| * of AMD Nodes per socket. |
| */ |
| return topology_amd_node_id(err->cpu) % topology_amd_nodes_per_pkg(); |
| } |
| |
| #define UMC_CHANNEL_NUM GENMASK(31, 20) |
| static u8 get_coh_st_inst_id(struct atl_err *err) |
| { |
| if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) |
| return get_coh_st_inst_id_mi300(err); |
| |
| return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); |
| } |
| |
| unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) |
| { |
| u8 socket_id = topology_physical_package_id(err->cpu); |
| u8 coh_st_inst_id = get_coh_st_inst_id(err); |
| unsigned long addr = get_addr(err->addr); |
| u8 die_id = get_die_id(err); |
| |
| pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", |
| socket_id, die_id, coh_st_inst_id, addr); |
| |
| return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr); |
| } |