| // SPDX-License-Identifier: GPL-2.0 |
| /* Copyright (c) Meta Platforms, Inc. and affiliates. */ |
| |
| #include <linux/bitfield.h> |
| #include <net/tcp.h> |
| |
| #include "fbnic.h" |
| #include "fbnic_mac.h" |
| |
| static void fbnic_init_readrq(struct fbnic_dev *fbd, unsigned int offset, |
| unsigned int cls, unsigned int readrq) |
| { |
| u32 val = rd32(fbd, offset); |
| |
| /* The TDF_CTL masks are a superset of the RNI_RBP ones. So we can |
| * use them when setting either the TDE_CTF or RNI_RBP registers. |
| */ |
| val &= FBNIC_QM_TNI_TDF_CTL_MAX_OT | FBNIC_QM_TNI_TDF_CTL_MAX_OB; |
| |
| val |= FIELD_PREP(FBNIC_QM_TNI_TDF_CTL_MRRS, readrq) | |
| FIELD_PREP(FBNIC_QM_TNI_TDF_CTL_CLS, cls); |
| |
| wr32(fbd, offset, val); |
| } |
| |
| static void fbnic_init_mps(struct fbnic_dev *fbd, unsigned int offset, |
| unsigned int cls, unsigned int mps) |
| { |
| u32 val = rd32(fbd, offset); |
| |
| /* Currently all MPS masks are identical so just use the first one */ |
| val &= ~(FBNIC_QM_TNI_TCM_CTL_MPS | FBNIC_QM_TNI_TCM_CTL_CLS); |
| |
| val |= FIELD_PREP(FBNIC_QM_TNI_TCM_CTL_MPS, mps) | |
| FIELD_PREP(FBNIC_QM_TNI_TCM_CTL_CLS, cls); |
| |
| wr32(fbd, offset, val); |
| } |
| |
| static void fbnic_mac_init_axi(struct fbnic_dev *fbd) |
| { |
| bool override_1k = false; |
| int readrq, mps, cls; |
| |
| /* All of the values are based on being a power of 2 starting |
| * with 64 == 0. Therefore we can either divide by 64 in the |
| * case of constants, or just subtract 6 from the log2 of the value |
| * in order to get the value we will be programming into the |
| * registers. |
| */ |
| readrq = ilog2(fbd->readrq) - 6; |
| if (readrq > 3) |
| override_1k = true; |
| readrq = clamp(readrq, 0, 3); |
| |
| mps = ilog2(fbd->mps) - 6; |
| mps = clamp(mps, 0, 3); |
| |
| cls = ilog2(L1_CACHE_BYTES) - 6; |
| cls = clamp(cls, 0, 3); |
| |
| /* Configure Tx/Rx AXI Paths w/ Read Request and Max Payload sizes */ |
| fbnic_init_readrq(fbd, FBNIC_QM_TNI_TDF_CTL, cls, readrq); |
| fbnic_init_mps(fbd, FBNIC_QM_TNI_TCM_CTL, cls, mps); |
| |
| /* Configure QM TNI TDE: |
| * - Max outstanding AXI beats to 704(768 - 64) - guaranetees 8% of |
| * buffer capacity to descriptors. |
| * - Max outstanding transactions to 128 |
| */ |
| wr32(fbd, FBNIC_QM_TNI_TDE_CTL, |
| FIELD_PREP(FBNIC_QM_TNI_TDE_CTL_MRRS_1K, override_1k ? 1 : 0) | |
| FIELD_PREP(FBNIC_QM_TNI_TDE_CTL_MAX_OB, 704) | |
| FIELD_PREP(FBNIC_QM_TNI_TDE_CTL_MAX_OT, 128) | |
| FIELD_PREP(FBNIC_QM_TNI_TDE_CTL_MRRS, readrq) | |
| FIELD_PREP(FBNIC_QM_TNI_TDE_CTL_CLS, cls)); |
| |
| fbnic_init_readrq(fbd, FBNIC_QM_RNI_RBP_CTL, cls, readrq); |
| fbnic_init_mps(fbd, FBNIC_QM_RNI_RDE_CTL, cls, mps); |
| fbnic_init_mps(fbd, FBNIC_QM_RNI_RCM_CTL, cls, mps); |
| |
| /* Enable XALI AR/AW outbound */ |
| wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, |
| FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); |
| wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, |
| FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); |
| } |
| |
| static void fbnic_mac_init_qm(struct fbnic_dev *fbd) |
| { |
| u32 clock_freq; |
| |
| /* Configure TSO behavior */ |
| wr32(fbd, FBNIC_QM_TQS_CTL0, |
| FIELD_PREP(FBNIC_QM_TQS_CTL0_LSO_TS_MASK, |
| FBNIC_QM_TQS_CTL0_LSO_TS_LAST) | |
| FIELD_PREP(FBNIC_QM_TQS_CTL0_PREFETCH_THRESH, |
| FBNIC_QM_TQS_CTL0_PREFETCH_THRESH_MIN)); |
| |
| /* Limit EDT to INT_MAX as this is the limit of the EDT Qdisc */ |
| wr32(fbd, FBNIC_QM_TQS_EDT_TS_RANGE, INT_MAX); |
| |
| /* Configure MTU |
| * Due to known HW issue we cannot set the MTU to within 16 octets |
| * of a 64 octet aligned boundary. So we will set the TQS_MTU(s) to |
| * MTU + 1. |
| */ |
| wr32(fbd, FBNIC_QM_TQS_MTU_CTL0, FBNIC_MAX_JUMBO_FRAME_SIZE + 1); |
| wr32(fbd, FBNIC_QM_TQS_MTU_CTL1, |
| FIELD_PREP(FBNIC_QM_TQS_MTU_CTL1_BULK, |
| FBNIC_MAX_JUMBO_FRAME_SIZE + 1)); |
| |
| clock_freq = FBNIC_CLOCK_FREQ; |
| |
| /* Be aggressive on the timings. We will have the interrupt |
| * threshold timer tick once every 1 usec and coalesce writes for |
| * up to 80 usecs. |
| */ |
| wr32(fbd, FBNIC_QM_TCQ_CTL0, |
| FIELD_PREP(FBNIC_QM_TCQ_CTL0_TICK_CYCLES, |
| clock_freq / 1000000) | |
| FIELD_PREP(FBNIC_QM_TCQ_CTL0_COAL_WAIT, |
| clock_freq / 12500)); |
| |
| /* We will have the interrupt threshold timer tick once every |
| * 1 usec and coalesce writes for up to 2 usecs. |
| */ |
| wr32(fbd, FBNIC_QM_RCQ_CTL0, |
| FIELD_PREP(FBNIC_QM_RCQ_CTL0_TICK_CYCLES, |
| clock_freq / 1000000) | |
| FIELD_PREP(FBNIC_QM_RCQ_CTL0_COAL_WAIT, |
| clock_freq / 500000)); |
| |
| /* Configure spacer control to 64 beats. */ |
| wr32(fbd, FBNIC_FAB_AXI4_AR_SPACER_2_CFG, |
| FBNIC_FAB_AXI4_AR_SPACER_MASK | |
| FIELD_PREP(FBNIC_FAB_AXI4_AR_SPACER_THREADSHOLD, 2)); |
| } |
| |
/* Per-FIFO enable bitmaps: bit N refers to Rx FIFO N, i.e. entry N of
 * fifo_config[] below.
 */
#define FBNIC_DROP_EN_MASK	0x7d	/* FIFOs 0 and 2 - 6 */
#define FBNIC_PAUSE_EN_MASK	0x14	/* FIFOs 2 and 4 */
#define FBNIC_ECN_EN_MASK	0x10	/* FIFO 4 */

/* Placement of one Rx FIFO inside the packet buffer */
struct fbnic_fifo_config {
	unsigned int addr;	/* starting address, in units of 64B */
	unsigned int size;	/* FIFO size, in units of 2KB */
};

/* Rx FIFO Configuration
 *
 * The table has 8 entries, one per Rx FIFO, of which only 4 are currently
 * used; an unused FIFO has a size of 0. Addresses are stored in units of
 * 64B and sizes in units of 2KB, so in human readable form the layout is:
 *
 *	FIFO  Function               Addr    Size
 *	------------------------------------------
 *	 0    Network to Host/BMC    384K     64K
 *	 1    Unused
 *	 2    Unused
 *	 3    Network to BMC         448K     32K
 *	 4    Network to Host           0    384K
 *	 5    Unused
 *	 6    BMC to Host            480K     32K
 *	 7    Unused
 */
static const struct fbnic_fifo_config fifo_config[] = {
	[0] = { .addr = 0x1800, .size = 0x20 },	/* Network to Host/BMC */
	[1] = { },				/* Unused */
	[2] = { },				/* Unused */
	[3] = { .addr = 0x1c00, .size = 0x10 },	/* Network to BMC */
	[4] = { .addr = 0x0000, .size = 0xc0 },	/* Network to Host */
	[5] = { },				/* Unused */
	[6] = { .addr = 0x1e00, .size = 0x10 },	/* BMC to Host */
	[7] = { },				/* Unused */
};
| |
| static void fbnic_mac_init_rxb(struct fbnic_dev *fbd) |
| { |
| bool rx_enable; |
| int i; |
| |
| rx_enable = !!(rd32(fbd, FBNIC_RPC_RMI_CONFIG) & |
| FBNIC_RPC_RMI_CONFIG_ENABLE); |
| |
| for (i = 0; i < 8; i++) { |
| unsigned int size = fifo_config[i].size; |
| |
| /* If we are coming up on a system that already has the |
| * Rx data path enabled we don't need to reconfigure the |
| * FIFOs. Instead we can check to verify the values are |
| * large enough to meet our needs, and use the values to |
| * populate the flow control, ECN, and drop thresholds. |
| */ |
| if (rx_enable) { |
| size = FIELD_GET(FBNIC_RXB_PBUF_SIZE, |
| rd32(fbd, FBNIC_RXB_PBUF_CFG(i))); |
| if (size < fifo_config[i].size) |
| dev_warn(fbd->dev, |
| "fifo%d size of %d smaller than expected value of %d\n", |
| i, size << 11, |
| fifo_config[i].size << 11); |
| } else { |
| /* Program RXB Cuthrough */ |
| wr32(fbd, FBNIC_RXB_CT_SIZE(i), |
| FIELD_PREP(FBNIC_RXB_CT_SIZE_HEADER, 4) | |
| FIELD_PREP(FBNIC_RXB_CT_SIZE_PAYLOAD, 2)); |
| |
| /* The granularity for the packet buffer size is 2KB |
| * granularity while the packet buffer base address is |
| * only 64B granularity |
| */ |
| wr32(fbd, FBNIC_RXB_PBUF_CFG(i), |
| FIELD_PREP(FBNIC_RXB_PBUF_BASE_ADDR, |
| fifo_config[i].addr) | |
| FIELD_PREP(FBNIC_RXB_PBUF_SIZE, size)); |
| |
| /* The granularity for the credits is 64B. This is |
| * based on RXB_PBUF_SIZE * 32 + 4. |
| */ |
| wr32(fbd, FBNIC_RXB_PBUF_CREDIT(i), |
| FIELD_PREP(FBNIC_RXB_PBUF_CREDIT_MASK, |
| size ? size * 32 + 4 : 0)); |
| } |
| |
| if (!size) |
| continue; |
| |
| /* Pause is size of FIFO with 56KB skid to start/stop */ |
| wr32(fbd, FBNIC_RXB_PAUSE_THLD(i), |
| !(FBNIC_PAUSE_EN_MASK & (1u << i)) ? 0x1fff : |
| FIELD_PREP(FBNIC_RXB_PAUSE_THLD_ON, |
| size * 32 - 0x380) | |
| FIELD_PREP(FBNIC_RXB_PAUSE_THLD_OFF, 0x380)); |
| |
| /* Enable Drop when only one packet is left in the FIFO */ |
| wr32(fbd, FBNIC_RXB_DROP_THLD(i), |
| !(FBNIC_DROP_EN_MASK & (1u << i)) ? 0x1fff : |
| FIELD_PREP(FBNIC_RXB_DROP_THLD_ON, |
| size * 32 - |
| FBNIC_MAX_JUMBO_FRAME_SIZE / 64) | |
| FIELD_PREP(FBNIC_RXB_DROP_THLD_OFF, |
| size * 32 - |
| FBNIC_MAX_JUMBO_FRAME_SIZE / 64)); |
| |
| /* Enable ECN bit when 1/4 of RXB is filled with at least |
| * 1 room for one full jumbo frame before setting ECN |
| */ |
| wr32(fbd, FBNIC_RXB_ECN_THLD(i), |
| !(FBNIC_ECN_EN_MASK & (1u << i)) ? 0x1fff : |
| FIELD_PREP(FBNIC_RXB_ECN_THLD_ON, |
| max_t(unsigned int, |
| size * 32 / 4, |
| FBNIC_MAX_JUMBO_FRAME_SIZE / 64)) | |
| FIELD_PREP(FBNIC_RXB_ECN_THLD_OFF, |
| max_t(unsigned int, |
| size * 32 / 4, |
| FBNIC_MAX_JUMBO_FRAME_SIZE / 64))); |
| } |
| |
| /* For now only enable drop and ECN. We need to add driver/kernel |
| * interfaces for configuring pause. |
| */ |
| wr32(fbd, FBNIC_RXB_PAUSE_DROP_CTRL, |
| FIELD_PREP(FBNIC_RXB_PAUSE_DROP_CTRL_DROP_ENABLE, |
| FBNIC_DROP_EN_MASK) | |
| FIELD_PREP(FBNIC_RXB_PAUSE_DROP_CTRL_ECN_ENABLE, |
| FBNIC_ECN_EN_MASK)); |
| |
| /* Program INTF credits */ |
| wr32(fbd, FBNIC_RXB_INTF_CREDIT, |
| FBNIC_RXB_INTF_CREDIT_MASK0 | |
| FBNIC_RXB_INTF_CREDIT_MASK1 | |
| FBNIC_RXB_INTF_CREDIT_MASK2 | |
| FIELD_PREP(FBNIC_RXB_INTF_CREDIT_MASK3, 8)); |
| |
| /* Configure calendar slots. |
| * Rx: 0 - 62 RDE 1st, BMC 2nd |
| * 63 BMC 1st, RDE 2nd |
| */ |
| for (i = 0; i < 16; i++) { |
| u32 calendar_val = (i == 15) ? 0x1e1b1b1b : 0x1b1b1b1b; |
| |
| wr32(fbd, FBNIC_RXB_CLDR_PRIO_CFG(i), calendar_val); |
| } |
| |
| /* Split the credits for the DRR up as follows: |
| * Quantum0: 8000 Network to Host |
| * Quantum1: 0 Not used |
| * Quantum2: 80 BMC to Host |
| * Quantum3: 0 Not used |
| * Quantum4: 8000 Multicast to Host and BMC |
| */ |
| wr32(fbd, FBNIC_RXB_DWRR_RDE_WEIGHT0, |
| FIELD_PREP(FBNIC_RXB_DWRR_RDE_WEIGHT0_QUANTUM0, 0x40) | |
| FIELD_PREP(FBNIC_RXB_DWRR_RDE_WEIGHT0_QUANTUM2, 0x50)); |
| wr32(fbd, FBNIC_RXB_DWRR_RDE_WEIGHT0_EXT, |
| FIELD_PREP(FBNIC_RXB_DWRR_RDE_WEIGHT0_QUANTUM0, 0x1f)); |
| wr32(fbd, FBNIC_RXB_DWRR_RDE_WEIGHT1, |
| FIELD_PREP(FBNIC_RXB_DWRR_RDE_WEIGHT1_QUANTUM4, 0x40)); |
| wr32(fbd, FBNIC_RXB_DWRR_RDE_WEIGHT1_EXT, |
| FIELD_PREP(FBNIC_RXB_DWRR_RDE_WEIGHT1_QUANTUM4, 0x1f)); |
| |
| /* Program RXB FCS Endian register */ |
| wr32(fbd, FBNIC_RXB_ENDIAN_FCS, 0x0aaaaaa0); |
| } |
| |
| static void fbnic_mac_init_txb(struct fbnic_dev *fbd) |
| { |
| int i; |
| |
| wr32(fbd, FBNIC_TCE_TXB_CTRL, 0); |
| |
| /* Configure Tx QM Credits */ |
| wr32(fbd, FBNIC_QM_TQS_CTL1, |
| FIELD_PREP(FBNIC_QM_TQS_CTL1_MC_MAX_CREDITS, 0x40) | |
| FIELD_PREP(FBNIC_QM_TQS_CTL1_BULK_MAX_CREDITS, 0x20)); |
| |
| /* Initialize internal Tx queues */ |
| wr32(fbd, FBNIC_TCE_TXB_TEI_Q0_CTRL, 0); |
| wr32(fbd, FBNIC_TCE_TXB_TEI_Q1_CTRL, 0); |
| wr32(fbd, FBNIC_TCE_TXB_MC_Q_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_SIZE, 0x400) | |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_START, 0x000)); |
| wr32(fbd, FBNIC_TCE_TXB_RX_TEI_Q_CTRL, 0); |
| wr32(fbd, FBNIC_TCE_TXB_TX_BMC_Q_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_SIZE, 0x200) | |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_START, 0x400)); |
| wr32(fbd, FBNIC_TCE_TXB_RX_BMC_Q_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_SIZE, 0x200) | |
| FIELD_PREP(FBNIC_TCE_TXB_Q_CTRL_START, 0x600)); |
| |
| wr32(fbd, FBNIC_TCE_LSO_CTRL, |
| FBNIC_TCE_LSO_CTRL_IPID_MODE_INC | |
| FIELD_PREP(FBNIC_TCE_LSO_CTRL_TCPF_CLR_1ST, TCPHDR_PSH | |
| TCPHDR_FIN) | |
| FIELD_PREP(FBNIC_TCE_LSO_CTRL_TCPF_CLR_MID, TCPHDR_PSH | |
| TCPHDR_CWR | |
| TCPHDR_FIN) | |
| FIELD_PREP(FBNIC_TCE_LSO_CTRL_TCPF_CLR_END, TCPHDR_CWR)); |
| wr32(fbd, FBNIC_TCE_CSO_CTRL, 0); |
| |
| wr32(fbd, FBNIC_TCE_BMC_MAX_PKTSZ, |
| FIELD_PREP(FBNIC_TCE_BMC_MAX_PKTSZ_TX, |
| FBNIC_MAX_JUMBO_FRAME_SIZE) | |
| FIELD_PREP(FBNIC_TCE_BMC_MAX_PKTSZ_RX, |
| FBNIC_MAX_JUMBO_FRAME_SIZE)); |
| wr32(fbd, FBNIC_TCE_MC_MAX_PKTSZ, |
| FIELD_PREP(FBNIC_TCE_MC_MAX_PKTSZ_TMI, |
| FBNIC_MAX_JUMBO_FRAME_SIZE)); |
| |
| /* Configure calendar slots. |
| * Tx: 0 - 62 TMI 1st, BMC 2nd |
| * 63 BMC 1st, TMI 2nd |
| */ |
| for (i = 0; i < 16; i++) { |
| u32 calendar_val = (i == 15) ? 0x1e1b1b1b : 0x1b1b1b1b; |
| |
| wr32(fbd, FBNIC_TCE_TXB_CLDR_SLOT_CFG(i), calendar_val); |
| } |
| |
| /* Configure DWRR */ |
| wr32(fbd, FBNIC_TCE_TXB_ENQ_WRR_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_ENQ_WRR_CTRL_WEIGHT0, 0x64) | |
| FIELD_PREP(FBNIC_TCE_TXB_ENQ_WRR_CTRL_WEIGHT2, 0x04)); |
| wr32(fbd, FBNIC_TCE_TXB_TEI_DWRR_CTRL, 0); |
| wr32(fbd, FBNIC_TCE_TXB_TEI_DWRR_CTRL_EXT, 0); |
| wr32(fbd, FBNIC_TCE_TXB_BMC_DWRR_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_BMC_DWRR_CTRL_QUANTUM0, 0x50) | |
| FIELD_PREP(FBNIC_TCE_TXB_BMC_DWRR_CTRL_QUANTUM1, 0x82)); |
| wr32(fbd, FBNIC_TCE_TXB_BMC_DWRR_CTRL_EXT, 0); |
| wr32(fbd, FBNIC_TCE_TXB_NTWRK_DWRR_CTRL, |
| FIELD_PREP(FBNIC_TCE_TXB_NTWRK_DWRR_CTRL_QUANTUM1, 0x50) | |
| FIELD_PREP(FBNIC_TCE_TXB_NTWRK_DWRR_CTRL_QUANTUM2, 0x20)); |
| wr32(fbd, FBNIC_TCE_TXB_NTWRK_DWRR_CTRL_EXT, |
| FIELD_PREP(FBNIC_TCE_TXB_NTWRK_DWRR_CTRL_QUANTUM2, 0x03)); |
| |
| /* Configure SOP protocol protection */ |
| wr32(fbd, FBNIC_TCE_SOP_PROT_CTRL, |
| FIELD_PREP(FBNIC_TCE_SOP_PROT_CTRL_TBI, 0x78) | |
| FIELD_PREP(FBNIC_TCE_SOP_PROT_CTRL_TTI_FRM, 0x40) | |
| FIELD_PREP(FBNIC_TCE_SOP_PROT_CTRL_TTI_CM, 0x0c)); |
| |
| /* Conservative configuration on MAC interface Start of Packet |
| * protection FIFO. This sets the minimum depth of the FIFO before |
| * we start sending packets to the MAC measured in 64B units and |
| * up to 160 entries deep. |
| * |
| * For the ASIC the clock is fast enough that we will likely fill |
| * the SOP FIFO before the MAC can drain it. So just use a minimum |
| * value of 8. |
| */ |
| wr32(fbd, FBNIC_TMI_SOP_PROT_CTRL, 8); |
| |
| wrfl(fbd); |
| wr32(fbd, FBNIC_TCE_TXB_CTRL, FBNIC_TCE_TXB_CTRL_TCAM_ENABLE | |
| FBNIC_TCE_TXB_CTRL_LOAD); |
| } |
| |
/* Run the register initialization helpers in order: AXI, queue manager,
 * Rx buffer and finally Tx buffer.
 */
static void fbnic_mac_init_regs(struct fbnic_dev *fbd)
{
	fbnic_mac_init_axi(fbd);
	fbnic_mac_init_qm(fbd);
	fbnic_mac_init_rxb(fbd);
	fbnic_mac_init_txb(fbd);
}
| |
| static const struct fbnic_mac fbnic_mac_asic = { |
| .init_regs = fbnic_mac_init_regs, |
| }; |
| |
| /** |
| * fbnic_mac_init - Assign a MAC type and initialize the fbnic device |
| * @fbd: Device pointer to device to initialize |
| * |
| * Return: zero on success, negative on failure |
| * |
| * Initialize the MAC function pointers and initializes the MAC of |
| * the device. |
| **/ |
| int fbnic_mac_init(struct fbnic_dev *fbd) |
| { |
| fbd->mac = &fbnic_mac_asic; |
| |
| fbd->mac->init_regs(fbd); |
| |
| return 0; |
| } |