Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
drivers/net/ethernet/intel/ice/ice_main.c
c9663f79cd82 ("ice: adjust switchdev rebuild path")
7758017911a4 ("ice: restore timestamp configuration after device reset")
https://lore.kernel.org/all/20231121211259.3348630-1-anthony.l.nguyen@intel.com/
Adjacent changes:
kernel/bpf/verifier.c
bb124da69c47 ("bpf: keep track of max number of bpf_loop callback iterations")
5f99f312bd3b ("bpf: add register bounds sanity checks and sanitization")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 723408e..7985c66 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -135,6 +135,30 @@
annotation, the verifier will reject the program if a null pointer is passed in with
a nonzero size.
+2.2.5 __str Annotation
+----------------------------
+This annotation is used to indicate that the argument is a constant string.
+
+An example is given below::
+
+ __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...)
+ {
+ ...
+ }
+
+In this case, ``bpf_get_file_xattr()`` can be called as::
+
+ bpf_get_file_xattr(..., "xattr_name", ...);
+
+Or::
+
+ const char name[] = "xattr_name"; /* This needs to be global */
+ int BPF_PROG(...)
+ {
+ ...
+ bpf_get_file_xattr(..., name, ...);
+ ...
+ }
.. _BPF_kfunc_nodef:
diff --git a/Documentation/devicetree/bindings/net/marvell,aquantia.yaml b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml
new file mode 100644
index 0000000..9854fab
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/marvell,aquantia.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Aquantia Ethernet PHY
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description: |
+ Marvell Aquantia Ethernet PHYs require firmware to be loaded to actually
+ work.
+
+ This can be done, and is implemented by OEMs, in 3 different ways:
+ - Attached SPI flash directly to the PHY with the firmware. The PHY
+ will self load the firmware in the presence of this configuration.
+ - Read from a dedicated partition on system NAND declared in an
+ NVMEM cell, and loaded to the PHY using its mailbox interface.
+ - Manually provided firmware loaded from a file in the filesystem.
+
+allOf:
+ - $ref: ethernet-phy.yaml#
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ethernet-phy-id03a1.b445
+ - ethernet-phy-id03a1.b460
+ - ethernet-phy-id03a1.b4a2
+ - ethernet-phy-id03a1.b4d0
+ - ethernet-phy-id03a1.b4e0
+ - ethernet-phy-id03a1.b5c2
+ - ethernet-phy-id03a1.b4b0
+ - ethernet-phy-id03a1.b662
+ - ethernet-phy-id03a1.b712
+ - ethernet-phy-id31c3.1c12
+ required:
+ - compatible
+
+properties:
+ reg:
+ maxItems: 1
+
+ firmware-name:
+ description: specify the name of PHY firmware to load
+
+ nvmem-cells:
+ description: phandle to the firmware nvmem cell
+ maxItems: 1
+
+ nvmem-cell-names:
+ const: firmware
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-phy@0 {
+ compatible = "ethernet-phy-id31c3.1c12",
+ "ethernet-phy-ieee802.3-c45";
+
+ reg = <0>;
+ firmware-name = "AQR-G4_v5.4.C-AQR_CIG_WF-1945_0x8_ID44776_VER1630.cld";
+ };
+
+ ethernet-phy@1 {
+ compatible = "ethernet-phy-id31c3.1c12",
+ "ethernet-phy-ieee802.3-c45";
+
+ reg = <1>;
+ nvmem-cells = <&aqr_fw>;
+ nvmem-cell-names = "firmware";
+ };
+ };
+
+ flash {
+ compatible = "jedec,spi-nor";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* ... */
+
+ partition@650000 {
+ compatible = "nvmem-cells";
+ label = "0:ethphyfw";
+ reg = <0x650000 0x80000>;
+ read-only;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ aqr_fw: aqr_fw@0 {
+ reg = <0x0 0x5f42a>;
+ };
+ };
+
+ /* ... */
+
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index 5d074f2..d3306b1 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -55,7 +55,7 @@
- items:
- enum:
- - renesas,r9a07g043-gbeth # RZ/G2UL
+ - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five
- renesas,r9a07g044-gbeth # RZ/G2{L,LC}
- renesas,r9a07g054-gbeth # RZ/V2L
- const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family
diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
new file mode 100644
index 0000000..475aff7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/renesas,ethertsn.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Ethernet TSN End-station
+
+maintainers:
+ - Niklas Söderlund <niklas.soderlund@ragnatech.se>
+
+description:
+ The RTSN device provides Ethernet networking over a 10 Mbps, 100 Mbps, or 1
+ Gbps full-duplex link via MII/GMII/RMII/RGMII, depending on the connected PHY.
+
+allOf:
+ - $ref: ethernet-controller.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r8a779g0-ethertsn # R-Car V4H
+ - const: renesas,rcar-gen4-ethertsn
+
+ reg:
+ items:
+ - description: TSN End Station target
+ - description: generalized Precision Time Protocol target
+
+ reg-names:
+ items:
+ - const: tsnes
+ - const: gptp
+
+ interrupts:
+ items:
+ - description: TX data interrupt
+ - description: RX data interrupt
+
+ interrupt-names:
+ items:
+ - const: tx
+ - const: rx
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ phy-mode:
+ contains:
+ enum:
+ - mii
+ - rgmii
+
+ phy-handle:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies a reference to a node representing a PHY device.
+
+ rx-internal-delay-ps:
+ enum: [0, 1800]
+
+ tx-internal-delay-ps:
+ enum: [0, 2000]
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+patternProperties:
+ "^ethernet-phy@[0-9a-f]$":
+ type: object
+ $ref: ethernet-phy.yaml#
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+ - clocks
+ - power-domains
+ - resets
+ - phy-mode
+ - phy-handle
+ - '#address-cells'
+ - '#size-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a779g0-cpg-mssr.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/r8a779g0-sysc.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ tsn0: ethernet@e6460000 {
+ compatible = "renesas,r8a779g0-ethertsn", "renesas,rcar-gen4-ethertsn";
+ reg = <0xe6460000 0x7000>,
+ <0xe6449000 0x500>;
+ reg-names = "tsnes", "gptp";
+ interrupts = <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tx", "rx";
+ clocks = <&cpg CPG_MOD 2723>;
+ power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>;
+ resets = <&cpg 2723>;
+
+ phy-mode = "rgmii";
+ tx-internal-delay-ps = <2000>;
+ phy-handle = <&phy3>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy3: ethernet-phy@3 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
index 1d33d80..bbe89ea 100644
--- a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
+++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
@@ -122,6 +122,20 @@
and "phy-handle" should point to an external PHY if exists.
maxItems: 1
+ dmas:
+ minItems: 2
+ maxItems: 32
+ description: TX and RX DMA channel phandle
+
+ dma-names:
+ items:
+ pattern: "^[tr]x_chan([0-9]|1[0-5])$"
+ description:
+ Should be "tx_chan0", "tx_chan1" ... "tx_chan15" for DMA Tx channels.
+ Should be "rx_chan0", "rx_chan1" ... "rx_chan15" for DMA Rx channels.
+ minItems: 2
+ maxItems: 32
+
required:
- compatible
- interrupts
@@ -143,6 +157,8 @@
clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>;
phy-mode = "mii";
reg = <0x40c00000 0x40000>,<0x50c00000 0x40000>;
+ dmas = <&xilinx_dma 0>, <&xilinx_dma 1>;
+ dma-names = "tx_chan0", "rx_chan0";
xlnx,rxcsum = <0x2>;
xlnx,rxmem = <0x800>;
xlnx,txcsum = <0x2>;
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index 572d83a..43067e1 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -1484,8 +1484,8 @@
dont-validate: [ strict ]
flags: [ admin-perm ]
do:
- pre: devlink-nl-pre-doit
- post: devlink-nl-post-doit
+ pre: devlink-nl-pre-doit-dev-lock
+ post: devlink-nl-post-doit-dev-lock
request:
attributes:
- bus-name
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst
index cad96c8..613a818 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst
@@ -24,6 +24,10 @@
Currently, this driver support following devices:
* Network controller: Cavium, Inc. Device b200
* Network controller: Cavium, Inc. Device b400
+ * Network controller: Cavium, Inc. Device b900
+ * Network controller: Cavium, Inc. Device ba00
+ * Network controller: Cavium, Inc. Device bc00
+ * Network controller: Cavium, Inc. Device bd00
Interface Control
=================
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index dc2c7b9..7edf0fd 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -985,7 +985,8 @@ static int alb_upper_dev_walk(struct net_device *upper,
if (netif_is_macvlan(upper) && !strict_match) {
tags = bond_verify_device_path(bond->dev, upper, 0);
if (IS_ERR_OR_NULL(tags))
- BUG();
+ return -ENOMEM;
+
alb_send_lp_vid(slave, upper->dev_addr,
tags[0].vlan_proto, tags[0].vlan_id);
kfree(tags);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e6cc0e..d987432 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2973,8 +2973,11 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
if (start_dev == end_dev) {
tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC);
- if (!tags)
+ if (!tags) {
+ net_err_ratelimited("%s: %s: Failed to allocate tags\n",
+ __func__, start_dev->name);
return ERR_PTR(-ENOMEM);
+ }
tags[level].vlan_proto = BOND_VLAN_PROTO_NONE;
return tags;
}
@@ -5755,10 +5758,8 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
{
struct bonding *bond = netdev_priv(bond_dev);
struct ethtool_ts_info ts_info;
- const struct ethtool_ops *ops;
struct net_device *real_dev;
bool sw_tx_support = false;
- struct phy_device *phydev;
struct list_head *iter;
struct slave *slave;
int ret = 0;
@@ -5769,29 +5770,12 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
rcu_read_unlock();
if (real_dev) {
- ops = real_dev->ethtool_ops;
- phydev = real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- ret = phy_ts_info(phydev, info);
- goto out;
- } else if (ops->get_ts_info) {
- ret = ops->get_ts_info(real_dev, info);
- goto out;
- }
+ ret = ethtool_get_ts_info_by_layer(real_dev, info);
} else {
/* Check if all slaves support software tx timestamping */
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter) {
- ret = -1;
- ops = slave->dev->ethtool_ops;
- phydev = slave->dev->phydev;
-
- if (phy_has_tsinfo(phydev))
- ret = phy_ts_info(phydev, &ts_info);
- else if (ops->get_ts_info)
- ret = ops->get_ts_info(slave->dev, &ts_info);
-
+ ret = ethtool_get_ts_info_by_layer(slave->dev, &ts_info);
if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) {
sw_tx_support = true;
continue;
@@ -5803,15 +5787,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
rcu_read_unlock();
}
- ret = 0;
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
if (sw_tx_support)
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE;
- info->phc_index = -1;
-
-out:
dev_put(real_dev);
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d0359b5..d8cf7b3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -254,22 +254,22 @@ static bool bnxt_vf_pciid(enum board_idx idx)
writel(DB_CP_IRQ_DIS_FLAGS, db)
#define BNXT_DB_CQ(db, idx) \
- writel(DB_CP_FLAGS | RING_CMP(idx), (db)->doorbell)
+ writel(DB_CP_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell)
#define BNXT_DB_NQ_P5(db, idx) \
- bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx), \
+ bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | DB_RING_IDX(db, idx),\
(db)->doorbell)
#define BNXT_DB_CQ_ARM(db, idx) \
- writel(DB_CP_REARM_FLAGS | RING_CMP(idx), (db)->doorbell)
+ writel(DB_CP_REARM_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell)
#define BNXT_DB_NQ_ARM_P5(db, idx) \
- bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx),\
- (db)->doorbell)
+ bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | \
+ DB_RING_IDX(db, idx), (db)->doorbell)
static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
BNXT_DB_NQ_P5(db, idx);
else
BNXT_DB_CQ(db, idx);
@@ -277,7 +277,7 @@ static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
BNXT_DB_NQ_ARM_P5(db, idx);
else
BNXT_DB_CQ_ARM(db, idx);
@@ -285,9 +285,9 @@ static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
bnxt_writeq(bp, db->db_key64 | DBR_TYPE_CQ_ARMALL |
- RING_CMP(idx), db->doorbell);
+ DB_RING_IDX(db, idx), db->doorbell);
else
BNXT_DB_CQ(db, idx);
}
@@ -321,7 +321,7 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
if (!rxr->bnapi->in_reset) {
rxr->bnapi->in_reset = true;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
else
set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
@@ -331,16 +331,16 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
}
void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
- int idx)
+ u16 curr)
{
struct bnxt_napi *bnapi = txr->bnapi;
if (bnapi->tx_fault)
return;
- netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
- txr->txq_index, bnapi->tx_pkts,
- txr->tx_cons, txr->tx_prod, idx);
+ netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_hw_cons:%u cons:%u prod:%u curr:%u)",
+ txr->txq_index, txr->tx_hw_cons,
+ txr->tx_cons, txr->tx_prod, curr);
WARN_ON_ONCE(1);
bnapi->tx_fault = 1;
bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
@@ -381,6 +381,8 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
u16 prod)
{
+ /* Sync BD data before updating doorbell */
+ wmb();
bnxt_db_write(bp, &txr->tx_db, prod);
txr->kick_pending = 0;
}
@@ -388,7 +390,7 @@ static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
- struct tx_bd *txbd;
+ struct tx_bd *txbd, *txbd0;
struct tx_bd_ext *txbd1;
struct netdev_queue *txq;
int i;
@@ -430,11 +432,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
len = skb_headlen(skb);
last_frag = skb_shinfo(skb)->nr_frags;
- txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
- txbd->tx_bd_opaque = prod;
-
- tx_buf = &txr->tx_buf_ring[prod];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
tx_buf->skb = skb;
tx_buf->nr_frags = last_frag;
@@ -519,12 +519,15 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type;
txbd->tx_bd_haddr = txr->data_mapping;
+ txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2);
prod = NEXT_TX(prod);
- txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ tx_push->tx_bd_opaque = txbd->tx_bd_opaque;
+ txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
memcpy(txbd, tx_push1, sizeof(*txbd));
prod = NEXT_TX(prod);
tx_push->doorbell =
- cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
+ cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH |
+ DB_RING_IDX(&txr->tx_db, prod));
WRITE_ONCE(txr->tx_prod, prod);
tx_buf->is_push = 1;
@@ -562,10 +565,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
((last_frag + 2) << TX_BD_FLAGS_BD_CNT_SHIFT);
txbd->tx_bd_haddr = cpu_to_le64(mapping);
+ txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2 + last_frag);
prod = NEXT_TX(prod);
txbd1 = (struct tx_bd_ext *)
- &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
txbd1->tx_bd_hsize_lflags = lflags;
if (skb_is_gso(skb)) {
@@ -601,11 +605,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
txbd1->tx_bd_cfa_action =
cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
+ txbd0 = txbd;
for (i = 0; i < last_frag; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
prod = NEXT_TX(prod);
- txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
len = skb_frag_size(frag);
mapping = skb_frag_dma_map(&pdev->dev, frag, 0, len,
@@ -614,7 +619,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
goto tx_dma_error;
- tx_buf = &txr->tx_buf_ring[prod];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
dma_unmap_addr_set(tx_buf, mapping, mapping);
txbd->tx_bd_haddr = cpu_to_le64(mapping);
@@ -632,16 +637,17 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
- /* Sync BD data before updating doorbell */
- wmb();
-
prod = NEXT_TX(prod);
WRITE_ONCE(txr->tx_prod, prod);
- if (!netdev_xmit_more() || netif_xmit_stopped(txq))
+ if (!netdev_xmit_more() || netif_xmit_stopped(txq)) {
bnxt_txr_db_kick(bp, txr, prod);
- else
+ } else {
+ if (free_size >= bp->tx_wake_thresh)
+ txbd0->tx_bd_len_flags_type |=
+ cpu_to_le32(TX_BD_FLAGS_NO_CMPL);
txr->kick_pending = 1;
+ }
tx_done:
@@ -662,7 +668,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* start back at beginning and unmap skb */
prod = txr->tx_prod;
- tx_buf = &txr->tx_buf_ring[prod];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
skb_headlen(skb), DMA_TO_DEVICE);
prod = NEXT_TX(prod);
@@ -670,7 +676,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* unmap remaining mapped pages */
for (i = 0; i < last_frag; i++) {
prod = NEXT_TX(prod);
- tx_buf = &txr->tx_buf_ring[prod];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
dma_unmap_page(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
DMA_TO_DEVICE);
@@ -686,31 +692,32 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+ int budget)
{
- struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
- u16 cons = txr->tx_cons;
struct pci_dev *pdev = bp->pdev;
- int nr_pkts = bnapi->tx_pkts;
- int i;
+ u16 hw_cons = txr->tx_hw_cons;
unsigned int tx_bytes = 0;
+ u16 cons = txr->tx_cons;
+ int tx_pkts = 0;
- for (i = 0; i < nr_pkts; i++) {
+ while (RING_TX(bp, cons) != hw_cons) {
struct bnxt_sw_tx_bd *tx_buf;
struct sk_buff *skb;
int j, last;
- tx_buf = &txr->tx_buf_ring[cons];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
cons = NEXT_TX(cons);
skb = tx_buf->skb;
tx_buf->skb = NULL;
if (unlikely(!skb)) {
- bnxt_sched_reset_txr(bp, txr, i);
+ bnxt_sched_reset_txr(bp, txr, cons);
return;
}
+ tx_pkts++;
tx_bytes += skb->len;
if (tx_buf->is_push) {
@@ -724,7 +731,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
for (j = 0; j < last; j++) {
cons = NEXT_TX(cons);
- tx_buf = &txr->tx_buf_ring[cons];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
dma_unmap_page(
&pdev->dev,
dma_unmap_addr(tx_buf, mapping),
@@ -732,7 +739,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
DMA_TO_DEVICE);
}
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (BNXT_CHIP_P5(bp)) {
/* PTP worker takes ownership of the skb */
if (!bnxt_get_tx_ts_p5(bp, skb))
skb = NULL;
@@ -747,14 +754,25 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
dev_consume_skb_any(skb);
}
- bnapi->tx_pkts = 0;
WRITE_ONCE(txr->tx_cons, cons);
- __netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
+ __netif_txq_completed_wake(txq, tx_pkts, tx_bytes,
bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
}
+static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+{
+ struct bnxt_tx_ring_info *txr;
+ int i;
+
+ bnxt_for_each_napi_tx(i, bnapi, txr) {
+ if (txr->tx_hw_cons != txr->tx_cons)
+ __bnxt_tx_int(bp, txr, budget);
+ }
+ bnapi->events &= ~BNXT_TX_CMP_EVENT;
+}
+
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
struct bnxt_rx_ring_info *rxr,
unsigned int *offset,
@@ -803,8 +821,8 @@ static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping,
int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
u16 prod, gfp_t gfp)
{
- struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
- struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod];
+ struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)];
+ struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)];
dma_addr_t mapping;
if (BNXT_RX_PAGE_MODE(bp)) {
@@ -837,9 +855,10 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data)
{
u16 prod = rxr->rx_prod;
struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
+ struct bnxt *bp = rxr->bnapi->bp;
struct rx_bd *cons_bd, *prod_bd;
- prod_rx_buf = &rxr->rx_buf_ring[prod];
+ prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)];
cons_rx_buf = &rxr->rx_buf_ring[cons];
prod_rx_buf->data = data;
@@ -847,8 +866,8 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data)
prod_rx_buf->mapping = cons_rx_buf->mapping;
- prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
- cons_bd = &rxr->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)];
+ prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)];
+ cons_bd = &rxr->rx_desc_ring[RX_RING(bp, cons)][RX_IDX(cons)];
prod_bd->rx_bd_haddr = cons_bd->rx_bd_haddr;
}
@@ -868,7 +887,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
u16 prod, gfp_t gfp)
{
struct rx_bd *rxbd =
- &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)];
+ &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)];
struct bnxt_sw_rx_agg_bd *rx_agg_buf;
struct page *page;
dma_addr_t mapping;
@@ -885,7 +904,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
__set_bit(sw_prod, rxr->rx_agg_bmap);
rx_agg_buf = &rxr->rx_agg_ring[sw_prod];
- rxr->rx_sw_agg_prod = NEXT_RX_AGG(sw_prod);
+ rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod));
rx_agg_buf->page = page;
rx_agg_buf->offset = offset;
@@ -927,7 +946,7 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
bool p5_tpa = false;
u32 i;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa)
p5_tpa = true;
for (i = 0; i < agg_bufs; i++) {
@@ -961,13 +980,13 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
prod_rx_buf->mapping = cons_rx_buf->mapping;
- prod_bd = &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)];
+ prod_bd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)];
prod_bd->rx_bd_haddr = cpu_to_le64(cons_rx_buf->mapping);
prod_bd->rx_bd_opaque = sw_prod;
prod = NEXT_RX_AGG(prod);
- sw_prod = NEXT_RX_AGG(sw_prod);
+ sw_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod));
}
rxr->rx_agg_prod = prod;
rxr->rx_sw_agg_prod = sw_prod;
@@ -1094,7 +1113,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
u32 i, total_frag_len = 0;
bool p5_tpa = false;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa)
p5_tpa = true;
for (i = 0; i < agg_bufs; i++) {
@@ -1249,7 +1268,7 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
struct rx_tpa_end_cmp *tpa_end = cmp;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return 0;
agg_bufs = TPA_END_AGG_BUFS(tpa_end);
@@ -1300,7 +1319,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
struct rx_bd *prod_bd;
dma_addr_t mapping;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
agg_id = TPA_START_AGG_ID_P5(tpa_start);
agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
} else {
@@ -1309,7 +1328,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
cons = tpa_start->rx_tpa_start_cmp_opaque;
prod = rxr->rx_prod;
cons_rx_buf = &rxr->rx_buf_ring[cons];
- prod_rx_buf = &rxr->rx_buf_ring[prod];
+ prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)];
tpa_info = &rxr->rx_tpa[agg_id];
if (unlikely(cons != rxr->rx_next_cons ||
@@ -1330,7 +1349,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
mapping = tpa_info->mapping;
prod_rx_buf->mapping = mapping;
- prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
+ prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)];
prod_bd->rx_bd_haddr = cpu_to_le64(mapping);
@@ -1363,8 +1382,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
tpa_info->agg_count = 0;
rxr->rx_prod = NEXT_RX(prod);
- cons = NEXT_RX(cons);
- rxr->rx_next_cons = NEXT_RX(cons);
+ cons = RING_RX(bp, NEXT_RX(cons));
+ rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons));
cons_rx_buf = &rxr->rx_buf_ring[cons];
bnxt_reuse_rx_data(rxr, cons, cons_rx_buf->data);
@@ -1563,7 +1582,7 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
skb_shinfo(skb)->gso_size =
le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
skb_shinfo(skb)->gso_type = tpa_info->gso_type;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1);
else
payload_off = TPA_END_PAYLOAD_OFF(tpa_end);
@@ -1611,7 +1630,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
return NULL;
}
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
agg_id = TPA_END_AGG_ID_P5(tpa_end);
agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1);
@@ -1876,7 +1895,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rc = -EIO;
if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
bnapi->cp_ring.sw_stats.rx.rx_buf_errors++;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5) &&
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
!(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) {
netdev_warn_once(bp->dev, "RX buffer error %x\n",
rx_err);
@@ -2007,7 +2026,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) ==
RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) {
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp);
u64 ns, ts;
@@ -2032,7 +2051,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
next_rx_no_len:
rxr->rx_prod = NEXT_RX(prod);
- rxr->rx_next_cons = NEXT_RX(cons);
+ rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons));
next_rx_no_prod_no_len:
*raw_cons = tmp_raw_cons;
@@ -2415,7 +2434,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
struct bnxt_rx_ring_info *rxr;
u16 grp_idx;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
goto async_event_process_exit;
netdev_warn(bp->dev, "Ring monitor event, ring type %lu id 0x%x\n",
@@ -2588,7 +2607,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
struct bnxt_napi *bnapi = cpr->bnapi;
u32 raw_cons = cpr->cp_raw_cons;
u32 cons;
- int tx_pkts = 0;
int rx_pkts = 0;
u8 event = 0;
struct tx_cmp *txcmp;
@@ -2609,9 +2627,17 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
*/
dma_rmb();
if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) {
- tx_pkts++;
+ u32 opaque = txcmp->tx_cmp_opaque;
+ struct bnxt_tx_ring_info *txr;
+ u16 tx_freed;
+
+ txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)];
+ event |= BNXT_TX_CMP_EVENT;
+ txr->tx_hw_cons = TX_OPAQUE_PROD(bp, opaque);
+ tx_freed = (txr->tx_hw_cons - txr->tx_cons) &
+ bp->tx_ring_mask;
/* return full budget so NAPI will complete. */
- if (unlikely(tx_pkts >= bp->tx_wake_thresh)) {
+ if (unlikely(tx_freed >= bp->tx_wake_thresh)) {
rx_pkts = budget;
raw_cons = NEXT_RAW_CMP(raw_cons);
if (budget)
@@ -2655,7 +2681,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
xdp_do_flush();
if (event & BNXT_TX_EVENT) {
- struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+ struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0];
u16 prod = txr->tx_prod;
/* Sync BD data before updating doorbell */
@@ -2665,7 +2691,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
cpr->cp_raw_cons = raw_cons;
- bnapi->tx_pkts += tx_pkts;
bnapi->events |= event;
return rx_pkts;
}
@@ -2673,7 +2698,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
int budget)
{
- if (bnapi->tx_pkts && !bnapi->tx_fault)
+ if ((bnapi->events & BNXT_TX_CMP_EVENT) && !bnapi->tx_fault)
bnapi->tx_int(bp, bnapi, budget);
if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) {
@@ -2686,7 +2711,7 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
}
- bnapi->events = 0;
+ bnapi->events &= BNXT_TX_CMP_EVENT;
}
static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
@@ -2826,10 +2851,10 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
int i, work_done = 0;
- for (i = 0; i < 2; i++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i];
+ for (i = 0; i < cpr->cp_ring_count; i++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i];
- if (cpr2) {
+ if (cpr2->had_nqe_notify) {
work_done += __bnxt_poll_work(bp, cpr2,
budget - work_done);
cpr->has_more_work |= cpr2->has_more_work;
@@ -2844,15 +2869,18 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
int i;
- for (i = 0; i < 2; i++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i];
+ for (i = 0; i < cpr->cp_ring_count; i++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i];
struct bnxt_db_info *db;
- if (cpr2 && cpr2->had_work_done) {
+ if (cpr2->had_work_done) {
db = &cpr2->cp_db;
bnxt_writeq(bp, db->db_key64 | dbr_type |
- RING_CMP(cpr2->cp_raw_cons), db->doorbell);
+ DB_RING_IDX(db, cpr2->cp_raw_cons),
+ db->doorbell);
cpr2->had_work_done = 0;
+ if (dbr_type == DBR_TYPE_CQ_ARMALL)
+ cpr2->had_nqe_notify = 0;
}
}
__bnxt_poll_work_done(bp, bnapi, budget);
@@ -2901,13 +2929,17 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
if (nqcmp->type == cpu_to_le16(NQ_CN_TYPE_CQ_NOTIFICATION)) {
u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
+ u32 cq_type = BNXT_NQ_HDL_TYPE(idx);
struct bnxt_cp_ring_info *cpr2;
/* No more budget for RX work */
- if (budget && work_done >= budget && idx == BNXT_RX_HDL)
+ if (budget && work_done >= budget &&
+ cq_type == BNXT_NQ_HDL_TYPE_RX)
break;
- cpr2 = cpr->cp_ring_arr[idx];
+ idx = BNXT_NQ_HDL_IDX(idx);
+ cpr2 = &cpr->cp_ring_arr[idx];
+ cpr2->had_nqe_notify = 1;
work_done += __bnxt_poll_work(bp, cpr2,
budget - work_done);
cpr->has_more_work |= cpr2->has_more_work;
@@ -2922,8 +2954,9 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons);
}
poll_done:
- cpr_rx = cpr->cp_ring_arr[BNXT_RX_HDL];
- if (cpr_rx && (bp->flags & BNXT_FLAG_DIM)) {
+ cpr_rx = &cpr->cp_ring_arr[0];
+ if (cpr_rx->cp_ring_type == BNXT_NQ_HDL_TYPE_RX &&
+ (bp->flags & BNXT_FLAG_DIM)) {
struct dim_sample dim_sample = {};
dim_update_sample(cpr->event_ctr,
@@ -3097,20 +3130,20 @@ static void bnxt_free_skbs(struct bnxt *bp)
bnxt_free_rx_skbs(bp);
}
+/*
+ * Pre-initialize a block of context memory of length @len at @p using the
+ * init parameters of context type @ctxm.
+ *
+ * Nothing is written when the type's init value is zero.  When the init
+ * offset is BNXT_CTX_INIT_INVALID_OFFSET the whole block is filled with the
+ * init value; otherwise a single byte at init_offset is set inside every
+ * entry_size-sized entry of the block.
+ *
+ * NOTE(review): the per-entry loop advances by ctxm->entry_size, so it
+ * presumably relies on entry_size being non-zero whenever an init value and
+ * a valid offset are set - confirm against the code that fills in ctxm.
+ */
-static void bnxt_init_ctx_mem(struct bnxt_mem_init *mem_init, void *p, int len)
+static void bnxt_init_ctx_mem(struct bnxt_ctx_mem_type *ctxm, void *p, int len)
{
- u8 init_val = mem_init->init_val;
- u16 offset = mem_init->offset;
+ u8 init_val = ctxm->init_value;
+ u16 offset = ctxm->init_offset;
u8 *p2 = p;
int i;
if (!init_val)
return;
- if (offset == BNXT_MEM_INVALID_OFFSET) {
+ if (offset == BNXT_CTX_INIT_INVALID_OFFSET) {
memset(p, init_val, len);
return;
}
- for (i = 0; i < len; i += mem_init->size)
+ for (i = 0; i < len; i += ctxm->entry_size)
*(p2 + i + offset) = init_val;
}
@@ -3177,8 +3210,8 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
if (!rmem->pg_arr[i])
return -ENOMEM;
- if (rmem->mem_init)
- bnxt_init_ctx_mem(rmem->mem_init, rmem->pg_arr[i],
+ if (rmem->ctx_mem)
+ bnxt_init_ctx_mem(rmem->ctx_mem, rmem->pg_arr[i],
rmem->page_size);
if (rmem->nr_pages > 1 || rmem->depth > 0) {
if (i == rmem->nr_pages - 2 &&
@@ -3225,7 +3258,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
int i, j;
bp->max_tpa = MAX_TPA;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
if (!bp->max_tpa_v2)
return 0;
bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
@@ -3240,7 +3273,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
if (!rxr->rx_tpa)
return -ENOMEM;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
continue;
for (j = 0; j < bp->max_tpa; j++) {
agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
@@ -3395,6 +3428,15 @@ static void bnxt_free_tx_rings(struct bnxt *bp)
}
}
+/*
+ * Helpers converting between a TX ring index and its traffic class (TC),
+ * based on bp->tx_nr_rings_per_tc.
+ * NOTE(review): all three divide or multiply by tx_nr_rings_per_tc; the
+ * modulo/divide forms presumably require it to be non-zero - confirm.
+ */
+
+/* Ring index offset at which traffic class @tc starts (tc * rings per TC). */
+#define BNXT_TC_TO_RING_BASE(bp, tc) \
+ ((tc) * (bp)->tx_nr_rings_per_tc)
+
+/* Position of ring index @tx within its traffic class (tx % rings per TC). */
+#define BNXT_RING_TO_TC_OFF(bp, tx) \
+ ((tx) % (bp)->tx_nr_rings_per_tc)
+
+/* Traffic class that ring index @tx falls into (tx / rings per TC). */
+#define BNXT_RING_TO_TC(bp, tx) \
+ ((tx) / (bp)->tx_nr_rings_per_tc)
+
static int bnxt_alloc_tx_rings(struct bnxt *bp)
{
int i, j, rc;
@@ -3450,7 +3492,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
spin_lock_init(&txr->xdp_tx_lock);
if (i < bp->tx_nr_rings_xdp)
continue;
- if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
+ if (BNXT_RING_TO_TC_OFF(bp, i) == (bp->tx_nr_rings_per_tc - 1))
j++;
}
return 0;
@@ -3533,36 +3575,33 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
bnxt_free_ring(bp, &ring->ring_mem);
- for (j = 0; j < 2; j++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+ if (!cpr->cp_ring_arr)
+ continue;
- if (cpr2) {
- ring = &cpr2->cp_ring_struct;
- bnxt_free_ring(bp, &ring->ring_mem);
- bnxt_free_cp_arrays(cpr2);
- kfree(cpr2);
- cpr->cp_ring_arr[j] = NULL;
- }
+ for (j = 0; j < cpr->cp_ring_count; j++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j];
+
+ ring = &cpr2->cp_ring_struct;
+ bnxt_free_ring(bp, &ring->ring_mem);
+ bnxt_free_cp_arrays(cpr2);
}
+ kfree(cpr->cp_ring_arr);
+ cpr->cp_ring_arr = NULL;
+ cpr->cp_ring_count = 0;
}
}
-static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
+static int bnxt_alloc_cp_sub_ring(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr)
{
struct bnxt_ring_mem_info *rmem;
struct bnxt_ring_struct *ring;
- struct bnxt_cp_ring_info *cpr;
int rc;
- cpr = kzalloc(sizeof(*cpr), GFP_KERNEL);
- if (!cpr)
- return NULL;
-
rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
if (rc) {
bnxt_free_cp_arrays(cpr);
- kfree(cpr);
- return NULL;
+ return -ENOMEM;
}
ring = &cpr->cp_ring_struct;
rmem = &ring->ring_mem;
@@ -3575,23 +3614,26 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
if (rc) {
bnxt_free_ring(bp, rmem);
bnxt_free_cp_arrays(cpr);
- kfree(cpr);
- cpr = NULL;
}
- return cpr;
+ return rc;
}
static int bnxt_alloc_cp_rings(struct bnxt *bp)
{
bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS);
- int i, rc, ulp_base_vec, ulp_msix;
+ int i, j, rc, ulp_base_vec, ulp_msix;
+ int tcs = netdev_get_num_tc(bp->dev);
+ if (!tcs)
+ tcs = 1;
ulp_msix = bnxt_get_ulp_msix_num(bp);
ulp_base_vec = bnxt_get_ulp_msix_base(bp);
- for (i = 0; i < bp->cp_nr_rings; i++) {
+ for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
- struct bnxt_cp_ring_info *cpr;
+ struct bnxt_cp_ring_info *cpr, *cpr2;
struct bnxt_ring_struct *ring;
+ int cp_count = 0, k;
+ int rx = 0, tx = 0;
if (!bnapi)
continue;
@@ -3609,35 +3651,55 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp)
else
ring->map_idx = i;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
continue;
if (i < bp->rx_nr_rings) {
- struct bnxt_cp_ring_info *cpr2 =
- bnxt_alloc_cp_sub_ring(bp);
-
- cpr->cp_ring_arr[BNXT_RX_HDL] = cpr2;
- if (!cpr2)
- return -ENOMEM;
- cpr2->bnapi = bnapi;
+ cp_count++;
+ rx = 1;
}
- if ((sh && i < bp->tx_nr_rings) ||
- (!sh && i >= bp->rx_nr_rings)) {
- struct bnxt_cp_ring_info *cpr2 =
- bnxt_alloc_cp_sub_ring(bp);
-
- cpr->cp_ring_arr[BNXT_TX_HDL] = cpr2;
- if (!cpr2)
- return -ENOMEM;
- cpr2->bnapi = bnapi;
+ if (i < bp->tx_nr_rings_xdp) {
+ cp_count++;
+ tx = 1;
+ } else if ((sh && i < bp->tx_nr_rings) ||
+ (!sh && i >= bp->rx_nr_rings)) {
+ cp_count += tcs;
+ tx = 1;
}
+
+ cpr->cp_ring_arr = kcalloc(cp_count, sizeof(*cpr),
+ GFP_KERNEL);
+ if (!cpr->cp_ring_arr)
+ return -ENOMEM;
+ cpr->cp_ring_count = cp_count;
+
+ for (k = 0; k < cp_count; k++) {
+ cpr2 = &cpr->cp_ring_arr[k];
+ rc = bnxt_alloc_cp_sub_ring(bp, cpr2);
+ if (rc)
+ return rc;
+ cpr2->bnapi = bnapi;
+ cpr2->cp_idx = k;
+ if (!k && rx) {
+ bp->rx_ring[i].rx_cpr = cpr2;
+ cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_RX;
+ } else {
+ int n, tc = k - rx;
+
+ n = BNXT_TC_TO_RING_BASE(bp, tc) + j;
+ bp->tx_ring[n].tx_cpr = cpr2;
+ cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_TX;
+ }
+ }
+ if (tx)
+ j++;
}
return 0;
}
static void bnxt_init_ring_struct(struct bnxt *bp)
{
- int i;
+ int i, j;
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
@@ -3682,18 +3744,16 @@ static void bnxt_init_ring_struct(struct bnxt *bp)
rmem->vmem = (void **)&rxr->rx_agg_ring;
skip_rx:
- txr = bnapi->tx_ring;
- if (!txr)
- continue;
-
- ring = &txr->tx_ring_struct;
- rmem = &ring->ring_mem;
- rmem->nr_pages = bp->tx_nr_pages;
- rmem->page_size = HW_RXBD_RING_SIZE;
- rmem->pg_arr = (void **)txr->tx_desc_ring;
- rmem->dma_arr = txr->tx_desc_mapping;
- rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages;
- rmem->vmem = (void **)&txr->tx_buf_ring;
+ bnxt_for_each_napi_tx(j, bnapi, txr) {
+ ring = &txr->tx_ring_struct;
+ rmem = &ring->ring_mem;
+ rmem->nr_pages = bp->tx_nr_pages;
+ rmem->page_size = HW_TXBD_RING_SIZE;
+ rmem->pg_arr = (void **)txr->tx_desc_ring;
+ rmem->dma_arr = txr->tx_desc_mapping;
+ rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages;
+ rmem->vmem = (void **)&txr->tx_buf_ring;
+ }
}
}
@@ -3814,11 +3874,10 @@ static void bnxt_init_cp_rings(struct bnxt *bp)
ring->fw_ring_id = INVALID_HW_RING_ID;
cpr->rx_ring_coal.coal_ticks = bp->rx_coal.coal_ticks;
cpr->rx_ring_coal.coal_bufs = bp->rx_coal.coal_bufs;
- for (j = 0; j < 2; j++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
-
- if (!cpr2)
- continue;
+ if (!cpr->cp_ring_arr)
+ continue;
+ for (j = 0; j < cpr->cp_ring_count; j++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j];
ring = &cpr2->cp_ring_struct;
ring->fw_ring_id = INVALID_HW_RING_ID;
@@ -3906,7 +3965,7 @@ static int bnxt_alloc_vnics(struct bnxt *bp)
int num_vnics = 1;
#ifdef CONFIG_RFS_ACCEL
- if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS)
+ if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == BNXT_FLAG_RFS)
num_vnics += bp->rx_nr_rings;
#endif
@@ -4179,7 +4238,7 @@ static int bnxt_alloc_vnic_attributes(struct bnxt *bp)
}
}
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
goto vnic_skip_grps;
if (vnic->flags & BNXT_VNIC_RSS_FLAG)
@@ -4199,7 +4258,7 @@ static int bnxt_alloc_vnic_attributes(struct bnxt *bp)
/* Allocate rss table and hash key */
size = L1_CACHE_ALIGN(HW_HASH_INDEX_SIZE * sizeof(u16));
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
size = L1_CACHE_ALIGN(BNXT_MAX_RSS_TABLE_SIZE_P5);
vnic->rss_table_size = size + HW_HASH_KEY_SIZE;
@@ -4309,7 +4368,7 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp,
int rc;
if (!(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED) ||
- !(bp->flags & BNXT_FLAG_CHIP_P5))
+ !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
return -EOPNOTSUPP;
rc = hwrm_req_init(bp, req, HWRM_FUNC_QSTATS_EXT);
@@ -4347,7 +4406,7 @@ static void bnxt_init_stats(struct bnxt *bp)
stats = &cpr->stats;
rc = bnxt_hwrm_func_qstat_ext(bp, stats);
if (rc) {
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
mask = (1ULL << 48) - 1;
else
mask = -1ULL;
@@ -4493,7 +4552,7 @@ static int bnxt_alloc_stats(struct bnxt *bp)
static void bnxt_clear_ring_indices(struct bnxt *bp)
{
- int i;
+ int i, j;
if (!bp->bnapi)
return;
@@ -4510,10 +4569,10 @@ static void bnxt_clear_ring_indices(struct bnxt *bp)
cpr = &bnapi->cp_ring;
cpr->cp_raw_cons = 0;
- txr = bnapi->tx_ring;
- if (txr) {
+ bnxt_for_each_napi_tx(j, bnapi, txr) {
txr->tx_prod = 0;
txr->tx_cons = 0;
+ txr->tx_hw_cons = 0;
}
rxr = bnapi->rx_ring;
@@ -4523,6 +4582,7 @@ static void bnxt_clear_ring_indices(struct bnxt *bp)
rxr->rx_sw_agg_prod = 0;
rxr->rx_next_cons = 0;
}
+ bnapi->events = 0;
}
}
@@ -4626,7 +4686,7 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
bp->bnapi[i] = bnapi;
bp->bnapi[i]->index = i;
bp->bnapi[i]->bp = bp;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
struct bnxt_cp_ring_info *cpr =
&bp->bnapi[i]->cp_ring;
@@ -4644,11 +4704,13 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
for (i = 0; i < bp->rx_nr_rings; i++) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
rxr->rx_ring_struct.ring_mem.flags =
BNXT_RMEM_RING_PTE_FLAG;
rxr->rx_agg_ring_struct.ring_mem.flags =
BNXT_RMEM_RING_PTE_FLAG;
+ } else {
+ rxr->rx_cpr = &bp->bnapi[i]->cp_ring;
}
rxr->bnapi = bp->bnapi[i];
bp->bnapi[i]->rx_ring = &bp->rx_ring[i];
@@ -4671,22 +4733,33 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
else
j = bp->rx_nr_rings;
- for (i = 0; i < bp->tx_nr_rings; i++, j++) {
+ for (i = 0; i < bp->tx_nr_rings; i++) {
struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
+ struct bnxt_napi *bnapi2;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
txr->tx_ring_struct.ring_mem.flags =
BNXT_RMEM_RING_PTE_FLAG;
- txr->bnapi = bp->bnapi[j];
- bp->bnapi[j]->tx_ring = txr;
bp->tx_ring_map[i] = bp->tx_nr_rings_xdp + i;
if (i >= bp->tx_nr_rings_xdp) {
+ int k = j + BNXT_RING_TO_TC_OFF(bp, i);
+
+ bnapi2 = bp->bnapi[k];
txr->txq_index = i - bp->tx_nr_rings_xdp;
- bp->bnapi[j]->tx_int = bnxt_tx_int;
+ txr->tx_napi_idx =
+ BNXT_RING_TO_TC(bp, txr->txq_index);
+ bnapi2->tx_ring[txr->tx_napi_idx] = txr;
+ bnapi2->tx_int = bnxt_tx_int;
} else {
- bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP;
- bp->bnapi[j]->tx_int = bnxt_tx_int_xdp;
+ bnapi2 = bp->bnapi[j];
+ bnapi2->flags |= BNXT_NAPI_FLAG_XDP;
+ bnapi2->tx_ring[0] = txr;
+ bnapi2->tx_int = bnxt_tx_int_xdp;
+ j++;
}
+ txr->bnapi = bnapi2;
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+ txr->tx_cpr = &bnapi2->cp_ring;
}
rc = bnxt_alloc_stats(bp);
@@ -5211,7 +5284,7 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
nsegs = (MAX_SKB_FRAGS - n) / n;
}
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
segs = MAX_TPA_SEGS_P5;
max_aggs = bp->max_tpa;
} else {
@@ -5237,35 +5310,25 @@ static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring)
+/*
+ * Return the firmware ring ID of the completion ring used by RX ring @rxr.
+ * On BNXT_FLAG_CHIP_P5_PLUS chips this is read directly from the completion
+ * ring cached in rxr->rx_cpr; on other chips it is looked up through
+ * bnxt_cp_ring_from_grp() using the RX ring struct.
+ */
static u16 bnxt_cp_ring_for_rx(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- struct bnxt_napi *bnapi = rxr->bnapi;
- struct bnxt_cp_ring_info *cpr;
-
- cpr = bnapi->cp_ring.cp_ring_arr[BNXT_RX_HDL];
- return cpr->cp_ring_struct.fw_ring_id;
- } else {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ return rxr->rx_cpr->cp_ring_struct.fw_ring_id;
+ else
return bnxt_cp_ring_from_grp(bp, &rxr->rx_ring_struct);
- }
}
+/*
+ * Return the firmware ring ID of the completion ring used by TX ring @txr.
+ * On BNXT_FLAG_CHIP_P5_PLUS chips this is read directly from the completion
+ * ring cached in txr->tx_cpr; on other chips it is looked up through
+ * bnxt_cp_ring_from_grp() using the TX ring struct.
+ */
static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- struct bnxt_napi *bnapi = txr->bnapi;
- struct bnxt_cp_ring_info *cpr;
-
- cpr = bnapi->cp_ring.cp_ring_arr[BNXT_TX_HDL];
- return cpr->cp_ring_struct.fw_ring_id;
- } else {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ return txr->tx_cpr->cp_ring_struct.fw_ring_id;
+ else
return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct);
- }
}
static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp)
{
int entries;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5;
else
entries = HW_HASH_INDEX_SIZE;
@@ -5315,7 +5378,7 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5);
if (BNXT_CHIP_TYPE_NITRO_A0(bp))
return 2;
@@ -5361,7 +5424,7 @@ static void
__bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req,
struct bnxt_vnic_info *vnic)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
bnxt_fill_hw_rss_tbl_p5(bp, vnic);
else
bnxt_fill_hw_rss_tbl(bp, vnic);
@@ -5386,7 +5449,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
struct hwrm_vnic_rss_cfg_input *req;
int rc;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) ||
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ||
vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID)
return 0;
@@ -5551,7 +5614,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
if (rc)
return rc;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
req->default_rx_ring_id =
@@ -5651,7 +5714,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id,
if (rc)
return rc;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
goto vnic_no_ring_grps;
/* map ring groups to this vnic */
@@ -5699,7 +5762,7 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
if (!rc) {
u32 flags = le32_to_cpu(resp->flags);
- if (!(bp->flags & BNXT_FLAG_CHIP_P5) &&
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
(flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP))
bp->flags |= BNXT_FLAG_NEW_RSS_CAP;
if (flags &
@@ -5710,14 +5773,14 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
* VLAN_STRIP_CAP properly.
*/
if ((flags & VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP) ||
- (BNXT_CHIP_P5_THOR(bp) &&
+ (BNXT_CHIP_P5(bp) &&
!(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED)))
bp->fw_cap |= BNXT_FW_CAP_VLAN_RX_STRIP;
if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP)
bp->fw_cap |= BNXT_FW_CAP_RSS_HASH_TYPE_DELTA;
bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported);
if (bp->max_tpa_v2) {
- if (BNXT_CHIP_P5_THOR(bp))
+ if (BNXT_CHIP_P5(bp))
bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5;
else
bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2;
@@ -5734,7 +5797,7 @@ static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp)
int rc;
u16 i;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return 0;
rc = hwrm_req_init(bp, req, HWRM_RING_GRP_ALLOC);
@@ -5767,7 +5830,7 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp)
struct hwrm_ring_grp_free_input *req;
u16 i;
- if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
return;
if (hwrm_req_init(bp, req, HWRM_RING_GRP_FREE))
@@ -5832,7 +5895,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
case HWRM_RING_ALLOC_RX:
req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
req->length = cpu_to_le32(bp->rx_ring_mask + 1);
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
u16 flags = 0;
/* Association of rx ring with stats context */
@@ -5847,7 +5910,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
}
break;
case HWRM_RING_ALLOC_AGG:
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
/* Association of agg ring with rx ring */
grp_info = &bp->grp_info[ring->grp_idx];
@@ -5865,7 +5928,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
case HWRM_RING_ALLOC_CMPL:
req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
req->length = cpu_to_le32(bp->cp_ring_mask + 1);
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
/* Association of cp ring with nq */
grp_info = &bp->grp_info[map_index];
req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
@@ -5933,10 +5996,30 @@ static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx)
}
}
+/*
+ * Record in @db the ring index mask that matches @ring_type:
+ * TX, RX and AGG rings use their respective bp->*_ring_mask, while
+ * completion and NQ rings both use bp->cp_ring_mask.
+ * Any other ring type leaves db->db_ring_mask untouched.
+ */
+static void bnxt_set_db_mask(struct bnxt *bp, struct bnxt_db_info *db,
+ u32 ring_type)
+{
+ switch (ring_type) {
+ case HWRM_RING_ALLOC_TX:
+ db->db_ring_mask = bp->tx_ring_mask;
+ break;
+ case HWRM_RING_ALLOC_RX:
+ db->db_ring_mask = bp->rx_ring_mask;
+ break;
+ case HWRM_RING_ALLOC_AGG:
+ db->db_ring_mask = bp->rx_agg_ring_mask;
+ break;
+ case HWRM_RING_ALLOC_CMPL:
+ case HWRM_RING_ALLOC_NQ:
+ db->db_ring_mask = bp->cp_ring_mask;
+ break;
+ }
+}
+
static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
u32 map_idx, u32 xid)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
if (BNXT_PF(bp))
db->doorbell = bp->bar1 + DB_PF_OFFSET_P5;
else
@@ -5972,6 +6055,7 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
break;
}
}
+ bnxt_set_db_mask(bp, db, ring_type);
}
static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
@@ -5980,7 +6064,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
int i, rc = 0;
u32 type;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
type = HWRM_RING_ALLOC_NQ;
else
type = HWRM_RING_ALLOC_CMPL;
@@ -6016,15 +6100,13 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
struct bnxt_ring_struct *ring;
u32 map_idx;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ struct bnxt_cp_ring_info *cpr2 = txr->tx_cpr;
struct bnxt_napi *bnapi = txr->bnapi;
- struct bnxt_cp_ring_info *cpr, *cpr2;
u32 type2 = HWRM_RING_ALLOC_CMPL;
- cpr = &bnapi->cp_ring;
- cpr2 = cpr->cp_ring_arr[BNXT_TX_HDL];
ring = &cpr2->cp_ring_struct;
- ring->handle = BNXT_TX_HDL;
+ ring->handle = BNXT_SET_NQ_HDL(cpr2);
map_idx = bnapi->index;
rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
if (rc)
@@ -6056,14 +6138,12 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
if (!agg_rings)
bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ struct bnxt_cp_ring_info *cpr2 = rxr->rx_cpr;
u32 type2 = HWRM_RING_ALLOC_CMPL;
- struct bnxt_cp_ring_info *cpr2;
- cpr2 = cpr->cp_ring_arr[BNXT_RX_HDL];
ring = &cpr2->cp_ring_struct;
- ring->handle = BNXT_RX_HDL;
+ ring->handle = BNXT_SET_NQ_HDL(cpr2);
rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
if (rc)
goto err_out;
@@ -6171,7 +6251,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
}
}
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
type = RING_FREE_REQ_RING_TYPE_RX_AGG;
else
type = RING_FREE_REQ_RING_TYPE_RX;
@@ -6198,7 +6278,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
*/
bnxt_disable_int_sync(bp);
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
type = RING_FREE_REQ_RING_TYPE_NQ;
else
type = RING_FREE_REQ_RING_TYPE_L2_CMPL;
@@ -6208,18 +6288,16 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
struct bnxt_ring_struct *ring;
int j;
- for (j = 0; j < 2; j++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+ for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j];
- if (cpr2) {
- ring = &cpr2->cp_ring_struct;
- if (ring->fw_ring_id == INVALID_HW_RING_ID)
- continue;
- hwrm_ring_free_send_msg(bp, ring,
- RING_FREE_REQ_RING_TYPE_L2_CMPL,
- INVALID_HW_RING_ID);
- ring->fw_ring_id = INVALID_HW_RING_ID;
- }
+ ring = &cpr2->cp_ring_struct;
+ if (ring->fw_ring_id == INVALID_HW_RING_ID)
+ continue;
+ hwrm_ring_free_send_msg(bp, ring,
+ RING_FREE_REQ_RING_TYPE_L2_CMPL,
+ INVALID_HW_RING_ID);
+ ring->fw_ring_id = INVALID_HW_RING_ID;
}
ring = &cpr->cp_ring_struct;
if (ring->fw_ring_id != INVALID_HW_RING_ID) {
@@ -6267,14 +6345,15 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
cp = le16_to_cpu(resp->alloc_cmpl_rings);
stats = le16_to_cpu(resp->alloc_stat_ctx);
hw_resc->resv_irqs = cp;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
int rx = hw_resc->resv_rx_rings;
int tx = hw_resc->resv_tx_rings;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx >>= 1;
if (cp < (rx + tx)) {
- bnxt_trim_rings(bp, &rx, &tx, cp, false);
+ rx = cp / 2;
+ tx = rx;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
hw_resc->resv_rx_rings = rx;
@@ -6331,7 +6410,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
if (BNXT_NEW_RM(bp)) {
enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
enables |= tx_rings + ring_grps ?
FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
@@ -6347,7 +6426,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
req->num_rx_rings = cpu_to_le16(rx_rings);
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
req->num_msix = cpu_to_le16(cp_rings);
req->num_rsscos_ctxs =
@@ -6382,7 +6461,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
enables |= tx_rings + ring_grps ?
FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
} else {
@@ -6397,7 +6476,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
req->num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
req->num_tx_rings = cpu_to_le16(tx_rings);
req->num_rx_rings = cpu_to_le16(rx_rings);
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
} else {
@@ -6493,7 +6572,7 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp)
{
int cp;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
return bnxt_nq_rings_in_use(bp);
cp = bp->tx_nr_rings + bp->rx_nr_rings;
@@ -6550,7 +6629,8 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
bnxt_check_rss_tbl_no_rmgr(bp);
return false;
}
- if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5))
+ if ((bp->flags & BNXT_FLAG_RFS) &&
+ !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
vnic = rx + 1;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
@@ -6558,9 +6638,9 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
if (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp ||
hw_resc->resv_vnics != vnic || hw_resc->resv_stat_ctxs != stat ||
(hw_resc->resv_hw_ring_grps != grp &&
- !(bp->flags & BNXT_FLAG_CHIP_P5)))
+ !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)))
return true;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) && BNXT_PF(bp) &&
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && BNXT_PF(bp) &&
hw_resc->resv_irqs != nq)
return true;
return false;
@@ -6575,13 +6655,15 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
int grp, rx_rings, rc;
int vnic = 1, stat;
bool sh = false;
+ int tx_cp;
if (!bnxt_need_reserve_rings(bp))
return 0;
if (bp->flags & BNXT_FLAG_SHARED_RINGS)
sh = true;
- if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5))
+ if ((bp->flags & BNXT_FLAG_RFS) &&
+ !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
vnic = rx + 1;
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx <<= 1;
@@ -6624,7 +6706,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
rx = rx_rings << 1;
- cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings;
+ tx_cp = bnxt_num_tx_to_cp(bp, tx);
+ cp = sh ? max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings;
bp->tx_nr_rings = tx;
/* If we cannot reserve all the RX rings, reset the RSS map only
@@ -6671,7 +6754,7 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST |
FUNC_VF_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
req->flags = cpu_to_le32(flags);
@@ -6693,7 +6776,7 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST |
FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST;
else
@@ -6887,10 +6970,40 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
return hwrm_req_send(bp, req_rx);
}
+/*
+ * Send the prepared coalescing request @req for the completion ring that
+ * serves the RX ring of @bnapi.  Only req->ring_id is filled in here; the
+ * remaining parameters must already be set by the caller.
+ * Returns the result of hwrm_req_send().
+ */
+static int
+bnxt_hwrm_set_rx_coal(struct bnxt *bp, struct bnxt_napi *bnapi,
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
+{
+ u16 ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring);
+
+ req->ring_id = cpu_to_le16(ring_id);
+ return hwrm_req_send(bp, req);
+}
+
+/*
+ * Send the prepared coalescing request @req for the completion ring of each
+ * TX ring attached to @bnapi, updating req->ring_id before every send.
+ * Stops and returns the error code on the first failed send, 0 otherwise.
+ */
+static int
+bnxt_hwrm_set_tx_coal(struct bnxt *bp, struct bnxt_napi *bnapi,
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
+{
+ struct bnxt_tx_ring_info *txr;
+ int i, rc;
+
+ bnxt_for_each_napi_tx(i, bnapi, txr) {
+ u16 ring_id;
+
+ ring_id = bnxt_cp_ring_for_tx(bp, txr);
+ req->ring_id = cpu_to_le16(ring_id);
+ rc = hwrm_req_send(bp, req);
+ if (rc)
+ return rc;
+ /*
+ * Chips without BNXT_FLAG_CHIP_P5_PLUS are configured once only.
+ * NOTE(review): presumably because all TX rings of a NAPI share
+ * one completion ring there - confirm.
+ */
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+ return 0;
+ }
+ return 0;
+}
+
int bnxt_hwrm_set_coal(struct bnxt *bp)
{
- struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx,
- *req;
+ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx;
int i, rc;
rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
@@ -6911,29 +7024,19 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_coal *hw_coal;
- u16 ring_id;
- req = req_rx;
- if (!bnapi->rx_ring) {
- ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
- req = req_tx;
- } else {
- ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring);
- }
- req->ring_id = cpu_to_le16(ring_id);
-
- rc = hwrm_req_send(bp, req);
+ if (!bnapi->rx_ring)
+ rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx);
+ else
+ rc = bnxt_hwrm_set_rx_coal(bp, bnapi, req_rx);
if (rc)
break;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
continue;
- if (bnapi->rx_ring && bnapi->tx_ring) {
- req = req_tx;
- ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
- req->ring_id = cpu_to_le16(ring_id);
- rc = hwrm_req_send(bp, req);
+ if (bnapi->rx_ring && bnapi->tx_ring[0]) {
+ rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx);
if (rc)
break;
}
@@ -7087,7 +7190,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
if (bp->db_size)
goto func_qcfg_exit;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
if (BNXT_PF(bp))
min_db_offset = DB_PF_OFFSET_P5;
else
@@ -7104,37 +7207,103 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
return rc;
}
+/*
+ * Store the context-memory init parameters for one context type.
+ * When @init_mask_set is true the type is initialized with @init_val at
+ * byte offset @init_offset * 4; otherwise init_value is cleared to 0 and
+ * init_offset is left at BNXT_CTX_INIT_INVALID_OFFSET.
+ */
-static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_info *ctx,
- struct hwrm_func_backing_store_qcaps_output *resp)
+static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_type *ctxm,
+ u8 init_val, u8 init_offset,
+ bool init_mask_set)
{
- struct bnxt_mem_init *mem_init;
- u16 init_mask;
- u8 init_val;
- u8 *offset;
- int i;
+ ctxm->init_value = init_val;
+ ctxm->init_offset = BNXT_CTX_INIT_INVALID_OFFSET;
+ if (init_mask_set)
+ ctxm->init_offset = init_offset * 4;
+ else
+ ctxm->init_value = 0;
+}
- init_val = resp->ctx_kind_initializer;
- init_mask = le16_to_cpu(resp->ctx_init_mask);
- offset = &resp->qp_init_offset;
- mem_init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP];
- for (i = 0; i < BNXT_CTX_MEM_INIT_MAX; i++, mem_init++, offset++) {
- mem_init->init_val = init_val;
- mem_init->offset = BNXT_MEM_INVALID_OFFSET;
- if (!init_mask)
+/*
+ * Allocate the pg_info array for every context type in bp->ctx->ctx_arr
+ * with index below @ctx_max that reports a non-zero max_entries.  The array
+ * has one element per bit set in instance_bmap, or a single element when
+ * instance_bmap is zero.
+ * Returns 0 on success or -ENOMEM on the first failed allocation.
+ * NOTE(review): arrays allocated before a failure are not freed here;
+ * presumably the caller's context teardown releases them - confirm.
+ */
+static int bnxt_alloc_all_ctx_pg_info(struct bnxt *bp, int ctx_max)
+{
+ struct bnxt_ctx_mem_info *ctx = bp->ctx;
+ u16 type;
+
+ for (type = 0; type < ctx_max; type++) {
+ struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type];
+ int n = 1;
+
+ if (!ctxm->max_entries)
continue;
- if (i == BNXT_CTX_MEM_INIT_STAT)
- offset = &resp->stat_init_offset;
- if (init_mask & (1 << i))
- mem_init->offset = *offset * 4;
- else
- mem_init->init_val = 0;
+
+ if (ctxm->instance_bmap)
+ n = hweight32(ctxm->instance_bmap);
+ ctxm->pg_info = kcalloc(n, sizeof(*ctxm->pg_info), GFP_KERNEL);
+ if (!ctxm->pg_info)
+ return -ENOMEM;
}
- ctx->mem_init[BNXT_CTX_MEM_INIT_QP].size = ctx->qp_entry_size;
- ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ].size = ctx->srq_entry_size;
- ctx->mem_init[BNXT_CTX_MEM_INIT_CQ].size = ctx->cq_entry_size;
- ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC].size = ctx->vnic_entry_size;
- ctx->mem_init[BNXT_CTX_MEM_INIT_STAT].size = ctx->stat_entry_size;
- ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV].size = ctx->mrav_entry_size;
+ return 0;
+}
+
+/*
+ * True when the QCAPS_V2 response @flags have the ENABLE_CTX_KIND_INIT bit
+ * set, normalized to 0/1 so it can be passed as a bool.
+ */
+#define BNXT_CTX_INIT_VALID(flags) \
+ (!!((flags) & \
+ FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT))
+
+/*
+ * Query the firmware's V2 backing store capabilities, one context type at a
+ * time, and record them in a newly allocated bp->ctx.
+ *
+ * Starting at type 0, each response supplies next_valid_type, which becomes
+ * the next type to query; the walk ends once that value reaches
+ * BNXT_CTX_V2_MAX.  Responses without the TYPE_VALID flag are skipped.  For
+ * valid types the entry size, flags, instance bitmap, entry limits, init
+ * parameters and split entries are copied into ctx->ctx_arr[].  The last
+ * valid type seen is marked with ->last, and finally the per-type pg_info
+ * arrays are allocated.
+ *
+ * Returns 0 on success or a negative error code.  On a failure after
+ * bp->ctx has been assigned, bp->ctx is left in place.
+ * NOTE(review): presumably the caller tears bp->ctx down in that case -
+ * confirm.
+ */
+static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp)
+{
+	struct hwrm_func_backing_store_qcaps_v2_output *resp;
+	struct hwrm_func_backing_store_qcaps_v2_input *req;
+	u16 last_valid_type = BNXT_CTX_INV;
+	struct bnxt_ctx_mem_info *ctx;
+	u16 type;
+	int rc;
+
+	rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2);
+	if (rc)
+		return rc;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		/*
+		 * The request was successfully initialized above, so it must
+		 * be released here or it is leaked.
+		 */
+		hwrm_req_drop(bp, req);
+		return -ENOMEM;
+	}
+	bp->ctx = ctx;
+
+	resp = hwrm_req_hold(bp, req);
+
+	/* No loop increment: the firmware tells us which type to query next. */
+	for (type = 0; type < BNXT_CTX_V2_MAX; ) {
+		struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type];
+		u8 init_val, init_off, i;
+		__le32 *p;
+		u32 flags;
+
+		req->type = cpu_to_le16(type);
+		rc = hwrm_req_send(bp, req);
+		if (rc)
+			goto ctx_done;
+		flags = le32_to_cpu(resp->flags);
+		/* ctxm already points at the queried type; advance for next pass. */
+		type = le16_to_cpu(resp->next_valid_type);
+		if (!(flags & FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID))
+			continue;
+
+		ctxm->type = le16_to_cpu(resp->type);
+		last_valid_type = ctxm->type;
+		ctxm->entry_size = le16_to_cpu(resp->entry_size);
+		ctxm->flags = flags;
+		ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map);
+		ctxm->entry_multiple = resp->entry_multiple;
+		ctxm->max_entries = le32_to_cpu(resp->max_num_entries);
+		ctxm->min_entries = le32_to_cpu(resp->min_num_entries);
+		init_val = resp->ctx_init_value;
+		init_off = resp->ctx_init_offset;
+		bnxt_init_ctx_initializer(ctxm, init_val, init_off,
+					  BNXT_CTX_INIT_VALID(flags));
+		/* Never copy more split entries than ctxm->split[] is sized for. */
+		ctxm->split_entry_cnt = min_t(u8, resp->subtype_valid_cnt,
+					      BNXT_MAX_SPLIT_ENTRY);
+		for (i = 0, p = &resp->split_entry_0; i < ctxm->split_entry_cnt;
+		     i++, p++)
+			ctxm->split[i] = le32_to_cpu(*p);
+	}
+	if (last_valid_type < BNXT_CTX_V2_MAX)
+		ctx->ctx_arr[last_valid_type].last = true;
+	rc = bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_V2_MAX);
+
+ctx_done:
+	hwrm_req_drop(bp, req);
+	return rc;
}
static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
@@ -7146,6 +7315,9 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx)
return 0;
+ if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2)
+ return bnxt_hwrm_func_backing_store_qcaps_v2(bp);
+
rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS);
if (rc)
return rc;
@@ -7153,48 +7325,83 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
resp = hwrm_req_hold(bp, req);
rc = hwrm_req_send_silent(bp, req);
if (!rc) {
- struct bnxt_ctx_pg_info *ctx_pg;
+ struct bnxt_ctx_mem_type *ctxm;
struct bnxt_ctx_mem_info *ctx;
- int i, tqm_rings;
+ u8 init_val, init_idx = 0;
+ u16 init_mask;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = bp->ctx;
if (!ctx) {
- rc = -ENOMEM;
- goto ctx_err;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto ctx_err;
+ }
+ bp->ctx = ctx;
}
- ctx->qp_max_entries = le32_to_cpu(resp->qp_max_entries);
- ctx->qp_min_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries);
- ctx->qp_max_l2_entries = le16_to_cpu(resp->qp_max_l2_entries);
- ctx->qp_entry_size = le16_to_cpu(resp->qp_entry_size);
- ctx->srq_max_l2_entries = le16_to_cpu(resp->srq_max_l2_entries);
- ctx->srq_max_entries = le32_to_cpu(resp->srq_max_entries);
- ctx->srq_entry_size = le16_to_cpu(resp->srq_entry_size);
- ctx->cq_max_l2_entries = le16_to_cpu(resp->cq_max_l2_entries);
- ctx->cq_max_entries = le32_to_cpu(resp->cq_max_entries);
- ctx->cq_entry_size = le16_to_cpu(resp->cq_entry_size);
- ctx->vnic_max_vnic_entries =
- le16_to_cpu(resp->vnic_max_vnic_entries);
- ctx->vnic_max_ring_table_entries =
- le16_to_cpu(resp->vnic_max_ring_table_entries);
- ctx->vnic_entry_size = le16_to_cpu(resp->vnic_entry_size);
- ctx->stat_max_entries = le32_to_cpu(resp->stat_max_entries);
- ctx->stat_entry_size = le16_to_cpu(resp->stat_entry_size);
- ctx->tqm_entry_size = le16_to_cpu(resp->tqm_entry_size);
- ctx->tqm_min_entries_per_ring =
- le32_to_cpu(resp->tqm_min_entries_per_ring);
- ctx->tqm_max_entries_per_ring =
- le32_to_cpu(resp->tqm_max_entries_per_ring);
- ctx->tqm_entries_multiple = resp->tqm_entries_multiple;
- if (!ctx->tqm_entries_multiple)
- ctx->tqm_entries_multiple = 1;
- ctx->mrav_max_entries = le32_to_cpu(resp->mrav_max_entries);
- ctx->mrav_entry_size = le16_to_cpu(resp->mrav_entry_size);
- ctx->mrav_num_entries_units =
- le16_to_cpu(resp->mrav_num_entries_units);
- ctx->tim_entry_size = le16_to_cpu(resp->tim_entry_size);
- ctx->tim_max_entries = le32_to_cpu(resp->tim_max_entries);
+ init_val = resp->ctx_kind_initializer;
+ init_mask = le16_to_cpu(resp->ctx_init_mask);
- bnxt_init_ctx_initializer(ctx, resp);
+ ctxm = &ctx->ctx_arr[BNXT_CTX_QP];
+ ctxm->max_entries = le32_to_cpu(resp->qp_max_entries);
+ ctxm->qp_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries);
+ ctxm->qp_l2_entries = le16_to_cpu(resp->qp_max_l2_entries);
+ ctxm->entry_size = le16_to_cpu(resp->qp_entry_size);
+ bnxt_init_ctx_initializer(ctxm, init_val, resp->qp_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ];
+ ctxm->srq_l2_entries = le16_to_cpu(resp->srq_max_l2_entries);
+ ctxm->max_entries = le32_to_cpu(resp->srq_max_entries);
+ ctxm->entry_size = le16_to_cpu(resp->srq_entry_size);
+ bnxt_init_ctx_initializer(ctxm, init_val, resp->srq_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_CQ];
+ ctxm->cq_l2_entries = le16_to_cpu(resp->cq_max_l2_entries);
+ ctxm->max_entries = le32_to_cpu(resp->cq_max_entries);
+ ctxm->entry_size = le16_to_cpu(resp->cq_entry_size);
+ bnxt_init_ctx_initializer(ctxm, init_val, resp->cq_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC];
+ ctxm->vnic_entries = le16_to_cpu(resp->vnic_max_vnic_entries);
+ ctxm->max_entries = ctxm->vnic_entries +
+ le16_to_cpu(resp->vnic_max_ring_table_entries);
+ ctxm->entry_size = le16_to_cpu(resp->vnic_entry_size);
+ bnxt_init_ctx_initializer(ctxm, init_val,
+ resp->vnic_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STAT];
+ ctxm->max_entries = le32_to_cpu(resp->stat_max_entries);
+ ctxm->entry_size = le16_to_cpu(resp->stat_entry_size);
+ bnxt_init_ctx_initializer(ctxm, init_val,
+ resp->stat_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STQM];
+ ctxm->entry_size = le16_to_cpu(resp->tqm_entry_size);
+ ctxm->min_entries = le32_to_cpu(resp->tqm_min_entries_per_ring);
+ ctxm->max_entries = le32_to_cpu(resp->tqm_max_entries_per_ring);
+ ctxm->entry_multiple = resp->tqm_entries_multiple;
+ if (!ctxm->entry_multiple)
+ ctxm->entry_multiple = 1;
+
+ memcpy(&ctx->ctx_arr[BNXT_CTX_FTQM], ctxm, sizeof(*ctxm));
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV];
+ ctxm->max_entries = le32_to_cpu(resp->mrav_max_entries);
+ ctxm->entry_size = le16_to_cpu(resp->mrav_entry_size);
+ ctxm->mrav_num_entries_units =
+ le16_to_cpu(resp->mrav_num_entries_units);
+ bnxt_init_ctx_initializer(ctxm, init_val,
+ resp->mrav_init_offset,
+ (init_mask & (1 << init_idx++)) != 0);
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_TIM];
+ ctxm->entry_size = le16_to_cpu(resp->tim_entry_size);
+ ctxm->max_entries = le32_to_cpu(resp->tim_max_entries);
ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count;
if (!ctx->tqm_fp_rings_count)
@@ -7202,16 +7409,11 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS)
ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS;
- tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS;
- ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL);
- if (!ctx_pg) {
- kfree(ctx);
- rc = -ENOMEM;
- goto ctx_err;
- }
- for (i = 0; i < tqm_rings; i++, ctx_pg++)
- ctx->tqm_mem[i] = ctx_pg;
- bp->ctx = ctx;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM];
+ memcpy(ctxm, &ctx->ctx_arr[BNXT_CTX_STQM], sizeof(*ctxm));
+ ctxm->instance_bmap = (1 << ctx->tqm_fp_rings_count) - 1;
+
+ rc = bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_MAX);
} else {
rc = 0;
}
@@ -7250,6 +7452,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
struct hwrm_func_backing_store_cfg_input *req;
struct bnxt_ctx_mem_info *ctx = bp->ctx;
struct bnxt_ctx_pg_info *ctx_pg;
+ struct bnxt_ctx_mem_type *ctxm;
void **__req = (void **)&req;
u32 req_len = sizeof(*req);
__le32 *num_entries;
@@ -7271,82 +7474,99 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
req->enables = cpu_to_le32(enables);
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) {
- ctx_pg = &ctx->qp_mem;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_QP];
+ ctx_pg = ctxm->pg_info;
req->qp_num_entries = cpu_to_le32(ctx_pg->entries);
- req->qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
- req->qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
- req->qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
+ req->qp_num_qp1_entries = cpu_to_le16(ctxm->qp_qp1_entries);
+ req->qp_num_l2_entries = cpu_to_le16(ctxm->qp_l2_entries);
+ req->qp_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->qpc_pg_size_qpc_lvl,
&req->qpc_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) {
- ctx_pg = &ctx->srq_mem;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ];
+ ctx_pg = ctxm->pg_info;
req->srq_num_entries = cpu_to_le32(ctx_pg->entries);
- req->srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
- req->srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
+ req->srq_num_l2_entries = cpu_to_le16(ctxm->srq_l2_entries);
+ req->srq_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->srq_pg_size_srq_lvl,
&req->srq_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) {
- ctx_pg = &ctx->cq_mem;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_CQ];
+ ctx_pg = ctxm->pg_info;
req->cq_num_entries = cpu_to_le32(ctx_pg->entries);
- req->cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
- req->cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
+ req->cq_num_l2_entries = cpu_to_le16(ctxm->cq_l2_entries);
+ req->cq_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->cq_pg_size_cq_lvl,
&req->cq_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) {
- ctx_pg = &ctx->vnic_mem;
- req->vnic_num_vnic_entries =
- cpu_to_le16(ctx->vnic_max_vnic_entries);
+ ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC];
+ ctx_pg = ctxm->pg_info;
+ req->vnic_num_vnic_entries = cpu_to_le16(ctxm->vnic_entries);
req->vnic_num_ring_table_entries =
- cpu_to_le16(ctx->vnic_max_ring_table_entries);
- req->vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
+ cpu_to_le16(ctxm->max_entries - ctxm->vnic_entries);
+ req->vnic_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->vnic_pg_size_vnic_lvl,
&req->vnic_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) {
- ctx_pg = &ctx->stat_mem;
- req->stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
- req->stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STAT];
+ ctx_pg = ctxm->pg_info;
+ req->stat_num_entries = cpu_to_le32(ctxm->max_entries);
+ req->stat_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->stat_pg_size_stat_lvl,
&req->stat_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) {
- ctx_pg = &ctx->mrav_mem;
+ u32 units;
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV];
+ ctx_pg = ctxm->pg_info;
req->mrav_num_entries = cpu_to_le32(ctx_pg->entries);
- if (ctx->mrav_num_entries_units)
- flags |=
- FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT;
- req->mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
+ units = ctxm->mrav_num_entries_units;
+ if (units) {
+ u32 num_mr, num_ah = ctxm->mrav_av_entries;
+ u32 entries;
+
+ num_mr = ctx_pg->entries - num_ah;
+ entries = ((num_mr / units) << 16) | (num_ah / units);
+ req->mrav_num_entries = cpu_to_le32(entries);
+ flags |= FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT;
+ }
+ req->mrav_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->mrav_pg_size_mrav_lvl,
&req->mrav_page_dir);
}
if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) {
- ctx_pg = &ctx->tim_mem;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_TIM];
+ ctx_pg = ctxm->pg_info;
req->tim_num_entries = cpu_to_le32(ctx_pg->entries);
- req->tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
+ req->tim_entry_size = cpu_to_le16(ctxm->entry_size);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
&req->tim_pg_size_tim_lvl,
&req->tim_page_dir);
}
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STQM];
for (i = 0, num_entries = &req->tqm_sp_num_entries,
pg_attr = &req->tqm_sp_pg_size_tqm_sp_lvl,
pg_dir = &req->tqm_sp_page_dir,
- ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP;
+ ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP,
+ ctx_pg = ctxm->pg_info;
i < BNXT_MAX_TQM_RINGS;
+ ctx_pg = &ctx->ctx_arr[BNXT_CTX_FTQM].pg_info[i],
i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
if (!(enables & ena))
continue;
- req->tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
- ctx_pg = ctx->tqm_mem[i];
+ req->tqm_entry_size = cpu_to_le16(ctxm->entry_size);
*num_entries = cpu_to_le32(ctx_pg->entries);
bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
}
@@ -7370,7 +7590,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size,
- u8 depth, struct bnxt_mem_init *mem_init)
+ u8 depth, struct bnxt_ctx_mem_type *ctxm)
{
struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
int rc;
@@ -7408,7 +7628,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i];
rmem->depth = 1;
rmem->nr_pages = MAX_CTX_PAGES;
- rmem->mem_init = mem_init;
+ rmem->ctx_mem = ctxm;
if (i == (nr_tbls - 1)) {
int rem = ctx_pg->nr_pages % MAX_CTX_PAGES;
@@ -7423,7 +7643,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
if (rmem->nr_pages > 1 || depth)
rmem->depth = 1;
- rmem->mem_init = mem_init;
+ rmem->ctx_mem = ctxm;
rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg);
}
return rc;
@@ -7458,38 +7678,131 @@ static void bnxt_free_ctx_pg_tbls(struct bnxt *bp,
ctx_pg->nr_pages = 0;
}
+/* Size and allocate the page tables for every instance of one context type.
+ *
+ * @entries is rounded up to ctxm->entry_multiple (when set) and then clamped
+ * to [ctxm->min_entries, ctxm->max_entries].  One table set is allocated per
+ * bit set in ctxm->instance_bmap, or a single one when the bitmap is zero.
+ *
+ * Returns -EINVAL if the type has no entry size or no pg_info, otherwise 0
+ * or the first error from bnxt_alloc_ctx_pg_tbls().
+ */
+static int bnxt_setup_ctxm_pg_tbls(struct bnxt *bp,
+				   struct bnxt_ctx_mem_type *ctxm, u32 entries,
+				   u8 pg_lvl)
+{
+	struct bnxt_ctx_pg_info *pg_tbls = ctxm->pg_info;
+	int nr_inst = 1, idx, err;
+	u32 bytes;
+
+	if (!ctxm->entry_size || !pg_tbls)
+		return -EINVAL;
+
+	if (ctxm->instance_bmap)
+		nr_inst = hweight32(ctxm->instance_bmap);
+
+	if (ctxm->entry_multiple)
+		entries = roundup(entries, ctxm->entry_multiple);
+	entries = clamp_t(u32, entries, ctxm->min_entries, ctxm->max_entries);
+	bytes = entries * ctxm->entry_size;
+
+	for (idx = 0; idx < nr_inst; idx++) {
+		struct bnxt_ctx_pg_info *pg = &pg_tbls[idx];
+
+		pg->entries = entries;
+		/* Only pass ctxm down when it carries a non-zero init value. */
+		err = bnxt_alloc_ctx_pg_tbls(bp, pg, bytes, pg_lvl,
+					     ctxm->init_value ? ctxm : NULL);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Send HWRM_FUNC_BACKING_STORE_CFG_V2 for one context memory type, once per
+ * instance that has a non-zero entry count.
+ *
+ * Nothing is sent (and 0 is returned) when the type is not flagged
+ * BNXT_CTX_MEM_TYPE_VALID or has no pg_info.  When @last is true, the request
+ * for the final instance carries the BS_CFG_ALL_DONE flag.
+ *
+ * Returns 0 on success or the first error from hwrm_req_init() or
+ * hwrm_req_send().
+ */
+static int bnxt_hwrm_func_backing_store_cfg_v2(struct bnxt *bp,
+ struct bnxt_ctx_mem_type *ctxm,
+ bool last)
+{
+ struct hwrm_func_backing_store_cfg_v2_input *req;
+ u32 instance_bmap = ctxm->instance_bmap;
+ int i, j, rc = 0, n = 1;
+ __le32 *p;
+
+ if (!(ctxm->flags & BNXT_CTX_MEM_TYPE_VALID) || !ctxm->pg_info)
+ return 0;
+
+ /* One request per set bit; an empty bitmap is treated as instance 0 only. */
+ if (instance_bmap)
+ n = hweight32(ctxm->instance_bmap);
+ else
+ instance_bmap = 1;
+
+ rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_CFG_V2);
+ if (rc)
+ return rc;
+ /* The same request is re-sent for each instance below and dropped after
+ * the loop; presumably the hold is what keeps it valid across sends --
+ * confirm against the HWRM request API.
+ */
+ hwrm_req_hold(bp, req);
+ /* Fields common to every instance of this type. */
+ req->type = cpu_to_le16(ctxm->type);
+ req->entry_size = cpu_to_le16(ctxm->entry_size);
+ req->subtype_valid_cnt = ctxm->split_entry_cnt;
+ for (i = 0, p = &req->split_entry_0; i < ctxm->split_entry_cnt; i++)
+ p[i] = cpu_to_le32(ctxm->split[i]);
+ /* i walks bit positions of instance_bmap; j counts the pg_info[] slots
+ * consumed, so the loop ends once all n instances have been visited or
+ * a send fails.
+ */
+ for (i = 0, j = 0; j < n && !rc; i++) {
+ struct bnxt_ctx_pg_info *ctx_pg;
+
+ if (!(instance_bmap & (1 << i)))
+ continue;
+ req->instance = cpu_to_le16(i);
+ ctx_pg = &ctxm->pg_info[j++];
+ /* Instances with no entries are skipped without sending.
+ * NOTE(review): if the final instance is skipped here, the
+ * ALL_DONE flag below is never sent for this type.
+ */
+ if (!ctx_pg->entries)
+ continue;
+ req->num_entries = cpu_to_le32(ctx_pg->entries);
+ bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+ &req->page_size_pbl_level,
+ &req->page_dir);
+ /* j was already advanced, so j == n identifies the final instance. */
+ if (last && j == n)
+ req->flags =
+ cpu_to_le32(FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_CFG_ALL_DONE);
+ rc = hwrm_req_send(bp, req);
+ }
+ hwrm_req_drop(bp, req);
+ return rc;
+}
+
+/* Configure the backing store for every context type in bp->ctx->ctx_arr[],
+ * in index order, stopping at the first failure.  Each type's own ->last
+ * marker is forwarded to the per-type configuration call.
+ *
+ * Returns 0 on success or the first error from
+ * bnxt_hwrm_func_backing_store_cfg_v2().
+ */
+static int bnxt_backing_store_cfg_v2(struct bnxt *bp)
+{
+	struct bnxt_ctx_mem_info *ctx = bp->ctx;
+	u16 idx = 0;
+	int err = 0;
+
+	while (idx < BNXT_CTX_V2_MAX && !err) {
+		struct bnxt_ctx_mem_type *cur = &ctx->ctx_arr[idx++];
+
+		err = bnxt_hwrm_func_backing_store_cfg_v2(bp, cur, cur->last);
+	}
+	return err;
+}
+
void bnxt_free_ctx_mem(struct bnxt *bp)
{
struct bnxt_ctx_mem_info *ctx = bp->ctx;
- int i;
+ u16 type;
if (!ctx)
return;
- if (ctx->tqm_mem[0]) {
- for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++)
- bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]);
- kfree(ctx->tqm_mem[0]);
- ctx->tqm_mem[0] = NULL;
+ for (type = 0; type < BNXT_CTX_V2_MAX; type++) {
+ struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type];
+ struct bnxt_ctx_pg_info *ctx_pg = ctxm->pg_info;
+ int i, n = 1;
+
+ if (!ctx_pg)
+ continue;
+ if (ctxm->instance_bmap)
+ n = hweight32(ctxm->instance_bmap);
+ for (i = 0; i < n; i++)
+ bnxt_free_ctx_pg_tbls(bp, &ctx_pg[i]);
+
+ kfree(ctx_pg);
+ ctxm->pg_info = NULL;
}
- bnxt_free_ctx_pg_tbls(bp, &ctx->tim_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->mrav_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->stat_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->vnic_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->cq_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->srq_mem);
- bnxt_free_ctx_pg_tbls(bp, &ctx->qp_mem);
ctx->flags &= ~BNXT_CTX_FLAG_INITED;
+ kfree(ctx);
+ bp->ctx = NULL;
}
static int bnxt_alloc_ctx_mem(struct bnxt *bp)
{
- struct bnxt_ctx_pg_info *ctx_pg;
+ struct bnxt_ctx_mem_type *ctxm;
struct bnxt_ctx_mem_info *ctx;
- struct bnxt_mem_init *init;
- u32 mem_size, ena, entries;
- u32 entries_sp, min;
+ u32 l2_qps, qp1_qps, max_qps;
+ u32 ena, entries_sp, entries;
+ u32 srqs, max_srqs, min;
u32 num_mr, num_ah;
u32 extra_srqs = 0;
u32 extra_qps = 0;
@@ -7506,120 +7819,93 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED))
return 0;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_QP];
+ l2_qps = ctxm->qp_l2_entries;
+ qp1_qps = ctxm->qp_qp1_entries;
+ max_qps = ctxm->max_entries;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ];
+ srqs = ctxm->srq_l2_entries;
+ max_srqs = ctxm->max_entries;
if ((bp->flags & BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) {
pg_lvl = 2;
- extra_qps = 65536;
- extra_srqs = 8192;
+ extra_qps = min_t(u32, 65536, max_qps - l2_qps - qp1_qps);
+ extra_srqs = min_t(u32, 8192, max_srqs - srqs);
}
- ctx_pg = &ctx->qp_mem;
- ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries +
- extra_qps;
- if (ctx->qp_entry_size) {
- mem_size = ctx->qp_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_QP];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps,
+ pg_lvl);
+ if (rc)
+ return rc;
- ctx_pg = &ctx->srq_mem;
- ctx_pg->entries = ctx->srq_max_l2_entries + extra_srqs;
- if (ctx->srq_entry_size) {
- mem_size = ctx->srq_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, srqs + extra_srqs, pg_lvl);
+ if (rc)
+ return rc;
- ctx_pg = &ctx->cq_mem;
- ctx_pg->entries = ctx->cq_max_l2_entries + extra_qps * 2;
- if (ctx->cq_entry_size) {
- mem_size = ctx->cq_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_CQ];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_CQ];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->cq_l2_entries +
+ extra_qps * 2, pg_lvl);
+ if (rc)
+ return rc;
- ctx_pg = &ctx->vnic_mem;
- ctx_pg->entries = ctx->vnic_max_vnic_entries +
- ctx->vnic_max_ring_table_entries;
- if (ctx->vnic_entry_size) {
- mem_size = ctx->vnic_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1);
+ if (rc)
+ return rc;
- ctx_pg = &ctx->stat_mem;
- ctx_pg->entries = ctx->stat_max_entries;
- if (ctx->stat_entry_size) {
- mem_size = ctx->stat_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_STAT];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STAT];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1);
+ if (rc)
+ return rc;
ena = 0;
if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
goto skip_rdma;
- ctx_pg = &ctx->mrav_mem;
+ ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV];
/* 128K extra is needed to accommodate static AH context
* allocation by f/w.
*/
- num_mr = 1024 * 256;
- num_ah = 1024 * 128;
- ctx_pg->entries = num_mr + num_ah;
- if (ctx->mrav_entry_size) {
- mem_size = ctx->mrav_entry_size * ctx_pg->entries;
- init = &ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV];
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 2, init);
- if (rc)
- return rc;
- }
- ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
- if (ctx->mrav_num_entries_units)
- ctx_pg->entries =
- ((num_mr / ctx->mrav_num_entries_units) << 16) |
- (num_ah / ctx->mrav_num_entries_units);
+ num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256);
+ num_ah = min_t(u32, num_mr, 1024 * 128);
+ ctxm->split_entry_cnt = BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1;
+ if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah)
+ ctxm->mrav_av_entries = num_ah;
- ctx_pg = &ctx->tim_mem;
- ctx_pg->entries = ctx->qp_mem.entries;
- if (ctx->tim_entry_size) {
- mem_size = ctx->tim_entry_size * ctx_pg->entries;
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, NULL);
- if (rc)
- return rc;
- }
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, num_mr + num_ah, 2);
+ if (rc)
+ return rc;
+ ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_TIM];
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps, 1);
+ if (rc)
+ return rc;
ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM;
skip_rdma:
- min = ctx->tqm_min_entries_per_ring;
- entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
- 2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
- entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
- entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
- entries = roundup(entries, ctx->tqm_entries_multiple);
- entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
- for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
- ctx_pg = ctx->tqm_mem[i];
- ctx_pg->entries = i ? entries : entries_sp;
- if (ctx->tqm_entry_size) {
- mem_size = ctx->tqm_entry_size * ctx_pg->entries;
- rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1,
- NULL);
- if (rc)
- return rc;
- }
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STQM];
+ min = ctxm->min_entries;
+ entries_sp = ctx->ctx_arr[BNXT_CTX_VNIC].vnic_entries + l2_qps +
+ 2 * (extra_qps + qp1_qps) + min;
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries_sp, 2);
+ if (rc)
+ return rc;
+
+ ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM];
+ entries = l2_qps + 2 * (extra_qps + qp1_qps);
+ rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries, 2);
+ if (rc)
+ return rc;
+ for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++)
ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i;
- }
ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES;
- rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
+
+ if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2)
+ rc = bnxt_backing_store_cfg_v2(bp);
+ else
+ rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
if (rc) {
netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n",
rc);
@@ -7667,7 +7953,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
hw_resc->min_stat_ctxs = le16_to_cpu(resp->min_stat_ctx);
hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx);
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
u16 max_msix = le16_to_cpu(resp->max_msix);
hw_resc->max_nqs = max_msix;
@@ -7696,7 +7982,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
u8 flags;
int rc;
- if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) {
+ if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) {
rc = -ENODEV;
goto no_ptp;
}
@@ -7728,7 +8014,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) {
ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower);
ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper);
- } else if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER;
ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER;
} else {
@@ -7800,6 +8086,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH;
+ if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_BACKING_STORE_V2;
flags_ext2 = le32_to_cpu(resp->flags_ext2);
if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_RX_ALL_PKTS_TIMESTAMPS_SUPPORTED)
@@ -8435,7 +8723,7 @@ static void bnxt_accumulate_all_stats(struct bnxt *bp)
int i;
/* Chip bug. Counter intermittently becomes 0. */
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
ignore_zero = true;
for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -8629,7 +8917,7 @@ static void bnxt_clear_vnic(struct bnxt *bp)
return;
bnxt_hwrm_clear_vnic_filter(bp);
- if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) {
/* clear all RSS setting before free vnic ctx */
bnxt_hwrm_clear_vnic_rss(bp);
bnxt_hwrm_vnic_ctx_free(bp);
@@ -8638,7 +8926,7 @@ static void bnxt_clear_vnic(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, false);
bnxt_hwrm_vnic_free(bp);
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
bnxt_hwrm_vnic_ctx_free(bp);
}
@@ -8795,7 +9083,7 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id)
static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return __bnxt_setup_vnic_p5(bp, vnic_id);
else
return __bnxt_setup_vnic(bp, vnic_id);
@@ -8806,7 +9094,7 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
#ifdef CONFIG_RFS_ACCEL
int i, rc = 0;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return 0;
for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -9031,8 +9319,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp)
return rc;
}
-static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
- bool shared)
+static int __bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
+ bool shared)
{
int _rx = *rx, _tx = *tx;
@@ -9055,6 +9343,46 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
return 0;
}
+/* Convert a TX ring count to the matching cp ring count: the non-XDP rings
+ * are divided by @tx_sets and the @tx_xdp rings are added back undivided.
+ * @bp is not used.  @tx_sets must be non-zero.
+ */
+static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp)
+{
+	int tx_per_set = (tx - tx_xdp) / tx_sets;
+
+	return tx_per_set + tx_xdp;
+}
+
+/* Convert @tx TX rings to a cp ring count using the device's current number
+ * of traffic classes (a count of zero is treated as one) and
+ * bp->tx_nr_rings_xdp.
+ */
+int bnxt_num_tx_to_cp(struct bnxt *bp, int tx)
+{
+	int tx_sets = netdev_get_num_tc(bp->dev);
+
+	return __bnxt_num_tx_to_cp(bp, tx, tx_sets ? tx_sets : 1,
+				   bp->tx_nr_rings_xdp);
+}
+
+/* Inverse of bnxt_num_tx_to_cp(): expand a cp ring count back into a TX ring
+ * count by multiplying the non-XDP part by the number of traffic classes and
+ * adding bp->tx_nr_rings_xdp back unchanged.
+ */
+static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp)
+{
+	int tcs = netdev_get_num_tc(bp->dev);
+
+	/* netdev_get_num_tc() may return 0; bnxt_num_tx_to_cp() treats that
+	 * as one TX set, so do the same here.  Otherwise the multiplication
+	 * would wipe out every non-XDP ring.
+	 */
+	if (!tcs)
+		tcs = 1;
+	return (tx_cp - bp->tx_nr_rings_xdp) * tcs +
+	       bp->tx_nr_rings_xdp;
+}
+
+/* Trim *rx and *tx to fit within @max via __bnxt_trim_rings().
+ *
+ * When the cp ring count for *tx differs from *tx itself, trimming is done
+ * on the cp count, and *tx is rewritten only if that count was actually
+ * reduced.  Otherwise *tx is trimmed directly.
+ *
+ * Returns 0 on success or the error from __bnxt_trim_rings().
+ */
+static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
+			   bool sh)
+{
+	int tx_cp = bnxt_num_tx_to_cp(bp, *tx);
+	int tx_cp_before, err;
+
+	/* No conversion needed: operate on the caller's TX count directly. */
+	if (tx_cp == *tx)
+		return __bnxt_trim_rings(bp, rx, tx, max, sh);
+
+	tx_cp_before = tx_cp;
+	err = __bnxt_trim_rings(bp, rx, &tx_cp, max, sh);
+	if (err)
+		return err;
+
+	if (tx_cp != tx_cp_before)
+		*tx = bnxt_num_cp_to_tx(bp, tx_cp);
+	return 0;
+}
+
static void bnxt_setup_msix(struct bnxt *bp)
{
const int len = sizeof(bp->irq_tbl[0].name);
@@ -9067,7 +9395,7 @@ static void bnxt_setup_msix(struct bnxt *bp)
for (i = 0; i < tcs; i++) {
count = bp->tx_nr_rings_per_tc;
- off = i * count;
+ off = BNXT_TC_TO_RING_BASE(bp, i);
netdev_set_tc_queue(dev, i, count, off);
}
}
@@ -9148,7 +9476,7 @@ static unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
{
unsigned int cp = bp->hw_resc.max_cp_rings;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
cp -= bnxt_get_ulp_msix_num(bp);
return cp;
@@ -9158,7 +9486,7 @@ static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
{
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_nqs);
return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings);
@@ -9174,7 +9502,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp)
unsigned int cp;
cp = bnxt_get_max_func_cp_rings_for_en(bp);
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return cp - bp->rx_nr_rings - bp->tx_nr_rings;
else
return cp - bp->cp_nr_rings;
@@ -9193,7 +9521,7 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num)
int max_idx, avail_msix;
max_idx = bp->total_irqs;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
max_idx = min_t(int, bp->total_irqs, max_cp);
avail_msix = max_idx - bp->cp_nr_rings;
if (!BNXT_NEW_RM(bp) || avail_msix >= num)
@@ -9217,7 +9545,7 @@ static int bnxt_get_num_msix(struct bnxt *bp)
static int bnxt_init_msix(struct bnxt *bp)
{
- int i, total_vecs, max, rc = 0, min = 1, ulp_msix;
+ int i, total_vecs, max, rc = 0, min = 1, ulp_msix, tx_cp;
struct msix_entry *msix_ent;
total_vecs = bnxt_get_num_msix(bp);
@@ -9259,9 +9587,10 @@ static int bnxt_init_msix(struct bnxt *bp)
if (rc)
goto msix_setup_exit;
+ tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
bp->cp_nr_rings = (min == 1) ?
- max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
- bp->tx_nr_rings + bp->rx_nr_rings;
+ max_t(int, tx_cp, bp->rx_nr_rings) :
+ tx_cp + bp->rx_nr_rings;
} else {
rc = -ENOMEM;
@@ -9471,7 +9800,7 @@ static void bnxt_init_napi(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_USING_MSIX) {
int (*poll_fn)(struct napi_struct *, int) = bnxt_poll;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
poll_fn = bnxt_poll_p5;
else if (BNXT_CHIP_TYPE_NITRO_A0(bp))
cp_nr_rings--;
@@ -9527,8 +9856,6 @@ static void bnxt_enable_napi(struct bnxt *bp)
cpr = &bnapi->cp_ring;
bnapi->in_reset = false;
- bnapi->tx_pkts = 0;
-
if (bnapi->rx_ring) {
INIT_WORK(&cpr->dim.work, bnxt_dim_work);
cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
@@ -10211,8 +10538,6 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
bnxt_ulp_stop(bp);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
bnxt_dcb_free(bp);
rc = bnxt_fw_init_one(bp);
if (rc) {
@@ -11194,7 +11519,7 @@ static bool bnxt_can_reserve_rings(struct bnxt *bp)
/* If the chip and firmware supports RFS */
static bool bnxt_rfs_supported(struct bnxt *bp)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2)
return true;
return false;
@@ -11215,7 +11540,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
#ifdef CONFIG_RFS_ACCEL
int vnics, max_vnics, max_rss_ctxs;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return bnxt_rfs_supported(bp);
if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings)
return false;
@@ -11317,7 +11642,7 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
update_tpa = true;
if ((bp->flags & BNXT_FLAG_TPA) == 0 ||
(flags & BNXT_FLAG_TPA) == 0 ||
- (bp->flags & BNXT_FLAG_CHIP_P5))
+ (bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
re_init = true;
}
@@ -11559,15 +11884,13 @@ static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type,
static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi)
{
- struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
- int i = bnapi->index;
+ struct bnxt_tx_ring_info *txr;
+ int i = bnapi->index, j;
- if (!txr)
- return;
-
- netdev_info(bnapi->bp->dev, "[%d]: tx{fw_ring: %d prod: %x cons: %x}\n",
- i, txr->tx_ring_struct.fw_ring_id, txr->tx_prod,
- txr->tx_cons);
+ bnxt_for_each_napi_tx(j, bnapi, txr)
+ netdev_info(bnapi->bp->dev, "[%d.%d]: tx{fw_ring: %d prod: %x cons: %x}\n",
+ i, j, txr->tx_ring_struct.fw_ring_id, txr->tx_prod,
+ txr->tx_cons);
}
static void bnxt_dump_rx_sw_state(struct bnxt_napi *bnapi)
@@ -11730,8 +12053,7 @@ static void bnxt_timer(struct timer_list *t)
if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
- if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev &&
- netif_carrier_ok(dev))
+ if ((BNXT_CHIP_P5(bp)) && !bp->chip_rev && netif_carrier_ok(dev))
bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT);
bnxt_restart_timer:
@@ -11840,8 +12162,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
if (pci_is_enabled(bp->pdev))
pci_disable_device(bp->pdev);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
}
static bool is_bnxt_fw_ok(struct bnxt *bp)
@@ -11984,7 +12304,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp)
{
int i;
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
return;
for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -11997,12 +12317,11 @@ static void bnxt_chk_missed_irq(struct bnxt *bp)
continue;
cpr = &bnapi->cp_ring;
- for (j = 0; j < 2; j++) {
- struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+ for (j = 0; j < cpr->cp_ring_count; j++) {
+ struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j];
u32 val[2];
- if (!cpr2 || cpr2->has_more_work ||
- !bnxt_has_work(bp, cpr2))
+ if (cpr2->has_more_work || !bnxt_has_work(bp, cpr2))
continue;
if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) {
@@ -12161,23 +12480,27 @@ static void bnxt_sp_task(struct work_struct *work)
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
}
+static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
+ int *max_cp);
+
/* Under rtnl_lock */
int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int tx_xdp)
{
- int max_rx, max_tx, tx_sets = 1;
+ int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp;
int tx_rings_needed, stats;
int rx_rings = rx;
- int cp, vnics, rc;
+ int cp, vnics;
if (tcs)
tx_sets = tcs;
- rc = bnxt_get_max_rings(bp, &max_rx, &max_tx, sh);
- if (rc)
- return rc;
+ if (bp->flags & BNXT_FLAG_AGG_RINGS)
+ rx_rings <<= 1;
- if (max_rx < rx)
+ _bnxt_get_max_rings(bp, &max_rx, &max_tx, &max_cp);
+
+ if (max_rx < rx) /* max_rx is already halved for agg rings; compare against undoubled rx */
return -ENOMEM;
tx_rings_needed = tx * tx_sets + tx_xdp;
@@ -12185,12 +12508,14 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
return -ENOMEM;
vnics = 1;
- if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS)
- vnics += rx_rings;
+ if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) ==
+ BNXT_FLAG_RFS)
+ vnics += rx;
- if (bp->flags & BNXT_FLAG_AGG_RINGS)
- rx_rings <<= 1;
- cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
+ tx_cp = __bnxt_num_tx_to_cp(bp, tx_rings_needed, tx_sets, tx_xdp);
+ cp = sh ? max_t(int, tx_cp, rx) : tx_cp + rx;
+ if (max_cp < cp)
+ return -ENOMEM;
stats = cp;
if (BNXT_NEW_RM(bp)) {
cp += bnxt_get_ulp_msix_num(bp);
@@ -12265,10 +12590,10 @@ static bool bnxt_fw_pre_resv_vnics(struct bnxt *bp)
{
u16 fw_maj = BNXT_FW_MAJ(bp), fw_bld = BNXT_FW_BLD(bp);
- if (!(bp->flags & BNXT_FLAG_CHIP_P5) &&
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
(fw_maj > 218 || (fw_maj == 218 && fw_bld >= 18)))
return true;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) &&
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
(fw_maj > 216 || (fw_maj == 216 && fw_bld >= 172)))
return true;
return false;
@@ -12824,7 +13149,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
{
struct bnxt *bp = netdev_priv(dev);
bool sh = false;
- int rc;
+ int rc, tx_cp;
if (tc > bp->max_tc) {
netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n",
@@ -12855,8 +13180,9 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
netdev_reset_tc(dev);
}
bp->tx_nr_rings += bp->tx_nr_rings_xdp;
- bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
- bp->tx_nr_rings + bp->rx_nr_rings;
+ tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
+ bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) :
+ tx_cp + bp->rx_nr_rings;
if (netif_running(bp->dev))
return bnxt_open_nic(bp, true, false);
@@ -13255,8 +13581,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
kfree(bp->rss_indir_tbl);
bp->rss_indir_tbl = NULL;
bnxt_free_port_stats(bp);
@@ -13325,7 +13649,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
max_irq = min_t(int, bnxt_get_max_func_irqs(bp) -
bnxt_get_ulp_msix_num(bp),
hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp));
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
*max_cp = min_t(int, *max_cp, max_irq);
max_ring_grps = hw_resc->max_hw_ring_grps;
if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
@@ -13334,8 +13658,11 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
}
if (bp->flags & BNXT_FLAG_AGG_RINGS)
*max_rx >>= 1;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ if (*max_cp < (*max_rx + *max_tx)) {
+ *max_rx = *max_cp / 2;
+ *max_tx = *max_rx;
+ }
/* On P5 chips, max_cp output param should be available NQs */
*max_cp = max_irq;
}
@@ -13636,7 +13963,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
max_irqs = bnxt_get_max_irq(pdev);
- dev = alloc_etherdev_mq(sizeof(*bp), max_irqs);
+ dev = alloc_etherdev_mqs(sizeof(*bp), max_irqs * BNXT_MAX_QUEUE,
+ max_irqs);
if (!dev)
return -ENOMEM;
@@ -13678,8 +14006,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (BNXT_PF(bp))
bnxt_vpd_read_info(bp);
- if (BNXT_CHIP_P5(bp)) {
- bp->flags |= BNXT_FLAG_CHIP_P5;
+ if (BNXT_CHIP_P5_PLUS(bp)) {
+ bp->flags |= BNXT_FLAG_CHIP_P5_PLUS;
if (BNXT_CHIP_SR2(bp))
bp->flags |= BNXT_FLAG_CHIP_SR2;
}
@@ -13744,7 +14072,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bp->gro_func = bnxt_gro_func_5730x;
if (BNXT_CHIP_P4(bp))
bp->gro_func = bnxt_gro_func_5731x;
- else if (BNXT_CHIP_P5(bp))
+ else if (BNXT_CHIP_P5_PLUS(bp))
bp->gro_func = bnxt_gro_func_5750x;
}
if (!BNXT_CHIP_P4_PLUS(bp))
@@ -13852,8 +14180,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
kfree(bp->rss_indir_tbl);
bp->rss_indir_tbl = NULL;
@@ -13906,8 +14232,6 @@ static int bnxt_suspend(struct device *device)
bnxt_hwrm_func_drv_unrgtr(bp);
pci_disable_device(bp->pdev);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
rtnl_unlock();
return rc;
}
@@ -14004,8 +14328,6 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
if (pci_is_enabled(pdev))
pci_disable_device(pdev);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
rtnl_unlock();
/* Request a slot slot reset. */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index e702dbc..94b3627 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -61,6 +61,24 @@ struct tx_bd {
__le64 tx_bd_haddr;
} __packed;
+#define TX_OPAQUE_IDX_MASK 0x0000ffff /* bits 0-15: TX index field of the opaque value */
+#define TX_OPAQUE_BDS_MASK 0x00ff0000 /* bits 16-23: BD count field */
+#define TX_OPAQUE_BDS_SHIFT 16
+#define TX_OPAQUE_RING_MASK 0xff000000 /* bits 24-31: tx_napi_idx field */
+#define TX_OPAQUE_RING_SHIFT 24
+
+#define SET_TX_OPAQUE(bp, txr, idx, bds) \
+ (((txr)->tx_napi_idx << TX_OPAQUE_RING_SHIFT) | \
+ ((bds) << TX_OPAQUE_BDS_SHIFT) | ((idx) & (bp)->tx_ring_mask)) /* packs ring | bds | masked idx; bds is not range-checked here */
+
+#define TX_OPAQUE_IDX(opq) ((opq) & TX_OPAQUE_IDX_MASK) /* extract the index field */
+#define TX_OPAQUE_RING(opq) (((opq) & TX_OPAQUE_RING_MASK) >> \
+ TX_OPAQUE_RING_SHIFT) /* extract the tx_napi_idx field */
+#define TX_OPAQUE_BDS(opq) (((opq) & TX_OPAQUE_BDS_MASK) >> \
+ TX_OPAQUE_BDS_SHIFT) /* extract the BD count field */
+#define TX_OPAQUE_PROD(bp, opq) ((TX_OPAQUE_IDX(opq) + TX_OPAQUE_BDS(opq)) &\
+ (bp)->tx_ring_mask) /* index + BD count, wrapped to the TX ring mask */
+
struct tx_bd_ext {
__le32 tx_bd_hsize_lflags;
#define TX_BD_FLAGS_TCP_UDP_CHKSUM (1 << 0)
@@ -530,6 +548,19 @@ struct nqe_cn {
__le32 cq_handle_high;
};
+#define BNXT_NQ_HDL_IDX_MASK 0x00ffffff /* bits 0-23: index field of the NQ handle */
+#define BNXT_NQ_HDL_TYPE_MASK 0xff000000 /* bits 24-31: type field of the NQ handle */
+#define BNXT_NQ_HDL_TYPE_SHIFT 24
+#define BNXT_NQ_HDL_TYPE_RX 0x00 /* type value for RX */
+#define BNXT_NQ_HDL_TYPE_TX 0x01 /* type value for TX */
+
+#define BNXT_NQ_HDL_IDX(hdl) ((hdl) & BNXT_NQ_HDL_IDX_MASK) /* extract the index field */
+#define BNXT_NQ_HDL_TYPE(hdl) (((hdl) & BNXT_NQ_HDL_TYPE_MASK) >> \
+ BNXT_NQ_HDL_TYPE_SHIFT) /* extract the type field */
+
+#define BNXT_SET_NQ_HDL(cpr) \
+ (((cpr)->cp_ring_type << BNXT_NQ_HDL_TYPE_SHIFT) | (cpr)->cp_idx) /* build a handle from cpr->cp_ring_type and cpr->cp_idx */
+
#define DB_IDX_MASK 0xffffff
#define DB_IDX_VALID (0x1 << 26)
#define DB_IRQ_DIS (0x1 << 27)
@@ -655,10 +686,12 @@ struct nqe_cn {
*/
#define BNXT_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2)
-#define RX_RING(x) (((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4))
+#define RX_RING(bp, x) (((x) & (bp)->rx_ring_mask) >> (BNXT_PAGE_SHIFT - 4))
+#define RX_AGG_RING(bp, x) (((x) & (bp)->rx_agg_ring_mask) >> \
+ (BNXT_PAGE_SHIFT - 4))
#define RX_IDX(x) ((x) & (RX_DESC_CNT - 1))
-#define TX_RING(x) (((x) & ~(TX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4))
+#define TX_RING(bp, x) (((x) & (bp)->tx_ring_mask) >> (BNXT_PAGE_SHIFT - 4))
#define TX_IDX(x) ((x) & (TX_DESC_CNT - 1))
#define CP_RING(x) (((x) & ~(CP_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4))
@@ -685,11 +718,14 @@ struct nqe_cn {
#define RX_CMP_TYPE(rxcmp) \
(le32_to_cpu((rxcmp)->rx_cmp_len_flags_type) & RX_CMP_CMP_TYPE)
-#define NEXT_RX(idx) (((idx) + 1) & bp->rx_ring_mask)
+#define RING_RX(bp, idx) ((idx) & (bp)->rx_ring_mask)
+#define NEXT_RX(idx) ((idx) + 1)
-#define NEXT_RX_AGG(idx) (((idx) + 1) & bp->rx_agg_ring_mask)
+#define RING_RX_AGG(bp, idx) ((idx) & (bp)->rx_agg_ring_mask)
+#define NEXT_RX_AGG(idx) ((idx) + 1)
-#define NEXT_TX(idx) (((idx) + 1) & bp->tx_ring_mask)
+#define RING_TX(bp, idx) ((idx) & (bp)->tx_ring_mask)
+#define NEXT_TX(idx) ((idx) + 1)
#define ADV_RAW_CMP(idx, n) ((idx) + (n))
#define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1)
@@ -702,6 +738,7 @@ struct nqe_cn {
#define BNXT_AGG_EVENT 2
#define BNXT_TX_EVENT 4
#define BNXT_REDIRECT_EVENT 8
+#define BNXT_TX_CMP_EVENT 0x10
struct bnxt_sw_tx_bd {
union {
@@ -730,13 +767,6 @@ struct bnxt_sw_rx_agg_bd {
dma_addr_t mapping;
};
-struct bnxt_mem_init {
- u8 init_val;
- u16 offset;
-#define BNXT_MEM_INVALID_OFFSET 0xffff
- u16 size;
-};
-
struct bnxt_ring_mem_info {
int nr_pages;
int page_size;
@@ -746,7 +776,7 @@ struct bnxt_ring_mem_info {
#define BNXT_RMEM_USE_FULL_PAGE_FLAG 4
u16 depth;
- struct bnxt_mem_init *mem_init;
+ struct bnxt_ctx_mem_type *ctx_mem;
void **pg_arr;
dma_addr_t *dma_arr;
@@ -788,13 +818,19 @@ struct bnxt_db_info {
u64 db_key64;
u32 db_key32;
};
+ u32 db_ring_mask;
};
+#define DB_RING_IDX(db, idx) ((idx) & (db)->db_ring_mask)
+
struct bnxt_tx_ring_info {
struct bnxt_napi *bnapi;
+ struct bnxt_cp_ring_info *tx_cpr;
u16 tx_prod;
u16 tx_cons;
+ u16 tx_hw_cons;
u16 txq_index;
+ u8 tx_napi_idx;
u8 kick_pending;
struct bnxt_db_info tx_db;
@@ -901,6 +937,7 @@ struct bnxt_tpa_idx_map {
struct bnxt_rx_ring_info {
struct bnxt_napi *bnapi;
+ struct bnxt_cp_ring_info *rx_cpr;
u16 rx_prod;
u16 rx_agg_prod;
u16 rx_sw_agg_prod;
@@ -980,6 +1017,10 @@ struct bnxt_cp_ring_info {
u8 had_work_done:1;
u8 has_more_work:1;
+ u8 had_nqe_notify:1;
+
+ u8 cp_ring_type;
+ u8 cp_idx;
u32 last_cp_raw_cons;
@@ -1004,11 +1045,18 @@ struct bnxt_cp_ring_info {
struct bnxt_ring_struct cp_ring_struct;
- struct bnxt_cp_ring_info *cp_ring_arr[2];
-#define BNXT_RX_HDL 0
-#define BNXT_TX_HDL 1
+ int cp_ring_count;
+ struct bnxt_cp_ring_info *cp_ring_arr;
};
+#define BNXT_MAX_QUEUE 8
+#define BNXT_MAX_TXR_PER_NAPI BNXT_MAX_QUEUE
+
+#define bnxt_for_each_napi_tx(iter, bnapi, txr) \
+ for (iter = 0, txr = (bnapi)->tx_ring[0]; txr; \
+ txr = (iter < BNXT_MAX_TXR_PER_NAPI - 1) ? \
+ (bnapi)->tx_ring[++iter] : NULL) /* walks bnapi->tx_ring[]; stops at the first NULL slot or after BNXT_MAX_TXR_PER_NAPI entries */
+
struct bnxt_napi {
struct napi_struct napi;
struct bnxt *bp;
@@ -1016,11 +1064,10 @@ struct bnxt_napi {
int index;
struct bnxt_cp_ring_info cp_ring;
struct bnxt_rx_ring_info *rx_ring;
- struct bnxt_tx_ring_info *tx_ring;
+ struct bnxt_tx_ring_info *tx_ring[BNXT_MAX_TXR_PER_NAPI];
void (*tx_int)(struct bnxt *, struct bnxt_napi *,
int budget);
- int tx_pkts;
u8 events;
u8 tx_fault:1;
@@ -1355,8 +1402,6 @@ struct bnxt_link_info {
(PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \
BNXT_FEC_RS_OFF(link_info))
-#define BNXT_MAX_QUEUE 8
-
struct bnxt_queue_info {
u8 queue_id;
u8 queue_profile;
@@ -1385,7 +1430,7 @@ struct bnxt_test_info {
};
#define CHIMP_REG_VIEW_ADDR \
- ((bp->flags & BNXT_FLAG_CHIP_P5) ? 0x80000000 : 0xb1000000)
+ ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ? 0x80000000 : 0xb1000000)
#define BNXT_GRCPF_REG_CHIMP_COMM 0x0
#define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100
@@ -1509,53 +1554,72 @@ do { \
attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K; \
} while (0)
+struct bnxt_ctx_mem_type {
+ u16 type;
+ u16 entry_size;
+ u32 flags;
+#define BNXT_CTX_MEM_TYPE_VALID FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID
+ u32 instance_bmap;
+ u8 init_value;
+ u8 entry_multiple;
+ u16 init_offset;
+#define BNXT_CTX_INIT_INVALID_OFFSET 0xffff
+ u32 max_entries;
+ u32 min_entries;
+ u8 last:1;
+ u8 split_entry_cnt;
+#define BNXT_MAX_SPLIT_ENTRY 4
+ union {
+ struct {
+ u32 qp_l2_entries;
+ u32 qp_qp1_entries;
+ u32 qp_fast_qpmd_entries;
+ };
+ u32 srq_l2_entries;
+ u32 cq_l2_entries;
+ u32 vnic_entries;
+ struct {
+ u32 mrav_av_entries;
+ u32 mrav_num_entries_units;
+ };
+ u32 split[BNXT_MAX_SPLIT_ENTRY];
+ };
+ struct bnxt_ctx_pg_info *pg_info;
+};
+
+#define BNXT_CTX_MRAV_AV_SPLIT_ENTRY 0
+
+#define BNXT_CTX_QP FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP
+#define BNXT_CTX_SRQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ
+#define BNXT_CTX_CQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ
+#define BNXT_CTX_VNIC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC
+#define BNXT_CTX_STAT FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT
+#define BNXT_CTX_STQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING
+#define BNXT_CTX_FTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING
+#define BNXT_CTX_MRAV FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV
+#define BNXT_CTX_TIM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM
+#define BNXT_CTX_TKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC
+#define BNXT_CTX_RKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC
+#define BNXT_CTX_MTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING
+#define BNXT_CTX_SQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW
+#define BNXT_CTX_RQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW
+#define BNXT_CTX_SRQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW
+#define BNXT_CTX_CQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW
+#define BNXT_CTX_QTKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_TKC
+#define BNXT_CTX_QRKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_RKC
+#define BNXT_CTX_TBLSC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE
+#define BNXT_CTX_XPAR FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION
+
+#define BNXT_CTX_MAX (BNXT_CTX_TIM + 1)
+#define BNXT_CTX_V2_MAX (BNXT_CTX_XPAR + 1)
+#define BNXT_CTX_INV ((u16)-1)
+
struct bnxt_ctx_mem_info {
- u32 qp_max_entries;
- u16 qp_min_qp1_entries;
- u16 qp_max_l2_entries;
- u16 qp_entry_size;
- u16 srq_max_l2_entries;
- u32 srq_max_entries;
- u16 srq_entry_size;
- u16 cq_max_l2_entries;
- u32 cq_max_entries;
- u16 cq_entry_size;
- u16 vnic_max_vnic_entries;
- u16 vnic_max_ring_table_entries;
- u16 vnic_entry_size;
- u32 stat_max_entries;
- u16 stat_entry_size;
- u16 tqm_entry_size;
- u32 tqm_min_entries_per_ring;
- u32 tqm_max_entries_per_ring;
- u32 mrav_max_entries;
- u16 mrav_entry_size;
- u16 tim_entry_size;
- u32 tim_max_entries;
- u16 mrav_num_entries_units;
- u8 tqm_entries_multiple;
u8 tqm_fp_rings_count;
u32 flags;
#define BNXT_CTX_FLAG_INITED 0x01
-
- struct bnxt_ctx_pg_info qp_mem;
- struct bnxt_ctx_pg_info srq_mem;
- struct bnxt_ctx_pg_info cq_mem;
- struct bnxt_ctx_pg_info vnic_mem;
- struct bnxt_ctx_pg_info stat_mem;
- struct bnxt_ctx_pg_info mrav_mem;
- struct bnxt_ctx_pg_info tim_mem;
- struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS];
-
-#define BNXT_CTX_MEM_INIT_QP 0
-#define BNXT_CTX_MEM_INIT_SRQ 1
-#define BNXT_CTX_MEM_INIT_CQ 2
-#define BNXT_CTX_MEM_INIT_VNIC 3
-#define BNXT_CTX_MEM_INIT_STAT 4
-#define BNXT_CTX_MEM_INIT_MRAV 5
-#define BNXT_CTX_MEM_INIT_MAX 6
- struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX];
+ struct bnxt_ctx_mem_type ctx_arr[BNXT_CTX_V2_MAX];
};
enum bnxt_health_severity {
@@ -1795,7 +1859,7 @@ struct bnxt {
atomic_t intr_sem;
u32 flags;
- #define BNXT_FLAG_CHIP_P5 0x1
+ #define BNXT_FLAG_CHIP_P5_PLUS 0x1
#define BNXT_FLAG_VF 0x2
#define BNXT_FLAG_LRO 0x4
#ifdef CONFIG_INET
@@ -1849,21 +1913,21 @@ struct bnxt {
#define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
#define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
#define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \
- (!((bp)->flags & BNXT_FLAG_CHIP_P5) || \
+ (!((bp)->flags & BNXT_FLAG_CHIP_P5_PLUS) ||\
(bp)->max_tpa_v2) && !is_kdump_kernel())
#define BNXT_RX_JUMBO_MODE(bp) ((bp)->flags & BNXT_FLAG_JUMBO)
#define BNXT_CHIP_SR2(bp) \
((bp)->chip_num == CHIP_NUM_58818)
-#define BNXT_CHIP_P5_THOR(bp) \
+#define BNXT_CHIP_P5(bp) \
((bp)->chip_num == CHIP_NUM_57508 || \
(bp)->chip_num == CHIP_NUM_57504 || \
(bp)->chip_num == CHIP_NUM_57502)
/* Chip class phase 5 */
-#define BNXT_CHIP_P5(bp) \
- (BNXT_CHIP_P5_THOR(bp) || BNXT_CHIP_SR2(bp))
+#define BNXT_CHIP_P5_PLUS(bp) \
+ (BNXT_CHIP_P5(bp) || BNXT_CHIP_SR2(bp))
/* Chip class phase 4.x */
#define BNXT_CHIP_P4(bp) \
@@ -1874,7 +1938,7 @@ struct bnxt {
!BNXT_CHIP_TYPE_NITRO_A0(bp)))
#define BNXT_CHIP_P4_PLUS(bp) \
- (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp))
+ (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp))
struct bnxt_aux_priv *aux_priv;
struct bnxt_en_dev *edev;
@@ -2017,6 +2081,7 @@ struct bnxt {
#define BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED BIT_ULL(33)
#define BNXT_FW_CAP_DFLT_VLAN_TPID_PCP BIT_ULL(34)
#define BNXT_FW_CAP_PRE_RESV_VNICS BIT_ULL(35)
+ #define BNXT_FW_CAP_BACKING_STORE_V2 BIT_ULL(36)
u32 fw_dbg_cap;
@@ -2297,10 +2362,11 @@ static inline void bnxt_writeq_relaxed(struct bnxt *bp, u64 val,
static inline void bnxt_db_write_relaxed(struct bnxt *bp,
struct bnxt_db_info *db, u32 idx)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- bnxt_writeq_relaxed(bp, db->db_key64 | idx, db->doorbell);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ bnxt_writeq_relaxed(bp, db->db_key64 | DB_RING_IDX(db, idx),
+ db->doorbell);
} else {
- u32 db_val = db->db_key32 | idx;
+ u32 db_val = db->db_key32 | DB_RING_IDX(db, idx);
writel_relaxed(db_val, db->doorbell);
if (bp->flags & BNXT_FLAG_DOUBLE_DB)
@@ -2312,10 +2378,11 @@ static inline void bnxt_db_write_relaxed(struct bnxt *bp,
static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
u32 idx)
{
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- bnxt_writeq(bp, db->db_key64 | idx, db->doorbell);
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ bnxt_writeq(bp, db->db_key64 | DB_RING_IDX(db, idx),
+ db->doorbell);
} else {
- u32 db_val = db->db_key32 | idx;
+ u32 db_val = db->db_key32 | DB_RING_IDX(db, idx);
writel(db_val, db->doorbell);
if (bp->flags & BNXT_FLAG_DOUBLE_DB)
@@ -2351,6 +2418,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
int bnxt_nq_rings_in_use(struct bnxt *bp);
int bnxt_hwrm_set_coal(struct bnxt *);
void bnxt_free_ctx_mem(struct bnxt *bp);
+int bnxt_num_tx_to_cp(struct bnxt *bp, int tx);
unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp);
unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
@@ -2360,7 +2428,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
void bnxt_tx_disable(struct bnxt *bp);
void bnxt_tx_enable(struct bnxt *bp);
void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
- int idx);
+ u16 curr);
void bnxt_report_link(struct bnxt *bp);
int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
int bnxt_hwrm_set_pause(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index f302dac..ae1bdda 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -469,8 +469,6 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
}
bnxt_cancel_reservations(bp, false);
bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
break;
}
case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: {
@@ -741,7 +739,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver)
}
/* earlier devices present as an array of raw bytes */
- if (!BNXT_CHIP_P5(bp)) {
+ if (!BNXT_CHIP_P5_PLUS(bp)) {
dim = 0;
i = 0;
bits *= 3; /* array of 3 version components */
@@ -761,7 +759,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver)
goto exit;
bnxt_copy_from_nvm_data(&ver, data, bits, bytes);
- if (BNXT_CHIP_P5(bp)) {
+ if (BNXT_CHIP_P5_PLUS(bp)) {
*nvm_cfg_ver <<= 8;
*nvm_cfg_ver |= ver.vu8;
} else {
@@ -781,7 +779,7 @@ static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req,
if (!strlen(buf))
return 0;
- if ((bp->flags & BNXT_FLAG_CHIP_P5) &&
+ if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
(!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI) ||
!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_ROCE)))
return 0;
@@ -1007,7 +1005,7 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
if (rc)
return rc;
- if (BNXT_CHIP_P5(bp)) {
+ if (BNXT_CHIP_P5_PLUS(bp)) {
rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index f3f3847..b0cea5b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -511,7 +511,7 @@ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
{
if (BNXT_SUPPORTS_TPA(bp)) {
if (bp->max_tpa_v2) {
- if (BNXT_CHIP_P5_THOR(bp))
+ if (BNXT_CHIP_P5(bp))
return BNXT_NUM_TPA_RING_STATS_P5;
return BNXT_NUM_TPA_RING_STATS_P5_SR2;
}
@@ -528,7 +528,8 @@ static int bnxt_get_num_ring_stats(struct bnxt *bp)
bnxt_get_num_tpa_ring_stats(bp);
tx = NUM_RING_TX_HW_STATS;
cmn = NUM_RING_CMN_SW_STATS;
- return rx * bp->rx_nr_rings + tx * bp->tx_nr_rings +
+ return rx * bp->rx_nr_rings +
+ tx * (bp->tx_nr_rings_xdp + bp->tx_nr_rings_per_tc) +
cmn * bp->cp_nr_rings;
}
@@ -923,6 +924,7 @@ static int bnxt_set_channels(struct net_device *dev,
bool sh = false;
int tx_xdp = 0;
int rc = 0;
+ int tx_cp;
if (channel->other_count)
return -EINVAL;
@@ -994,8 +996,9 @@ static int bnxt_set_channels(struct net_device *dev,
if (tcs > 1)
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp;
- bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
- bp->tx_nr_rings + bp->rx_nr_rings;
+ tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
+ bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) :
+ tx_cp + bp->rx_nr_rings;
/* After changing number of rx channels, update NTUPLE feature. */
netdev_update_features(dev);
@@ -1319,7 +1322,7 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5);
return HW_HASH_INDEX_SIZE;
}
@@ -3940,8 +3943,8 @@ static int bnxt_run_loopback(struct bnxt *bp)
int rc;
cpr = &rxr->bnapi->cp_ring;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
- cpr = cpr->cp_ring_arr[BNXT_RX_HDL];
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
+ cpr = rxr->rx_cpr;
pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh);
skb = netdev_alloc_skb(bp->dev, pkt_size);
if (!skb)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index f388671..a1ec39b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -650,7 +650,7 @@ static int bnxt_map_ptp_regs(struct bnxt *bp)
int rc, i;
reg_arr = ptp->refclk_regs;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (BNXT_CHIP_P5(bp)) {
rc = bnxt_map_regs(bp, reg_arr, 2, BNXT_PTP_GRC_WIN);
if (rc)
return rc;
@@ -967,7 +967,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
rc = err;
goto out;
}
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (BNXT_CHIP_P5(bp)) {
spin_lock_bh(&ptp->ptp_lock);
bnxt_refclk_read(bp, NULL, &ptp->current_time);
WRITE_ONCE(ptp->old_time, ptp->current_time);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index c722b3b..175192e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -536,7 +536,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
if (rc)
return rc;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp);
vf_ring_grps = 0;
} else {
@@ -565,7 +565,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
req->min_l2_ctxs = cpu_to_le16(min);
req->min_vnics = cpu_to_le16(min);
req->min_stat_ctx = cpu_to_le16(min);
- if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
req->min_hw_ring_grps = cpu_to_le16(min);
} else {
vf_cp_rings /= num_vfs;
@@ -602,7 +602,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
req->max_stat_ctx = cpu_to_le16(vf_stat_ctx);
req->max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
req->max_rsscos_ctx = cpu_to_le16(vf_rss);
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
req->max_msix = cpu_to_le16(vf_msix / num_vfs);
hwrm_req_hold(bp, req);
@@ -630,7 +630,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
le16_to_cpu(req->min_rsscos_ctx) * n;
hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
- if (bp->flags & BNXT_FLAG_CHIP_P5)
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
hw_resc->max_nqs -= vf_msix;
rc = pf->active_vfs;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 6ba2b93..e897314 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -42,7 +42,7 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
for (i = 0; i < num_msix; i++) {
ent[i].vector = bp->irq_tbl[idx + i].vector;
ent[i].ring_idx = idx + i;
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
ent[i].db_offset = DB_PF_OFFSET_P5;
if (BNXT_VF(bp))
ent[i].db_offset = DB_VF_OFFSET_P5;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 96f5ca7..4791f6a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -42,17 +42,17 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
/* fill up the first buffer */
prod = txr->tx_prod;
- tx_buf = &txr->tx_buf_ring[prod];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
tx_buf->nr_frags = num_frags;
if (xdp)
tx_buf->page = virt_to_head_page(xdp->data);
- txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
flags = (len << TX_BD_LEN_SHIFT) |
((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) |
bnxt_lhint_arr[len >> 9];
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
- txbd->tx_bd_opaque = prod;
+ txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 1 + num_frags);
txbd->tx_bd_haddr = cpu_to_le64(mapping);
/* now let us fill up the frags into the next buffers */
@@ -67,10 +67,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
WRITE_ONCE(txr->tx_prod, prod);
/* first fill up the first buffer */
- frag_tx_buf = &txr->tx_buf_ring[prod];
+ frag_tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
frag_tx_buf->page = skb_frag_page(frag);
- txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+ txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
frag_len = skb_frag_size(frag);
frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0,
@@ -127,20 +127,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
{
- struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+ struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0];
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+ u16 tx_hw_cons = txr->tx_hw_cons;
bool rx_doorbell_needed = false;
- int nr_pkts = bnapi->tx_pkts;
struct bnxt_sw_tx_bd *tx_buf;
u16 tx_cons = txr->tx_cons;
u16 last_tx_cons = tx_cons;
- int i, j, frags;
+ int j, frags;
if (!budget)
return;
- for (i = 0; i < nr_pkts; i++) {
- tx_buf = &txr->tx_buf_ring[tx_cons];
+ while (RING_TX(bp, tx_cons) != tx_hw_cons) {
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)];
if (tx_buf->action == XDP_REDIRECT) {
struct pci_dev *pdev = bp->pdev;
@@ -160,17 +160,17 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
frags = tx_buf->nr_frags;
for (j = 0; j < frags; j++) {
tx_cons = NEXT_TX(tx_cons);
- tx_buf = &txr->tx_buf_ring[tx_cons];
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)];
page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
}
} else {
- bnxt_sched_reset_txr(bp, txr, i);
+ bnxt_sched_reset_txr(bp, txr, tx_cons);
return;
}
tx_cons = NEXT_TX(tx_cons);
}
- bnapi->tx_pkts = 0;
+ bnapi->events &= ~BNXT_TX_CMP_EVENT;
WRITE_ONCE(txr->tx_cons, tx_cons);
if (rx_doorbell_needed) {
tx_buf = &txr->tx_buf_ring[last_tx_cons];
@@ -249,7 +249,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
pdev = bp->pdev;
offset = bp->rx_offset;
- txr = rxr->bnapi->tx_ring;
+ txr = rxr->bnapi->tx_ring[0];
/* BNXT_RX_PAGE_MODE(bp) when XDP enabled */
orig_data = xdp.data;
@@ -275,7 +275,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
case XDP_TX:
rx_buf = &rxr->rx_buf_ring[cons];
mapping = rx_buf->mapping - bp->rx_dma_offset;
- *event = 0;
+ *event &= BNXT_TX_CMP_EVENT;
if (unlikely(xdp_buff_has_frags(&xdp))) {
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp);
@@ -398,7 +398,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
{
struct net_device *dev = bp->dev;
- int tx_xdp = 0, rc, tc;
+ int tx_xdp = 0, tx_cp, rc, tc;
struct bpf_prog *old;
if (prog && !prog->aux->xdp_has_frags &&
@@ -446,7 +446,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
}
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
- bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
+ tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
+ bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings);
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 78c972b..aa5700a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1165,9 +1165,10 @@ struct macb_ptp_info {
int (*get_ts_info)(struct net_device *dev,
struct ethtool_ts_info *info);
int (*get_hwtst)(struct net_device *netdev,
- struct ifreq *ifr);
+ struct kernel_hwtstamp_config *tstamp_config);
int (*set_hwtst)(struct net_device *netdev,
- struct ifreq *ifr, int cmd);
+ struct kernel_hwtstamp_config *tstamp_config,
+ struct netlink_ext_ack *extack);
};
struct macb_pm_data {
@@ -1314,7 +1315,7 @@ struct macb {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
struct tsu_incr tsu_incr;
- struct hwtstamp_config tstamp_config;
+ struct kernel_hwtstamp_config tstamp_config;
/* RX queue filer rule set*/
struct ethtool_rx_fs_list rx_fs_list;
@@ -1363,8 +1364,12 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru
gem_ptp_rxstamp(bp, skb, desc);
}
-int gem_get_hwtst(struct net_device *dev, struct ifreq *rq);
-int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+int gem_get_hwtst(struct net_device *dev,
+ struct kernel_hwtstamp_config *tstamp_config);
+int gem_set_hwtst(struct net_device *dev,
+ struct kernel_hwtstamp_config *tstamp_config,
+ struct netlink_ext_ack *extack);
#else
static inline void gem_ptp_init(struct net_device *ndev) { }
static inline void gem_ptp_remove(struct net_device *ndev) { }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index cebae0f..898debf 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3773,18 +3773,38 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!netif_running(dev))
return -EINVAL;
- if (bp->ptp_info) {
- switch (cmd) {
- case SIOCSHWTSTAMP:
- return bp->ptp_info->set_hwtst(dev, rq, cmd);
- case SIOCGHWTSTAMP:
- return bp->ptp_info->get_hwtst(dev, rq);
- }
- }
-
return phylink_mii_ioctl(bp->phylink, rq, cmd);
}
+static int macb_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ struct macb *bp = netdev_priv(dev);
+
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (!bp->ptp_info)
+ return -EOPNOTSUPP;
+
+ return bp->ptp_info->get_hwtst(dev, cfg);
+}
+
+static int macb_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct macb *bp = netdev_priv(dev);
+
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (!bp->ptp_info)
+ return -EOPNOTSUPP;
+
+ return bp->ptp_info->set_hwtst(dev, cfg, extack);
+}
+
static inline void macb_set_txcsum_feature(struct macb *bp,
netdev_features_t features)
{
@@ -3884,6 +3904,8 @@ static const struct net_device_ops macb_netdev_ops = {
#endif
.ndo_set_features = macb_set_features,
.ndo_features_check = macb_features_check,
+ .ndo_hwtstamp_set = macb_hwtstamp_set,
+ .ndo_hwtstamp_get = macb_hwtstamp_get,
};
/* Configure peripheral capabilities according to device tree
@@ -4539,6 +4561,8 @@ static const struct net_device_ops at91ether_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = at91ether_poll_controller,
#endif
+ .ndo_hwtstamp_set = macb_hwtstamp_set,
+ .ndo_hwtstamp_get = macb_hwtstamp_get,
};
static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index 51d26fa..a63bf29 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -374,19 +374,16 @@ static int gem_ptp_set_ts_mode(struct macb *bp,
return 0;
}
-int gem_get_hwtst(struct net_device *dev, struct ifreq *rq)
+int gem_get_hwtst(struct net_device *dev,
+ struct kernel_hwtstamp_config *tstamp_config)
{
- struct hwtstamp_config *tstamp_config;
struct macb *bp = netdev_priv(dev);
- tstamp_config = &bp->tstamp_config;
+ *tstamp_config = bp->tstamp_config;
if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
return -EOPNOTSUPP;
- if (copy_to_user(rq->ifr_data, tstamp_config, sizeof(*tstamp_config)))
- return -EFAULT;
- else
- return 0;
+ return 0;
}
static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
@@ -401,22 +398,18 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE));
}
-int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
+int gem_set_hwtst(struct net_device *dev,
+ struct kernel_hwtstamp_config *tstamp_config,
+ struct netlink_ext_ack *extack)
{
enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
- struct hwtstamp_config *tstamp_config;
struct macb *bp = netdev_priv(dev);
u32 regval;
- tstamp_config = &bp->tstamp_config;
if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
return -EOPNOTSUPP;
- if (copy_from_user(tstamp_config, ifr->ifr_data,
- sizeof(*tstamp_config)))
- return -EFAULT;
-
switch (tstamp_config->tx_type) {
case HWTSTAMP_TX_OFF:
break;
@@ -463,12 +456,11 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
return -ERANGE;
}
+ bp->tstamp_config = *tstamp_config;
+
if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0)
return -ERANGE;
- if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config)))
- return -EFAULT;
- else
- return 0;
+ return 0;
}
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index 1eb4d5f..c36b93f 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -33,6 +33,9 @@
#define GVE_DEALLOCATE_COMPL_TIMEOUT 60
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
+netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings_dqo(struct gve_priv *priv);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 2d42e73..cc16974 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -79,6 +79,18 @@ static int gve_verify_driver_compatibility(struct gve_priv *priv)
return err;
}
+static netdev_features_t gve_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ if (!gve_is_gqi(priv))
+ return gve_features_check_dqo(skb, dev, features);
+
+ return features;
+}
+
static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
@@ -1879,6 +1891,7 @@ static int gve_set_features(struct net_device *netdev,
static const struct net_device_ops gve_netdev_ops = {
.ndo_start_xmit = gve_start_xmit,
+ .ndo_features_check = gve_features_check,
.ndo_open = gve_open,
.ndo_stop = gve_close,
.ndo_get_stats64 = gve_get_stats,
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index 1e19b83..f59c471 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -843,6 +843,16 @@ static bool gve_can_send_tso(const struct sk_buff *skb)
return true;
}
+netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ if (skb_is_gso(skb) && !gve_can_send_tso(skb))
+ return features & ~NETIF_F_GSO_MASK;
+
+ return features;
+}
+
/* Attempt to transmit specified SKB.
*
* Returns 0 if the SKB was transmitted or dropped.
@@ -854,11 +864,10 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
int num_buffer_descs;
int total_num_descs;
- if (tx->dqo.qpl) {
- if (skb_is_gso(skb))
- if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
- goto drop;
+ if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
+ goto drop;
+ if (tx->dqo.qpl) {
/* We do not need to verify the number of buffers used per
* packet or per segment in case of TSO as with 2K size buffers
* none of the TX packet rules would be violated.
@@ -868,24 +877,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
*/
num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
} else {
- if (skb_is_gso(skb)) {
- /* If TSO doesn't meet HW requirements, attempt to linearize the
- * packet.
- */
- if (unlikely(!gve_can_send_tso(skb) &&
- skb_linearize(skb) < 0)) {
- net_err_ratelimited("%s: Failed to transmit TSO packet\n",
- priv->dev->name);
- goto drop;
- }
-
- if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
- goto drop;
-
- num_buffer_descs = gve_num_buffer_descs_needed(skb);
- } else {
- num_buffer_descs = gve_num_buffer_descs_needed(skb);
-
+ num_buffer_descs = gve_num_buffer_descs_needed(skb);
+ if (!skb_is_gso(skb)) {
if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
if (unlikely(skb_linearize(skb) < 0))
goto drop;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 1bf424a..6022944 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -24,6 +24,7 @@
#define I40E_MAX_VEB 16
#define I40E_MAX_NUM_DESCRIPTORS 4096
+#define I40E_MAX_NUM_DESCRIPTORS_XL710 8160
#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024)
#define I40E_DEFAULT_NUM_DESCRIPTORS 512
#define I40E_REQ_DESCRIPTOR_MULTIPLE 32
@@ -33,11 +34,11 @@
#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */
/* max 16 qps */
#define i40e_default_queues_per_vmdq(pf) \
- (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1)
+ (test_bit(I40E_HW_CAP_RSS_AQ, (pf)->hw.caps) ? 4 : 1)
#define I40E_DEFAULT_QUEUES_PER_VF 4
#define I40E_MAX_VF_QUEUES 16
#define i40e_pf_get_max_q_per_tc(pf) \
- (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64)
+ (test_bit(I40E_HW_CAP_128_QP_RSS, (pf)->hw.caps) ? 128 : 64)
#define I40E_FDIR_RING_COUNT 32
#define I40E_MAX_AQ_BUF_SIZE 4096
#define I40E_AQ_LEN 256
@@ -78,7 +79,7 @@
#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */
/* driver state flags */
-enum i40e_state_t {
+enum i40e_state {
__I40E_TESTING,
__I40E_CONFIG_BUSY,
__I40E_CONFIG_DONE,
@@ -126,7 +127,7 @@ enum i40e_state_t {
BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED)
/* VSI state flags */
-enum i40e_vsi_state_t {
+enum i40e_vsi_state {
__I40E_VSI_DOWN,
__I40E_VSI_NEEDS_RESTART,
__I40E_VSI_SYNCING_FILTERS,
@@ -138,6 +139,60 @@ enum i40e_vsi_state_t {
__I40E_VSI_STATE_SIZE__,
};
+enum i40e_pf_flags {
+ I40E_FLAG_MSI_ENA,
+ I40E_FLAG_MSIX_ENA,
+ I40E_FLAG_RSS_ENA,
+ I40E_FLAG_VMDQ_ENA,
+ I40E_FLAG_SRIOV_ENA,
+ I40E_FLAG_DCB_CAPABLE,
+ I40E_FLAG_DCB_ENA,
+ I40E_FLAG_FD_SB_ENA,
+ I40E_FLAG_FD_ATR_ENA,
+ I40E_FLAG_MFP_ENA,
+ I40E_FLAG_HW_ATR_EVICT_ENA,
+ I40E_FLAG_VEB_MODE_ENA,
+ I40E_FLAG_VEB_STATS_ENA,
+ I40E_FLAG_LINK_POLLING_ENA,
+ I40E_FLAG_TRUE_PROMISC_ENA,
+ I40E_FLAG_LEGACY_RX_ENA,
+ I40E_FLAG_PTP_ENA,
+ I40E_FLAG_IWARP_ENA,
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+ I40E_FLAG_SOURCE_PRUNING_DIS,
+ I40E_FLAG_TC_MQPRIO_ENA,
+ I40E_FLAG_FD_SB_INACTIVE,
+ I40E_FLAG_FD_SB_TO_CLOUD_FILTER,
+ I40E_FLAG_FW_LLDP_DIS,
+ I40E_FLAG_RS_FEC,
+ I40E_FLAG_BASE_R_FEC,
+ /* TOTAL_PORT_SHUTDOWN_ENA
+ * Allows to physically disable the link on the NIC's port.
+ * If enabled, (after link down request from the OS)
+ * no link, traffic or led activity is possible on that port.
+ *
+ * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA is set, the
+ * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA must be explicitly forced
+ * to true and cannot be disabled by system admin at that time.
+ * The functionalities are exclusive in terms of configuration, but
+ * they also have similar behavior (allowing to disable physical
+ * link of the port), with following differences:
+ * - LINK_DOWN_ON_CLOSE_ENA is configurable at host OS run-time and
+ * is supported by whole family of 7xx Intel Ethernet Controllers
+ * - TOTAL_PORT_SHUTDOWN_ENA may be enabled only before OS loads
+ * (in BIOS) only if motherboard's BIOS and NIC's FW has support of it
+ * - when LINK_DOWN_ON_CLOSE_ENA is used, the link is being brought
+ * down by sending phy_type=0 to NIC's FW
+ * - when TOTAL_PORT_SHUTDOWN_ENA is used, phy_type is not altered,
+ * instead the link is being brought down by clearing
+ * bit (I40E_AQ_PHY_ENABLE_LINK) in abilities field of
+ * i40e_aq_set_phy_config structure
+ */
+ I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
+ I40E_FLAG_VF_VLAN_PRUNING_ENA,
+ I40E_PF_FLAGS_NBITS, /* must be last */
+};
+
enum i40e_interrupt_policy {
I40E_INTERRUPT_BEST_CASE,
I40E_INTERRUPT_MEDIUM,
@@ -480,78 +535,7 @@ struct i40e_pf {
struct timer_list service_timer;
struct work_struct service_task;
- u32 hw_features;
-#define I40E_HW_RSS_AQ_CAPABLE BIT(0)
-#define I40E_HW_128_QP_RSS_CAPABLE BIT(1)
-#define I40E_HW_ATR_EVICT_CAPABLE BIT(2)
-#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3)
-#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4)
-#define I40E_HW_NO_PCI_LINK_CHECK BIT(5)
-#define I40E_HW_100M_SGMII_CAPABLE BIT(6)
-#define I40E_HW_NO_DCB_SUPPORT BIT(7)
-#define I40E_HW_USE_SET_LLDP_MIB BIT(8)
-#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9)
-#define I40E_HW_PTP_L4_CAPABLE BIT(10)
-#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11)
-#define I40E_HW_HAVE_CRT_RETIMER BIT(13)
-#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14)
-#define I40E_HW_PHY_CONTROLS_LEDS BIT(15)
-#define I40E_HW_STOP_FW_LLDP BIT(16)
-#define I40E_HW_PORT_ID_VALID BIT(17)
-#define I40E_HW_RESTART_AUTONEG BIT(18)
-
- u32 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40E_FLAG_MSI_ENABLED BIT(1)
-#define I40E_FLAG_MSIX_ENABLED BIT(2)
-#define I40E_FLAG_RSS_ENABLED BIT(3)
-#define I40E_FLAG_VMDQ_ENABLED BIT(4)
-#define I40E_FLAG_SRIOV_ENABLED BIT(5)
-#define I40E_FLAG_DCB_CAPABLE BIT(6)
-#define I40E_FLAG_DCB_ENABLED BIT(7)
-#define I40E_FLAG_FD_SB_ENABLED BIT(8)
-#define I40E_FLAG_FD_ATR_ENABLED BIT(9)
-#define I40E_FLAG_MFP_ENABLED BIT(10)
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(11)
-#define I40E_FLAG_VEB_MODE_ENABLED BIT(12)
-#define I40E_FLAG_VEB_STATS_ENABLED BIT(13)
-#define I40E_FLAG_LINK_POLLING_ENABLED BIT(14)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(15)
-#define I40E_FLAG_LEGACY_RX BIT(16)
-#define I40E_FLAG_PTP BIT(17)
-#define I40E_FLAG_IWARP_ENABLED BIT(18)
-#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(19)
-#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(20)
-#define I40E_FLAG_TC_MQPRIO BIT(21)
-#define I40E_FLAG_FD_SB_INACTIVE BIT(22)
-#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(23)
-#define I40E_FLAG_DISABLE_FW_LLDP BIT(24)
-#define I40E_FLAG_RS_FEC BIT(25)
-#define I40E_FLAG_BASE_R_FEC BIT(26)
-/* TOTAL_PORT_SHUTDOWN
- * Allows to physically disable the link on the NIC's port.
- * If enabled, (after link down request from the OS)
- * no link, traffic or led activity is possible on that port.
- *
- * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED is set, the
- * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED must be explicitly forced to true
- * and cannot be disabled by system admin at that time.
- * The functionalities are exclusive in terms of configuration, but they also
- * have similar behavior (allowing to disable physical link of the port),
- * with following differences:
- * - LINK_DOWN_ON_CLOSE_ENABLED is configurable at host OS run-time and is
- * supported by whole family of 7xx Intel Ethernet Controllers
- * - TOTAL_PORT_SHUTDOWN may be enabled only before OS loads (in BIOS)
- * only if motherboard's BIOS and NIC's FW has support of it
- * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought down
- * by sending phy_type=0 to NIC's FW
- * - when TOTAL_PORT_SHUTDOWN is used, phy_type is not altered, instead
- * the link is being brought down by clearing bit (I40E_AQ_PHY_ENABLE_LINK)
- * in abilities field of i40e_aq_set_phy_config structure
- */
-#define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED BIT(27)
-#define I40E_FLAG_VF_VLAN_PRUNING BIT(28)
-
+ DECLARE_BITMAP(flags, I40E_PF_FLAGS_NBITS);
struct i40e_client_instance *cinst;
bool stat_offsets_loaded;
struct i40e_hw_port_stats stats;
@@ -1267,7 +1251,7 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
{
- return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP);
+ return test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags);
}
#ifdef CONFIG_I40E_DCB
@@ -1301,7 +1285,7 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf);
int i40e_commit_partition_bw_setting(struct i40e_pf *pf);
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
-void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
+void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags);
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
@@ -1321,13 +1305,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
* i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF
* @pf: pointer to a pf.
*
- * Check and return value of flag I40E_FLAG_TC_MQPRIO.
+ * Check and return state of flag I40E_FLAG_TC_MQPRIO_ENA.
*
- * Return: I40E_FLAG_TC_MQPRIO set state.
+ * Return: true if I40E_FLAG_TC_MQPRIO_ENA is set, false otherwise
**/
-static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
+static inline bool i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
{
- return pf->flags & I40E_FLAG_TC_MQPRIO;
+ return test_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 9ce6e63..896c439 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -17,29 +17,16 @@ static void i40e_resume_aq(struct i40e_hw *hw);
static void i40e_adminq_init_regs(struct i40e_hw *hw)
{
/* set head and tail registers in our local struct */
- if (i40e_is_vf(hw)) {
- hw->aq.asq.tail = I40E_VF_ATQT1;
- hw->aq.asq.head = I40E_VF_ATQH1;
- hw->aq.asq.len = I40E_VF_ATQLEN1;
- hw->aq.asq.bal = I40E_VF_ATQBAL1;
- hw->aq.asq.bah = I40E_VF_ATQBAH1;
- hw->aq.arq.tail = I40E_VF_ARQT1;
- hw->aq.arq.head = I40E_VF_ARQH1;
- hw->aq.arq.len = I40E_VF_ARQLEN1;
- hw->aq.arq.bal = I40E_VF_ARQBAL1;
- hw->aq.arq.bah = I40E_VF_ARQBAH1;
- } else {
- hw->aq.asq.tail = I40E_PF_ATQT;
- hw->aq.asq.head = I40E_PF_ATQH;
- hw->aq.asq.len = I40E_PF_ATQLEN;
- hw->aq.asq.bal = I40E_PF_ATQBAL;
- hw->aq.asq.bah = I40E_PF_ATQBAH;
- hw->aq.arq.tail = I40E_PF_ARQT;
- hw->aq.arq.head = I40E_PF_ARQH;
- hw->aq.arq.len = I40E_PF_ARQLEN;
- hw->aq.arq.bal = I40E_PF_ARQBAL;
- hw->aq.arq.bah = I40E_PF_ARQBAH;
- }
+ hw->aq.asq.tail = I40E_PF_ATQT;
+ hw->aq.asq.head = I40E_PF_ATQH;
+ hw->aq.asq.len = I40E_PF_ATQLEN;
+ hw->aq.asq.bal = I40E_PF_ATQBAL;
+ hw->aq.asq.bah = I40E_PF_ATQBAH;
+ hw->aq.arq.tail = I40E_PF_ARQT;
+ hw->aq.arq.head = I40E_PF_ARQH;
+ hw->aq.arq.len = I40E_PF_ARQLEN;
+ hw->aq.arq.bal = I40E_PF_ARQBAL;
+ hw->aq.arq.bah = I40E_PF_ARQBAH;
}
/**
@@ -503,44 +490,76 @@ static int i40e_shutdown_arq(struct i40e_hw *hw)
}
/**
- * i40e_set_hw_flags - set HW flags
+ * i40e_set_hw_caps - set HW capability bits
* @hw: pointer to the hardware structure
**/
-static void i40e_set_hw_flags(struct i40e_hw *hw)
+static void i40e_set_hw_caps(struct i40e_hw *hw)
{
- struct i40e_adminq_info *aq = &hw->aq;
-
- hw->flags = 0;
+ bitmap_zero(hw->caps, I40E_HW_CAPS_NBITS);
switch (hw->mac.type) {
case I40E_MAC_XL710:
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
- hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
- hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+ if (i40e_is_aq_api_ver_ge(hw, 1,
+ I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
+ set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps);
+ set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps);
/* The ability to RX (not drop) 802.1ad frames */
- hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+ set_bit(I40E_HW_CAP_802_1AD, hw->caps);
+ }
+ if (i40e_is_aq_api_ver_ge(hw, 1, 5)) {
+ /* Supported in FW API version higher than 1.4 */
+ set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps);
+ }
+ if (i40e_is_fw_ver_lt(hw, 4, 33)) {
+ set_bit(I40E_HW_CAP_RESTART_AUTONEG, hw->caps);
+ /* No DCB support for FW < v4.33 */
+ set_bit(I40E_HW_CAP_NO_DCB_SUPPORT, hw->caps);
+ }
+ if (i40e_is_fw_ver_lt(hw, 4, 3)) {
+ /* Disable FW LLDP if FW < v4.3 */
+ set_bit(I40E_HW_CAP_STOP_FW_LLDP, hw->caps);
+ }
+ if (i40e_is_fw_ver_ge(hw, 4, 40)) {
+ /* Use the FW Set LLDP MIB API if FW >= v4.40 */
+ set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps);
+ }
+ if (i40e_is_fw_ver_ge(hw, 6, 0)) {
+ /* Enable PTP L4 if FW >= v6.0 */
+ set_bit(I40E_HW_CAP_PTP_L4, hw->caps);
}
break;
case I40E_MAC_X722:
- hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
- I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+ set_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps);
+ set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps);
+ set_bit(I40E_HW_CAP_RSS_AQ, hw->caps);
+ set_bit(I40E_HW_CAP_128_QP_RSS, hw->caps);
+ set_bit(I40E_HW_CAP_ATR_EVICT, hw->caps);
+ set_bit(I40E_HW_CAP_WB_ON_ITR, hw->caps);
+ set_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, hw->caps);
+ set_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, hw->caps);
+ set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps);
+ set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps);
+ set_bit(I40E_HW_CAP_PTP_L4, hw->caps);
+ set_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, hw->caps);
+ set_bit(I40E_HW_CAP_OUTER_UDP_CSUM, hw->caps);
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722))
- hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+ if (rd32(hw, I40E_GLQF_FDEVICTENA(1)) !=
+ I40E_FDEVICT_PCTYPE_DEFAULT) {
+ hw_warn(hw, "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
+ clear_bit(I40E_HW_CAP_ATR_EVICT, hw->caps);
+ }
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722))
- hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+ if (i40e_is_aq_api_ver_ge(hw, 1,
+ I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722))
+ set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps);
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722))
- hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE;
+ if (i40e_is_aq_api_ver_ge(hw, 1,
+ I40E_MINOR_VER_GET_LINK_INFO_X722))
+ set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps);
+
+ if (i40e_is_aq_api_ver_ge(hw, 1,
+ I40E_MINOR_VER_FW_REQUEST_FEC_X722))
+ set_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps);
fallthrough;
default:
@@ -548,22 +567,18 @@ static void i40e_set_hw_flags(struct i40e_hw *hw)
}
/* Newer versions of firmware require lock when reading the NVM */
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= 5))
- hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+ if (i40e_is_aq_api_ver_ge(hw, 1, 5))
+ set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps);
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= 8)) {
- hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
- hw->flags |= I40E_HW_FLAG_DROP_MODE;
- }
+ /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
+ if (i40e_is_aq_api_ver_ge(hw, 1, 7))
+ set_bit(I40E_HW_CAP_802_1AD, hw->caps);
- if (aq->api_maj_ver > 1 ||
- (aq->api_maj_ver == 1 &&
- aq->api_min_ver >= 9))
- hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED;
+ if (i40e_is_aq_api_ver_ge(hw, 1, 8))
+ set_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps);
+
+ if (i40e_is_aq_api_ver_ge(hw, 1, 9))
+ set_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps);
}
/**
@@ -633,7 +648,7 @@ int i40e_init_adminq(struct i40e_hw *hw)
/* Some features were introduced in different FW API version
* for different MAC type.
*/
- i40e_set_hw_flags(hw);
+ i40e_set_hw_caps(hw);
/* get the NVM version info */
i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
@@ -648,25 +663,7 @@ int i40e_init_adminq(struct i40e_hw *hw)
&oem_lo);
hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo;
- if (hw->mac.type == I40E_MAC_XL710 &&
- hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
- hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
- hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
- }
- if (hw->mac.type == I40E_MAC_X722 &&
- hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) {
- hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
- }
-
- /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
- if (hw->aq.api_maj_ver > 1 ||
- (hw->aq.api_maj_ver == 1 &&
- hw->aq.api_min_ver >= 7))
- hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
-
- if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
+ if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR + 1, 0)) {
ret_code = -EIO;
goto init_adminq_free_arq;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index d7e24d6..e171f48 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1374,8 +1374,8 @@ i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
if (report_init) {
if (hw->mac.type == I40E_MAC_XL710 &&
- hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+ i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR,
+ I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
status = i40e_aq_get_link_info(hw, true, NULL, NULL);
} else {
hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
@@ -1645,12 +1645,11 @@ int i40e_aq_get_link_info(struct i40e_hw *hw,
else
hw_link_info->lse_enable = false;
- if ((hw->mac.type == I40E_MAC_XL710) &&
- (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
- hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
+ if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 40) &&
+ hw_link_info->phy_type == 0xE)
hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE &&
+ if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps) &&
hw->mac.type != I40E_MAC_X722) {
__le32 tmp;
@@ -1750,21 +1749,6 @@ int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
}
/**
- * i40e_is_aq_api_ver_ge
- * @aq: pointer to AdminQ info containing HW API version to compare
- * @maj: API major value
- * @min: API minor value
- *
- * Assert whether current HW API version is greater/equal than provided.
- **/
-static bool i40e_is_aq_api_ver_ge(struct i40e_adminq_info *aq, u16 maj,
- u16 min)
-{
- return (aq->api_maj_ver > maj ||
- (aq->api_maj_ver == maj && aq->api_min_ver >= min));
-}
-
-/**
* i40e_aq_add_vsi
* @hw: pointer to the hw struct
* @vsi_ctx: pointer to a vsi context struct
@@ -1890,14 +1874,14 @@ int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
if (set) {
flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
- if (rx_only_promisc && i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+ if (rx_only_promisc && i40e_is_aq_api_ver_ge(hw, 1, 5))
flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY;
}
cmd->promiscuous_flags = cpu_to_le16(flags);
cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
- if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+ if (i40e_is_aq_api_ver_ge(hw, 1, 5))
cmd->valid_flags |=
cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY);
@@ -2000,13 +1984,13 @@ int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
if (enable) {
flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
- if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+ if (i40e_is_aq_api_ver_ge(hw, 1, 5))
flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY;
}
cmd->promiscuous_flags = cpu_to_le16(flags);
cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
- if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+ if (i40e_is_aq_api_ver_ge(hw, 1, 5))
cmd->valid_flags |=
cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY);
cmd->seid = cpu_to_le16(seid);
@@ -2253,7 +2237,7 @@ int i40e_aq_set_switch_config(struct i40e_hw *hw,
scfg->flags = cpu_to_le16(flags);
scfg->valid_flags = cpu_to_le16(valid_flags);
scfg->mode = mode;
- if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_802_1AD, hw->caps)) {
scfg->switch_tag = cpu_to_le16(hw->switch_tag);
scfg->first_tag = cpu_to_le16(hw->first_tag);
scfg->second_tag = cpu_to_le16(hw->second_tag);
@@ -3637,7 +3621,7 @@ i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
(struct i40e_aqc_lldp_restore *)&desc.params.raw;
int status;
- if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) {
+ if (!test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) {
i40e_debug(hw, I40E_DEBUG_ALL,
"Restore LLDP not supported by current FW version.\n");
return -ENODEV;
@@ -3680,7 +3664,7 @@ int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN;
if (persist) {
- if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+ if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps))
cmd->command |= I40E_AQ_LLDP_AGENT_STOP_PERSIST;
else
i40e_debug(hw, I40E_DEBUG_ALL,
@@ -3713,7 +3697,7 @@ int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
cmd->command = I40E_AQ_LLDP_AGENT_START;
if (persist) {
- if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+ if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps))
cmd->command |= I40E_AQ_LLDP_AGENT_START_PERSIST;
else
i40e_debug(hw, I40E_DEBUG_ALL,
@@ -3741,7 +3725,7 @@ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
(struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
int status;
- if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+ if (!test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps))
return -ENODEV;
i40e_fill_default_direct_cmd_desc(&desc,
@@ -5043,7 +5027,7 @@ static int i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
u32 i;
*reg_val = 0;
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) {
status =
i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
@@ -5076,7 +5060,7 @@ static int i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
int status;
u32 i;
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) {
status =
i40e_aq_set_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
@@ -5115,7 +5099,7 @@ int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
u8 port_num;
u32 i;
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) {
status =
i40e_aq_get_phy_register(hw,
I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
@@ -5238,14 +5222,14 @@ int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
**/
u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
{
- bool use_register;
+ bool use_register = false;
int status = 0;
int retry = 5;
u32 val = 0;
- use_register = (((hw->aq.api_maj_ver == 1) &&
- (hw->aq.api_min_ver < 5)) ||
- (hw->mac.type == I40E_MAC_X722));
+ if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722)
+ use_register = true;
+
if (!use_register) {
do_retry:
status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL);
@@ -5300,13 +5284,13 @@ int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
**/
void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
{
- bool use_register;
+ bool use_register = false;
int status = 0;
int retry = 5;
- use_register = (((hw->aq.api_maj_ver == 1) &&
- (hw->aq.api_min_ver < 5)) ||
- (hw->mac.type == I40E_MAC_X722));
+ if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722)
+ use_register = true;
+
if (!use_register) {
do_retry:
status = i40e_aq_rx_ctl_write_register(hw, reg_addr,
@@ -5335,7 +5319,7 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio,
struct i40e_aqc_phy_register_access *cmd)
{
if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) {
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED)
+ if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps))
cmd->cmd_flags |=
I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER |
((mdio_num <<
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 68602fc..498728e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -804,14 +804,11 @@ int i40e_get_dcb_config(struct i40e_hw *hw)
int ret = 0;
/* If Firmware version < v4.33 on X710/XL710, IEEE only */
- if ((hw->mac.type == I40E_MAC_XL710) &&
- (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) ||
- (hw->aq.fw_maj_ver < 4)))
+ if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 33))
return i40e_get_ieee_dcb_config(hw);
/* If Firmware version == v4.33 on X710/XL710, use old CEE struct */
- if ((hw->mac.type == I40E_MAC_XL710) &&
- ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) {
+ if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_eq(hw, 4, 33)) {
ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg,
sizeof(cee_v1_cfg), NULL);
if (!ret) {
@@ -877,7 +874,7 @@ int i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
return -EOPNOTSUPP;
/* Read LLDP NVM area */
- if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) {
+ if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) {
u8 offset = 0;
if (hw->mac.type == I40E_MAC_XL710)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 077a95d..4721845 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -310,8 +310,8 @@ static u8 i40e_dcbnl_getstate(struct net_device *netdev)
struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
dev_dbg(&pf->pdev->dev, "DCB state=%d\n",
- !!(pf->flags & I40E_FLAG_DCB_ENABLED));
- return !!(pf->flags & I40E_FLAG_DCB_ENABLED);
+ test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0);
+ return test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0;
}
/**
@@ -331,19 +331,19 @@ static u8 i40e_dcbnl_setstate(struct net_device *netdev, u8 state)
return ret;
dev_dbg(&pf->pdev->dev, "new state=%d current state=%d\n",
- state, (pf->flags & I40E_FLAG_DCB_ENABLED) ? 1 : 0);
+ state, test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0);
/* Nothing to do */
- if (!state == !(pf->flags & I40E_FLAG_DCB_ENABLED))
+ if (!state == !test_bit(I40E_FLAG_DCB_ENA, pf->flags))
return ret;
if (i40e_is_sw_dcb(pf)) {
if (state) {
- pf->flags |= I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_ENA, pf->flags);
memcpy(&pf->hw.desired_dcbx_config,
&pf->hw.local_dcbx_config,
sizeof(struct i40e_dcbx_config));
} else {
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
}
} else {
/* Cannot directly manipulate FW LLDP Agent */
@@ -653,7 +653,7 @@ static u8 i40e_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
{
struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
- if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
return I40E_DCBNL_STATUS_ERROR;
switch (capid) {
@@ -693,7 +693,7 @@ static int i40e_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
{
struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
- if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
return -EINVAL;
*num = I40E_MAX_TRAFFIC_CLASS;
@@ -827,15 +827,12 @@ static void i40e_dcbnl_get_perm_hw_addr(struct net_device *dev,
u8 *perm_addr)
{
struct i40e_pf *pf = i40e_netdev_to_pf(dev);
- int i, j;
+ int i;
memset(perm_addr, 0xff, MAX_ADDR_LEN);
for (i = 0; i < dev->addr_len; i++)
perm_addr[i] = pf->hw.mac.perm_addr[i];
-
- for (j = 0; j < dev->addr_len; j++, i++)
- perm_addr[i] = pf->hw.mac.san_addr[j];
}
static const struct dcbnl_rtnl_ops dcbnl_ops = {
@@ -891,11 +888,11 @@ void i40e_dcbnl_set_all(struct i40e_vsi *vsi)
return;
/* DCB not enabled */
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags))
return;
/* MFP mode but not an iSCSI PF so return */
- if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(hw->func_caps.iscsi))
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(hw->func_caps.iscsi))
return;
dcbxcfg = &hw->local_dcbx_config;
@@ -1002,7 +999,7 @@ void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
int i;
/* MFP mode but not an iSCSI PF so return */
- if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi))
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi))
return;
for (i = 0; i < old_cfg->numapps; i++) {
@@ -1025,7 +1022,7 @@ void i40e_dcbnl_setup(struct i40e_vsi *vsi)
struct i40e_pf *pf = i40e_netdev_to_pf(dev);
/* Not DCB capable */
- if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
return;
dev->dcbnl_ops = &dcbnl_ops;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h
index 27ebc72..e9871df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debug.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h
@@ -37,6 +37,7 @@ struct i40e_hw;
struct device *i40e_hw_to_dev(struct i40e_hw *hw);
#define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A)
+#define hw_warn(hw, S, A...) dev_warn(i40e_hw_to_dev(hw), S, ##A)
#define i40e_debug(h, m, s, ...) \
do { \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 999c970..8824057 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -147,9 +147,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
" state[%d] = %08lx\n",
i, vsi->state[i]);
if (vsi == pf->vsi[pf->lan_vsi])
- dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n",
+ dev_info(&pf->pdev->dev, " MAC address: %pM Port MAC: %pM\n",
pf->hw.mac.addr,
- pf->hw.mac.san_addr,
pf->hw.mac.port_addr);
hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
dev_info(&pf->pdev->dev,
@@ -820,8 +819,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
/* By default we are in VEPA mode, if this is the first VF/VMDq
* VSI to be added switch to VEB mode.
*/
- if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
+ set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index fd71631..eb9a7b3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -430,35 +430,35 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
struct i40e_priv_flags {
char flag_string[ETH_GSTRING_LEN];
- u64 flag;
+ u8 bitno;
bool read_only;
};
-#define I40E_PRIV_FLAG(_name, _flag, _read_only) { \
+#define I40E_PRIV_FLAG(_name, _bitno, _read_only) { \
.flag_string = _name, \
- .flag = _flag, \
+ .bitno = _bitno, \
.read_only = _read_only, \
}
static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
/* NOTE: MFP setting cannot be changed */
- I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENABLED, 1),
+ I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENA, 1),
I40E_PRIV_FLAG("total-port-shutdown",
- I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED, 1),
- I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
- I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
- I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
- I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, 1),
+ I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENA, 0),
+ I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENA, 0),
+ I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENA, 0),
+ I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENA, 0),
I40E_PRIV_FLAG("link-down-on-close",
- I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
- I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, 0),
+ I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX_ENA, 0),
I40E_PRIV_FLAG("disable-source-pruning",
- I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
- I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0),
+ I40E_FLAG_SOURCE_PRUNING_DIS, 0),
+ I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_FW_LLDP_DIS, 0),
I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0),
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
I40E_PRIV_FLAG("vf-vlan-pruning",
- I40E_FLAG_VF_VLAN_PRUNING, 0),
+ I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@ -466,7 +466,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
/* Private flags with a global effect, restricted to PF 0 */
static const struct i40e_priv_flags i40e_gl_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("vf-true-promisc-support",
- I40E_FLAG_TRUE_PROMISC_SUPPORT, 0),
+ I40E_FLAG_TRUE_PROMISC_ENA, 0),
};
#define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_gstrings_priv_flags)
@@ -502,7 +502,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseT_Full);
- if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100baseT_Full);
ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -601,7 +601,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
10000baseKX4_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
- !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseKR_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
@@ -609,7 +609,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
10000baseKR_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
- !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseKX_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
@@ -917,7 +917,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseT_Full);
- if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100baseT_Full);
if (hw_link_info->requested_speeds &
@@ -1488,12 +1488,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
int status = 0;
- u32 flags = 0;
int err = 0;
- flags = READ_ONCE(pf->flags);
- i40e_set_fec_in_flags(fec_cfg, &flags);
-
/* Get the current phy config */
memset(&abilities, 0, sizeof(abilities));
status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
@@ -1525,7 +1521,7 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
err = -EAGAIN;
goto done;
}
- pf->flags = flags;
+ i40e_set_fec_in_flags(fec_cfg, pf->flags);
status = i40e_update_link_info(hw);
if (status)
/* debug level message only due to relation to the link
@@ -1599,7 +1595,7 @@ static int i40e_set_fec_param(struct net_device *netdev,
return -EPERM;
if (hw->mac.type == I40E_MAC_X722 &&
- !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) {
+ !test_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps)) {
netdev_err(netdev, "Setting FEC encoding not supported by firmware. Please update the NVM image.\n");
return -EOPNOTSUPP;
}
@@ -2015,6 +2011,18 @@ static void i40e_get_drvinfo(struct net_device *netdev,
drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN;
}
+static u32 i40e_get_max_num_descriptors(struct i40e_pf *pf)
+{
+ struct i40e_hw *hw = &pf->hw;
+
+ switch (hw->mac.type) {
+ case I40E_MAC_XL710:
+ return I40E_MAX_NUM_DESCRIPTORS_XL710;
+ default:
+ return I40E_MAX_NUM_DESCRIPTORS;
+ }
+}
+
static void i40e_get_ringparam(struct net_device *netdev,
struct ethtool_ringparam *ring,
struct kernel_ethtool_ringparam *kernel_ring,
@@ -2024,8 +2032,8 @@ static void i40e_get_ringparam(struct net_device *netdev,
struct i40e_pf *pf = np->vsi->back;
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
- ring->rx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
- ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
+ ring->rx_max_pending = i40e_get_max_num_descriptors(pf);
+ ring->tx_max_pending = i40e_get_max_num_descriptors(pf);
ring->rx_mini_max_pending = 0;
ring->rx_jumbo_max_pending = 0;
ring->rx_pending = vsi->rx_rings[0]->count;
@@ -2050,12 +2058,12 @@ static int i40e_set_ringparam(struct net_device *netdev,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
+ u32 new_rx_count, new_tx_count, max_num_descriptors;
struct i40e_ring *tx_rings = NULL, *rx_rings = NULL;
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_hw *hw = &np->vsi->back->hw;
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u32 new_rx_count, new_tx_count;
u16 tx_alloc_queue_pairs;
int timeout = 50;
int i, err = 0;
@@ -2063,14 +2071,15 @@ static int i40e_set_ringparam(struct net_device *netdev,
if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
return -EINVAL;
- if (ring->tx_pending > I40E_MAX_NUM_DESCRIPTORS ||
+ max_num_descriptors = i40e_get_max_num_descriptors(pf);
+ if (ring->tx_pending > max_num_descriptors ||
ring->tx_pending < I40E_MIN_NUM_DESCRIPTORS ||
- ring->rx_pending > I40E_MAX_NUM_DESCRIPTORS ||
+ ring->rx_pending > max_num_descriptors ||
ring->rx_pending < I40E_MIN_NUM_DESCRIPTORS) {
netdev_info(netdev,
"Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n",
ring->tx_pending, ring->rx_pending,
- I40E_MIN_NUM_DESCRIPTORS, I40E_MAX_NUM_DESCRIPTORS);
+ I40E_MIN_NUM_DESCRIPTORS, max_num_descriptors);
return -EINVAL;
}
@@ -2419,7 +2428,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
veb_stats = ((pf->lan_veb != I40E_NO_VEB) &&
(pf->lan_veb < I40E_MAX_VEB) &&
- (pf->flags & I40E_FLAG_VEB_STATS_ENABLED));
+ test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags));
if (veb_stats) {
veb = pf->veb[pf->lan_veb];
@@ -2548,7 +2557,7 @@ static int i40e_get_ts_info(struct net_device *dev,
struct i40e_pf *pf = i40e_netdev_to_pf(dev);
/* only report HW timestamping if PTP is enabled */
- if (!(pf->flags & I40E_FLAG_PTP))
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags))
return ethtool_op_get_ts_info(dev, info);
info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
@@ -2570,7 +2579,7 @@ static int i40e_get_ts_info(struct net_device *dev,
BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
- if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE)
+ if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps))
info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
@@ -2819,10 +2828,10 @@ static int i40e_set_phys_id(struct net_device *netdev,
switch (state) {
case ETHTOOL_ID_ACTIVE:
- if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
+ if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) {
pf->led_status = i40e_led_get(hw);
} else {
- if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps))
i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
NULL);
ret = i40e_led_get_phy(hw, &temp_status,
@@ -2831,25 +2840,25 @@ static int i40e_set_phys_id(struct net_device *netdev,
}
return blink_freq;
case ETHTOOL_ID_ON:
- if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
+ if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps))
i40e_led_set(hw, 0xf, false);
else
ret = i40e_led_set_phy(hw, true, pf->led_status, 0);
break;
case ETHTOOL_ID_OFF:
- if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
+ if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps))
i40e_led_set(hw, 0x0, false);
else
ret = i40e_led_set_phy(hw, false, pf->led_status, 0);
break;
case ETHTOOL_ID_INACTIVE:
- if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
+ if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) {
i40e_led_set(hw, pf->led_status, false);
} else {
ret = i40e_led_set_phy(hw, false, pf->led_status,
(pf->phy_led_val |
I40E_PHY_LED_MODE_ORIG));
- if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps))
i40e_aq_set_phy_debug(hw, 0, NULL);
}
break;
@@ -3628,7 +3637,7 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE);
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
dev_err(&pf->pdev->dev,
"Change of RSS hash input set is not supported when MFP mode is enabled\n");
return -EOPNOTSUPP;
@@ -3644,19 +3653,22 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
switch (nfc->flow_type) {
case TCP_V4_FLOW:
set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes);
- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE,
+ pf->hw.caps))
set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK,
flow_pctypes);
break;
case TCP_V6_FLOW:
set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes);
- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE,
+ pf->hw.caps))
set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK,
flow_pctypes);
break;
case UDP_V4_FLOW:
set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes);
- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+ if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE,
+ pf->hw.caps)) {
set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP,
flow_pctypes);
set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP,
@@ -3666,7 +3678,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
break;
case UDP_V6_FLOW:
set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes);
- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+ if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE,
+ pf->hw.caps)) {
set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP,
flow_pctypes);
set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP,
@@ -4644,7 +4657,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
* main port cannot change them when in MFP mode as this would impact
* any filters on the other ports.
*/
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n");
return -EOPNOTSUPP;
}
@@ -4804,7 +4817,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
return -EINVAL;
pf = vsi->back;
- if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+ if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags))
return -EOPNOTSUPP;
if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
@@ -5001,7 +5014,7 @@ static void i40e_get_channels(struct net_device *dev,
ch->max_combined = i40e_max_channels(vsi);
/* report info for other vector */
- ch->other_count = (pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0;
+ ch->other_count = test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0;
ch->max_other = ch->other_count;
/* Note: This code assumes DCB is disabled for now. */
@@ -5044,7 +5057,7 @@ static int i40e_set_channels(struct net_device *dev,
return -EINVAL;
/* verify other_count has not changed */
- if (ch->other_count != ((pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0))
+ if (ch->other_count != (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0))
return -EINVAL;
/* verify the number of channels does not exceed hardware limits */
@@ -5215,11 +5228,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
u32 i, j, ret_flags = 0;
for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
- const struct i40e_priv_flags *priv_flags;
+ const struct i40e_priv_flags *priv_flag;
- priv_flags = &i40e_gstrings_priv_flags[i];
+ priv_flag = &i40e_gstrings_priv_flags[i];
- if (priv_flags->flag & pf->flags)
+ if (test_bit(priv_flag->bitno, pf->flags))
ret_flags |= BIT(i);
}
@@ -5227,11 +5240,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
return ret_flags;
for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
- const struct i40e_priv_flags *priv_flags;
+ const struct i40e_priv_flags *priv_flag;
- priv_flags = &i40e_gl_gstrings_priv_flags[j];
+ priv_flag = &i40e_gl_gstrings_priv_flags[j];
- if (priv_flags->flag & pf->flags)
+ if (test_bit(priv_flag->bitno, pf->flags))
ret_flags |= BIT(i + j);
}
@@ -5245,8 +5258,10 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
**/
static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
{
+ DECLARE_BITMAP(changed_flags, I40E_PF_FLAGS_NBITS);
+ DECLARE_BITMAP(orig_flags, I40E_PF_FLAGS_NBITS);
+ DECLARE_BITMAP(new_flags, I40E_PF_FLAGS_NBITS);
struct i40e_netdev_priv *np = netdev_priv(dev);
- u64 orig_flags, new_flags, changed_flags;
enum i40e_admin_queue_err adq_err;
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
@@ -5254,51 +5269,57 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
int status;
u32 i, j;
- orig_flags = READ_ONCE(pf->flags);
- new_flags = orig_flags;
+ bitmap_copy(orig_flags, pf->flags, I40E_PF_FLAGS_NBITS);
+ bitmap_copy(new_flags, pf->flags, I40E_PF_FLAGS_NBITS);
for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
- const struct i40e_priv_flags *priv_flags;
+ const struct i40e_priv_flags *priv_flag;
+ bool new_val;
- priv_flags = &i40e_gstrings_priv_flags[i];
-
- if (flags & BIT(i))
- new_flags |= priv_flags->flag;
- else
- new_flags &= ~(priv_flags->flag);
+ priv_flag = &i40e_gstrings_priv_flags[i];
+ new_val = (flags & BIT(i)) ? true : false;
/* If this is a read-only flag, it can't be changed */
- if (priv_flags->read_only &&
- ((orig_flags ^ new_flags) & ~BIT(i)))
+ if (priv_flag->read_only &&
+ test_bit(priv_flag->bitno, orig_flags) != new_val)
return -EOPNOTSUPP;
+
+ if (new_val)
+ set_bit(priv_flag->bitno, new_flags);
+ else
+ clear_bit(priv_flag->bitno, new_flags);
}
if (pf->hw.pf_id != 0)
goto flags_complete;
for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
- const struct i40e_priv_flags *priv_flags;
+ const struct i40e_priv_flags *priv_flag;
+ bool new_val;
- priv_flags = &i40e_gl_gstrings_priv_flags[j];
-
- if (flags & BIT(i + j))
- new_flags |= priv_flags->flag;
- else
- new_flags &= ~(priv_flags->flag);
+ priv_flag = &i40e_gl_gstrings_priv_flags[j];
+ new_val = (flags & BIT(i + j)) ? true : false;
/* If this is a read-only flag, it can't be changed */
- if (priv_flags->read_only &&
- ((orig_flags ^ new_flags) & ~BIT(i)))
+ if (priv_flag->read_only &&
+ test_bit(priv_flag->bitno, orig_flags) != new_val)
return -EOPNOTSUPP;
+
+ if (new_val)
+ set_bit(priv_flag->bitno, new_flags);
+ else
+ clear_bit(priv_flag->bitno, new_flags);
}
flags_complete:
- changed_flags = orig_flags ^ new_flags;
+ bitmap_xor(changed_flags, new_flags, orig_flags, I40E_PF_FLAGS_NBITS);
- if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP)
+ if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags))
reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG;
- if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
- I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED))
+
+ if (test_bit(I40E_FLAG_VEB_STATS_ENA, changed_flags) ||
+ test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) ||
+ test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, changed_flags))
reset_needed = BIT(__I40E_PF_RESET_REQUESTED);
/* Before we finalize any flag changes, we need to perform some
@@ -5306,8 +5327,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
*/
/* ATR eviction is not supported on all devices */
- if ((new_flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) &&
- !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
+ if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, new_flags) &&
+ !test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps))
return -EOPNOTSUPP;
/* If the driver detected FW LLDP was disabled on init, this flag could
@@ -5318,15 +5339,14 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
* disable LLDP, however we _must_ not allow the user to enable/disable
* LLDP with this flag on unsupported FW versions.
*/
- if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
- if (!(pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) {
- dev_warn(&pf->pdev->dev,
- "Device does not support changing FW LLDP\n");
- return -EOPNOTSUPP;
- }
+ if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags) &&
+ !test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps)) {
+ dev_warn(&pf->pdev->dev,
+ "Device does not support changing FW LLDP\n");
+ return -EOPNOTSUPP;
}
- if (changed_flags & I40E_FLAG_RS_FEC &&
+ if (test_bit(I40E_FLAG_RS_FEC, changed_flags) &&
pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
pf->hw.device_id != I40E_DEV_ID_25G_B) {
dev_warn(&pf->pdev->dev,
@@ -5334,7 +5354,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
return -EOPNOTSUPP;
}
- if (changed_flags & I40E_FLAG_BASE_R_FEC &&
+ if (test_bit(I40E_FLAG_BASE_R_FEC, changed_flags) &&
pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
pf->hw.device_id != I40E_DEV_ID_25G_B &&
pf->hw.device_id != I40E_DEV_ID_KX_X722) {
@@ -5349,17 +5369,17 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
*/
/* Flush current ATR settings if ATR was disabled */
- if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
- !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, changed_flags) &&
+ !test_bit(I40E_FLAG_FD_ATR_ENA, new_flags)) {
set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
}
- if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
+ if (test_bit(I40E_FLAG_TRUE_PROMISC_ENA, changed_flags)) {
u16 sw_flags = 0, valid_flags = 0;
int ret;
- if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+ if (!test_bit(I40E_FLAG_TRUE_PROMISC_ENA, new_flags))
sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
@@ -5374,17 +5394,17 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
}
}
- if ((changed_flags & I40E_FLAG_RS_FEC) ||
- (changed_flags & I40E_FLAG_BASE_R_FEC)) {
+ if (test_bit(I40E_FLAG_RS_FEC, changed_flags) ||
+ test_bit(I40E_FLAG_BASE_R_FEC, changed_flags)) {
u8 fec_cfg = 0;
- if (new_flags & I40E_FLAG_RS_FEC &&
- new_flags & I40E_FLAG_BASE_R_FEC) {
+ if (test_bit(I40E_FLAG_RS_FEC, new_flags) &&
+ test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) {
fec_cfg = I40E_AQ_SET_FEC_AUTO;
- } else if (new_flags & I40E_FLAG_RS_FEC) {
+ } else if (test_bit(I40E_FLAG_RS_FEC, new_flags)) {
fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS |
I40E_AQ_SET_FEC_ABILITY_RS);
- } else if (new_flags & I40E_FLAG_BASE_R_FEC) {
+ } else if (test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) {
fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR |
I40E_AQ_SET_FEC_ABILITY_KR);
}
@@ -5392,35 +5412,35 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
dev_warn(&pf->pdev->dev, "Cannot change FEC config\n");
}
- if ((changed_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
- (orig_flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) {
+ if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) &&
+ test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, orig_flags)) {
dev_err(&pf->pdev->dev,
"Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled)\n");
return -EOPNOTSUPP;
}
- if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) &&
+ if (test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, changed_flags) &&
pf->num_alloc_vfs) {
dev_warn(&pf->pdev->dev,
"Changing vf-vlan-pruning flag while VF(s) are active is not supported\n");
return -EOPNOTSUPP;
}
- if ((changed_flags & I40E_FLAG_LEGACY_RX) &&
+ if (test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) &&
I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_warn(&pf->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
return -EOPNOTSUPP;
}
- if ((changed_flags & new_flags &
- I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
- (new_flags & I40E_FLAG_MFP_ENABLED))
+ if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) &&
+ test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, new_flags) &&
+ test_bit(I40E_FLAG_MFP_ENA, new_flags))
dev_warn(&pf->pdev->dev,
"Turning on link-down-on-close flag may affect other partitions\n");
- if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
- if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags)) {
+ if (test_bit(I40E_FLAG_FW_LLDP_DIS, new_flags)) {
#ifdef CONFIG_I40E_DCB
i40e_dcb_sw_default_config(pf);
#endif /* CONFIG_I40E_DCB */
@@ -5461,7 +5481,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
* initialization or (b) while holding the RTNL lock, we don't need
* anything fancy here.
*/
- pf->flags = new_flags;
+ bitmap_copy(pf->flags, new_flags, I40E_PF_FLAGS_NBITS);
/* Issue reset to cause things to take effect, as additional bits
* are added we will need to create a mask of bits requiring reset
@@ -5491,7 +5511,7 @@ static int i40e_get_module_info(struct net_device *netdev,
int status;
/* Check if firmware supports reading module EEPROM. */
- if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+ if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) {
netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
return -EINVAL;
}
@@ -5571,8 +5591,8 @@ static int i40e_get_module_info(struct net_device *netdev,
modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
break;
default:
- netdev_err(vsi->netdev, "Module type unrecognized\n");
- return -EINVAL;
+ netdev_dbg(vsi->netdev, "SFP module type unrecognized or no SFP connector used.\n");
+ return -EOPNOTSUPP;
}
return 0;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f7a332e..7ded598 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -371,7 +371,7 @@ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
if (tx_ring) {
head = i40e_get_head(tx_ring);
/* Read interrupt register */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
val = rd32(&pf->hw,
I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
tx_ring->vsi->base_vector - 1));
@@ -1209,13 +1209,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
pf->stat_offsets_loaded,
&osd->rx_lpi_count, &nsd->rx_lpi_count);
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
!test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
nsd->fd_sb_status = true;
else
nsd->fd_sb_status = false;
- if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
!test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
nsd->fd_atr_status = true;
else
@@ -1485,7 +1485,7 @@ static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi,
return pvid;
is_any = (trusted ||
- !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING));
+ !test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, pf->flags));
if ((vlan_filters && f->vlan == I40E_VLAN_ANY) ||
(!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) ||
@@ -1890,7 +1890,7 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
u8 *lut;
int ret;
- if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps))
return 0;
if (!vsi->rss_size)
vsi->rss_size = min_t(int, pf->alloc_rss_size,
@@ -2045,7 +2045,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
*/
if (vsi->req_queue_pairs > 0)
vsi->num_queue_pairs = vsi->req_queue_pairs;
- else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
vsi->num_queue_pairs = pf->num_lan_msix;
else
vsi->num_queue_pairs = 1;
@@ -2058,7 +2058,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
else
num_tc_qps = vsi->alloc_queue_pairs;
- if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+ if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) {
/* Find numtc from enabled TC bitmap */
for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
if (enabled_tc & BIT(i)) /* TC is enabled */
@@ -2077,7 +2077,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
/* Do not allow use more TC queue pairs than MSI-X vectors exist */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);
/* Setup queue offset/count for all TCs for given VSI */
@@ -2089,8 +2089,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
switch (vsi->type) {
case I40E_VSI_MAIN:
- if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED)) ||
+ if ((!test_bit(I40E_FLAG_FD_SB_ENA,
+ pf->flags) &&
+ !test_bit(I40E_FLAG_FD_ATR_ENA,
+ pf->flags)) ||
vsi->tc_config.enabled_tc != 1) {
qcount = min_t(int, pf->alloc_rss_size,
num_tc_qps);
@@ -2476,7 +2478,7 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
if (vsi->type == I40E_VSI_MAIN &&
pf->lan_veb != I40E_NO_VEB &&
- !(pf->flags & I40E_FLAG_MFP_ENABLED)) {
+ !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
/* set defport ON for Main VSI instead of true promisc
* this way we will get all unicast/multicast and VLAN
* promisc behavior but will not get VF or VMDq traffic
@@ -2907,7 +2909,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
*/
static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{
- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
+ if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags))
return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
@@ -3462,7 +3464,7 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
ring->xsk_pool = i40e_xsk_pool(ring);
/* some ATR related tx ring init */
- if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) {
ring->atr_sample_rate = vsi->back->atr_sample_rate;
ring->atr_count = 0;
} else {
@@ -3478,9 +3480,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
tx_ctx.new_context = 1;
tx_ctx.base = (ring->dma / 128);
tx_ctx.qlen = ring->count;
- tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED));
- tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+ if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) ||
+ test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags))
+ tx_ctx.fd_ena = 1;
+ if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags))
+ tx_ctx.timesync_ena = 1;
/* FDIR VSI tx ring can still use RS bit and writebacks */
if (vsi->type != I40E_VSI_FDIR)
tx_ctx.head_wb_ena = 1;
@@ -3663,7 +3667,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
/* configure Rx buffer alignment */
- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+ if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) {
if (I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_info(&vsi->back->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
@@ -3761,7 +3765,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
u16 qoffset, qcount;
int i, n;
- if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+ if (!test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) {
/* Reset the TC information */
for (i = 0; i < vsi->num_queue_pairs; i++) {
rx_ring = vsi->rx_rings[i];
@@ -3828,7 +3832,7 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
struct i40e_pf *pf = vsi->back;
struct hlist_node *node;
- if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+ if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags))
return;
/* Reset FDir counters as we're replaying all existing filters */
@@ -3966,10 +3970,10 @@ static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
I40E_PFINT_ICR0_ENA_VFLR_MASK |
I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
- if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags))
val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
- if (pf->flags & I40E_FLAG_PTP)
+ if (test_bit(I40E_FLAG_PTP_ENA, pf->flags))
val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, val);
@@ -4205,7 +4209,7 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
}
/* disable each interrupt */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
for (i = vsi->base_vector;
i < (vsi->num_q_vectors + vsi->base_vector); i++)
wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0);
@@ -4231,7 +4235,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
struct i40e_pf *pf = vsi->back;
int i;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
for (i = 0; i < vsi->num_q_vectors; i++)
i40e_irq_dynamic_enable(vsi, i);
} else {
@@ -4252,7 +4256,7 @@ static void i40e_free_misc_vector(struct i40e_pf *pf)
wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
i40e_flush(&pf->hw);
- if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
free_irq(pf->msix_entries[0].vector, pf);
clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
}
@@ -4287,7 +4291,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
(icr0 & I40E_PFINT_ICR0_SWINT_MASK))
pf->sw_int_count++;
- if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) &&
(icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) {
ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n");
@@ -4480,7 +4484,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
i += tx_ring->count;
tx_ring->next_to_clean = i;
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags))
i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
return budget > 0;
@@ -4593,9 +4597,9 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
struct i40e_pf *pf = vsi->back;
int err;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
err = i40e_vsi_request_irq_msix(vsi, basename);
- else if (pf->flags & I40E_FLAG_MSI_ENABLED)
+ else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags))
err = request_irq(pf->pdev->irq, i40e_intr, 0,
pf->int_name, pf);
else
@@ -4627,7 +4631,7 @@ static void i40e_netpoll(struct net_device *netdev)
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
for (i = 0; i < vsi->num_q_vectors; i++)
i40e_msix_clean_rings(0, vsi->q_vectors[i]);
} else {
@@ -4967,7 +4971,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
u32 val, qp;
int i;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
if (!vsi->q_vectors)
return;
@@ -5129,16 +5133,17 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
{
/* If we're in Legacy mode, the interrupt was cleaned in vsi_close */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
pci_disable_msix(pf->pdev);
kfree(pf->msix_entries);
pf->msix_entries = NULL;
kfree(pf->irq_pile);
pf->irq_pile = NULL;
- } else if (pf->flags & I40E_FLAG_MSI_ENABLED) {
+ } else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) {
pci_disable_msi(pf->pdev);
}
- pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+ clear_bit(I40E_FLAG_MSI_ENA, pf->flags);
+ clear_bit(I40E_FLAG_MSIX_ENA, pf->flags);
}
/**
@@ -5478,11 +5483,11 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
/* If neither MQPRIO nor DCB is enabled, then always use single TC */
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags))
return 1;
/* SFP mode will be enabled for all TCs on port */
- if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags))
return i40e_dcb_get_num_tc(dcbcfg);
/* MFP mode return count of enabled TCs for this PF */
@@ -5512,11 +5517,11 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
/* If neither MQPRIO nor DCB is enabled for this PF then just return
* default TC
*/
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags))
return I40E_DEFAULT_TRAFFIC_CLASS;
/* SFP mode we want PF to be enabled for all TCs */
- if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags))
return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);
/* MFP enabled and iSCSI PF type */
@@ -5605,7 +5610,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
/* There is no need to reset BW when mqprio mode is on. */
if (i40e_is_tc_mqprio_enabled(pf))
return 0;
- if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ if (!vsi->mqprio_qopt.qopt.hw && !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
if (ret)
dev_info(&pf->pdev->dev,
@@ -5858,7 +5863,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
}
vsi->reconfig_rss = false;
}
- if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
@@ -6271,7 +6276,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
if (ch->type == I40E_VSI_VMDQ2)
ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
- if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+ if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id =
@@ -6576,8 +6581,8 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi,
* VSI to be added switch to VEB mode.
*/
- if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
+ set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
if (vsi->type == I40E_VSI_MAIN) {
if (i40e_is_tc_mqprio_enabled(pf))
@@ -6988,9 +6993,9 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
if (need_reconfig) {
/* Enable DCB tagging only when more than one TC */
if (new_numtc > 1)
- pf->flags |= I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_ENA, pf->flags);
else
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
set_bit(__I40E_PORT_SUSPENDED, pf->state);
/* Reconfiguration needed quiesce all VSIs */
@@ -7080,7 +7085,7 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
}
/* registers are set, lets apply */
- if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB)
+ if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps))
ret = i40e_hw_set_dcb_config(pf, new_cfg);
}
@@ -7101,7 +7106,7 @@ int i40e_dcb_sw_default_config(struct i40e_pf *pf)
struct i40e_hw *hw = &pf->hw;
int err;
- if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) {
+ if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) {
/* Update the local cached instance with TC0 ETS */
memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config));
pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
@@ -7162,12 +7167,12 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
/* Do not enable DCB for SW1 and SW2 images even if the FW is capable
* Also do not enable DCBx if FW LLDP agent is disabled
*/
- if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) {
+ if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) {
dev_info(&pf->pdev->dev, "DCB is not supported.\n");
err = -EOPNOTSUPP;
goto out;
}
- if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) {
dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n");
err = i40e_dcb_sw_default_config(pf);
if (err) {
@@ -7178,8 +7183,8 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
pf->dcbx_cap = DCB_CAP_DCBX_HOST |
DCB_CAP_DCBX_VER_IEEE;
/* at init capable but disabled */
- pf->flags |= I40E_FLAG_DCB_CAPABLE;
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
goto out;
}
err = i40e_init_dcb(hw, true);
@@ -7194,20 +7199,20 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
DCB_CAP_DCBX_VER_IEEE;
- pf->flags |= I40E_FLAG_DCB_CAPABLE;
+ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
/* Enable DCB tagging only when more than one TC
* or explicitly disable if only one TC
*/
if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
- pf->flags |= I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_ENA, pf->flags);
else
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
dev_dbg(&pf->pdev->dev,
"DCBX offload is supported for this PF.\n");
}
} else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) {
dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
- pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
+ set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags);
} else {
dev_info(&pf->pdev->dev,
"Query for DCB configuration failed, err %pe aq_err %s\n",
@@ -7367,7 +7372,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
struct i40e_pf *pf = vsi->back;
int err;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
i40e_vsi_configure_msix(vsi);
else
i40e_configure_msi_and_legacy(vsi);
@@ -7471,7 +7476,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
* and its speed values are OK, no need for a flap
* if non_zero_phy_type was set, still need to force up
*/
- if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)
+ if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags))
non_zero_phy_type = true;
else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
return 0;
@@ -7487,7 +7492,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0;
/* Copy the old settings, except of phy_type */
config.abilities = abilities.abilities;
- if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) {
+ if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
if (is_up)
config.abilities |= I40E_AQ_PHY_ENABLE_LINK;
else
@@ -7537,8 +7542,8 @@ int i40e_up(struct i40e_vsi *vsi)
int err;
if (vsi->type == I40E_VSI_MAIN &&
- (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
- vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
+ (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) ||
+ test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags)))
i40e_force_link_state(vsi->back, true);
err = i40e_vsi_configure(vsi);
@@ -7566,8 +7571,8 @@ void i40e_down(struct i40e_vsi *vsi)
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
if (vsi->type == I40E_VSI_MAIN &&
- (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
- vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
+ (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) ||
+ test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags)))
i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
@@ -7973,7 +7978,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
struct i40e_fwd_adapter *fwd;
int avail_macvlan, ret;
- if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
return ERR_PTR(-EINVAL);
}
@@ -8168,23 +8173,23 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data)
hw = mqprio_qopt->qopt.hw;
mode = mqprio_qopt->mode;
if (!hw) {
- pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+ clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
goto config_tc;
}
/* Check if MFP enabled */
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
netdev_info(netdev,
"Configuring TC not supported in MFP mode\n");
return ret;
}
switch (mode) {
case TC_MQPRIO_MODE_DCB:
- pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+ clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
/* Check if DCB enabled to continue */
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev,
"DCB is not enabled for adapter\n");
return ret;
@@ -8198,20 +8203,20 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data)
}
break;
case TC_MQPRIO_MODE_CHANNEL:
- if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+ if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev,
"Full offload of TC Mqprio options is not supported when DCB is enabled\n");
return ret;
}
- if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
return ret;
ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
if (ret)
return ret;
memcpy(&vsi->mqprio_qopt, mqprio_qopt,
sizeof(*mqprio_qopt));
- pf->flags |= I40E_FLAG_TC_MQPRIO;
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
break;
default:
return -EINVAL;
@@ -8795,11 +8800,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
return -EINVAL;
}
- if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags)) {
dev_err(&vsi->back->pdev->dev,
"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
- vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+ clear_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags);
+ set_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, vsi->back->flags);
}
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
@@ -8895,11 +8900,11 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi,
pf->num_cloud_filters--;
if (!pf->num_cloud_filters)
- if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
- !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
- pf->flags |= I40E_FLAG_FD_SB_ENABLED;
- pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
- pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) &&
+ !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) {
+ set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
}
return 0;
}
@@ -9200,11 +9205,11 @@ static void i40e_cloud_filter_exit(struct i40e_pf *pf)
}
pf->num_cloud_filters = 0;
- if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
- !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
- pf->flags |= I40E_FLAG_FD_SB_ENABLED;
- pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
- pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) &&
+ !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) {
+ set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
}
}
@@ -9292,7 +9297,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
i40e_prep_for_reset(pf);
i40e_reset_and_rebuild(pf, true, lock_acquired);
dev_info(&pf->pdev->dev,
- pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
+ test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ?
"FW LLDP is disabled\n" :
"FW LLDP is enabled\n");
@@ -9407,12 +9412,12 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
(hw->phy.link_info.link_speed &
~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) &&
- !(pf->flags & I40E_FLAG_DCB_CAPABLE))
+ !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
/* let firmware decide if the DCB should be disabled */
- pf->flags |= I40E_FLAG_DCB_CAPABLE;
+ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
/* Not DCB capable or capability disabled */
- if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
return ret;
/* Ignore if event is not for Nearest Bridge */
@@ -9448,7 +9453,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
dev_warn(&pf->pdev->dev,
"DCB is not supported for X710-T*L 2.5/5G speeds\n");
- pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
} else {
dev_info(&pf->pdev->dev,
"Failed querying DCB configuration data from firmware, err %pe aq_err %s\n",
@@ -9476,9 +9481,9 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
/* Enable DCB tagging only when more than one TC */
if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
- pf->flags |= I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_ENA, pf->flags);
else
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
set_bit(__I40E_PORT_SUSPENDED, pf->state);
/* Reconfiguration needed quiesce all VSIs */
@@ -9610,7 +9615,7 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
{
if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
}
@@ -9631,7 +9636,7 @@ static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
}
@@ -9946,7 +9951,7 @@ static void i40e_link_event(struct i40e_pf *pf)
if (pf->vf)
i40e_vc_notify_link_state(pf);
- if (pf->flags & I40E_FLAG_PTP)
+ if (test_bit(I40E_FLAG_PTP_ENA, pf->flags))
i40e_ptp_set_increment(pf);
#ifdef CONFIG_I40E_DCB
if (new_link == old_link)
@@ -9963,13 +9968,13 @@ static void i40e_link_event(struct i40e_pf *pf)
memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg));
err = i40e_dcb_sw_default_config(pf);
if (err) {
- pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
- I40E_FLAG_DCB_ENABLED);
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
} else {
pf->dcbx_cap = DCB_CAP_DCBX_HOST |
DCB_CAP_DCBX_VER_IEEE;
- pf->flags |= I40E_FLAG_DCB_CAPABLE;
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
}
}
#endif /* CONFIG_I40E_DCB */
@@ -9994,7 +9999,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
return;
pf->service_timer_previous = jiffies;
- if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
+ if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) ||
test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
i40e_link_event(pf);
@@ -10005,7 +10010,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
if (pf->vsi[i] && pf->vsi[i]->netdev)
i40e_update_stats(pf->vsi[i]);
- if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
+ if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) {
/* Update the stats for the active switching components */
for (i = 0; i < I40E_MAX_VEB; i++)
if (pf->veb[i])
@@ -10094,7 +10099,7 @@ static void i40e_handle_link_event(struct i40e_pf *pf,
if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
(!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
(!(status->link_info & I40E_AQ_LINK_UP)) &&
- (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+ (!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) {
dev_err(&pf->pdev->dev,
"Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
dev_err(&pf->pdev->dev,
@@ -10400,7 +10405,7 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb)
if (ret)
goto end_reconstitute;
- if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+ if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
veb->bridge_mode = BRIDGE_MODE_VEB;
else
veb->bridge_mode = BRIDGE_MODE_VEPA;
@@ -10545,7 +10550,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
}
- if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+ if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags))
return;
/* find existing VSI and see if it needs configuring */
@@ -10557,8 +10562,8 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
pf->vsi[pf->lan_vsi]->seid, 0);
if (!vsi) {
dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
- pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
return;
}
}
@@ -10936,14 +10941,14 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
i40e_aq_set_dcb_parameters(hw, false, NULL);
dev_warn(&pf->pdev->dev,
"DCB is not supported for X710-T*L 2.5/5G speeds\n");
- pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
} else {
i40e_aq_set_dcb_parameters(hw, true, NULL);
ret = i40e_init_pf_dcb(pf);
if (ret) {
dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n",
ret);
- pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
/* Continue without DCB enabled */
}
}
@@ -11064,7 +11069,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
wr32(hw, I40E_REG_MSS, val);
}
- if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
+ if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
msleep(75);
ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
if (ret)
@@ -11074,7 +11079,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
pf->hw.aq.asq_last_status));
}
/* reinit the misc interrupt */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
ret = i40e_setup_misc_vector(pf);
if (ret)
goto end_unlock;
@@ -11349,7 +11354,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
if (!vsi->num_rx_desc)
vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
I40E_REQ_DESCRIPTOR_MULTIPLE);
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
vsi->num_q_vectors = pf->num_lan_msix;
else
vsi->num_q_vectors = 1;
@@ -11667,7 +11672,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->count = vsi->num_tx_desc;
ring->size = 0;
ring->dcb_tc = 0;
- if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
+ if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps))
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
ring->itr_setting = pf->tx_itr_default;
WRITE_ONCE(vsi->tx_rings[i], ring++);
@@ -11684,7 +11689,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->count = vsi->num_tx_desc;
ring->size = 0;
ring->dcb_tc = 0;
- if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
+ if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps))
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
set_ring_xdp(ring);
ring->itr_setting = pf->tx_itr_default;
@@ -11748,7 +11753,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
int v_actual;
int iwarp_requested = 0;
- if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
return -ENODEV;
/* The number of vectors we'll request will be comprised of:
@@ -11787,7 +11792,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
vectors_left -= pf->num_lan_msix;
/* reserve one vector for sideband flow director */
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
if (vectors_left) {
pf->num_fdsb_msix = 1;
v_budget++;
@@ -11798,7 +11803,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
}
/* can we reserve enough for iWARP? */
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
iwarp_requested = pf->num_iwarp_msix;
if (!vectors_left)
@@ -11810,7 +11815,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
}
/* any vectors left over go for VMDq support */
- if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
+ if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) {
if (!vectors_left) {
pf->num_vmdq_msix = 0;
pf->num_vmdq_qps = 0;
@@ -11867,7 +11872,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
v_actual = i40e_reserve_msix_vectors(pf, v_budget);
if (v_actual < I40E_MIN_MSIX) {
- pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
+ clear_bit(I40E_FLAG_MSIX_ENA, pf->flags);
kfree(pf->msix_entries);
pf->msix_entries = NULL;
pci_disable_msix(pf->pdev);
@@ -11905,7 +11910,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
pf->num_lan_msix = 1;
break;
case 3:
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
pf->num_lan_msix = 1;
pf->num_iwarp_msix = 1;
} else {
@@ -11913,7 +11918,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
}
break;
default:
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
pf->num_iwarp_msix = min_t(int, (vec / 3),
iwarp_requested);
pf->num_vmdq_vsis = min_t(int, (vec / 3),
@@ -11922,7 +11927,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
pf->num_vmdq_vsis = min_t(int, (vec / 2),
I40E_DEFAULT_NUM_VMDQ_VSI);
}
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
pf->num_fdsb_msix = 1;
vec--;
}
@@ -11934,22 +11939,20 @@ static int i40e_init_msix(struct i40e_pf *pf)
}
}
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- (pf->num_fdsb_msix == 0)) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && pf->num_fdsb_msix == 0) {
dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
- pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
}
- if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
- (pf->num_vmdq_msix == 0)) {
+ if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_msix == 0) {
dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
- pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
+ clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
}
- if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
- (pf->num_iwarp_msix == 0)) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) &&
+ pf->num_iwarp_msix == 0) {
dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
- pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+ clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
}
i40e_debug(&pf->hw, I40E_DEBUG_INIT,
"MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
@@ -12003,7 +12006,7 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
int err, v_idx, num_q_vectors;
/* if not MSIX, give the one vector only to the LAN VSI */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
num_q_vectors = vsi->num_q_vectors;
else if (vsi == pf->vsi[pf->lan_vsi])
num_q_vectors = 1;
@@ -12034,38 +12037,39 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
int vectors = 0;
ssize_t size;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
vectors = i40e_init_msix(pf);
if (vectors < 0) {
- pf->flags &= ~(I40E_FLAG_MSIX_ENABLED |
- I40E_FLAG_IWARP_ENABLED |
- I40E_FLAG_RSS_ENABLED |
- I40E_FLAG_DCB_CAPABLE |
- I40E_FLAG_DCB_ENABLED |
- I40E_FLAG_SRIOV_ENABLED |
- I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED |
- I40E_FLAG_VMDQ_ENABLED);
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ clear_bit(I40E_FLAG_MSIX_ENA, pf->flags);
+ clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
+ clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
+ clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
/* rework the queue expectations without MSIX */
i40e_determine_queue_usage(pf);
}
}
- if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
- (pf->flags & I40E_FLAG_MSI_ENABLED)) {
+ if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
+ test_bit(I40E_FLAG_MSI_ENA, pf->flags)) {
dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n");
vectors = pci_enable_msi(pf->pdev);
if (vectors < 0) {
dev_info(&pf->pdev->dev, "MSI init failed - %d\n",
vectors);
- pf->flags &= ~I40E_FLAG_MSI_ENABLED;
+ clear_bit(I40E_FLAG_MSI_ENA, pf->flags);
}
vectors = 1; /* one MSI or Legacy vector */
}
- if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED)))
+ if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) &&
+ !test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n");
/* set up vector assignment tracking */
@@ -12098,7 +12102,8 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
* scheme. We need to re-enabled them here in order to attempt to
* re-acquire the MSI or MSI-X vectors
*/
- pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+ set_bit(I40E_FLAG_MSI_ENA, pf->flags);
+ set_bit(I40E_FLAG_MSIX_ENA, pf->flags);
err = i40e_init_interrupt_scheme(pf);
if (err)
@@ -12120,7 +12125,7 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
if (err)
goto err_unwind;
- if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags))
i40e_client_update_msix_info(pf);
return 0;
@@ -12148,7 +12153,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
{
int err;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
err = i40e_setup_misc_vector(pf);
if (err) {
@@ -12158,7 +12163,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
return err;
}
} else {
- u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED;
+ u32 flags = test_bit(I40E_FLAG_MSI_ENA, pf->flags) ? 0 : IRQF_SHARED;
err = request_irq(pf->pdev->irq, i40e_intr, flags,
pf->int_name, pf);
@@ -12362,7 +12367,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{
struct i40e_pf *pf = vsi->back;
- if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
+ if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps))
return i40e_config_rss_aq(vsi, seed, lut, lut_size);
else
return i40e_config_rss_reg(vsi, seed, lut, lut_size);
@@ -12381,7 +12386,7 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{
struct i40e_pf *pf = vsi->back;
- if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
+ if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps))
return i40e_get_rss_aq(vsi, seed, lut, lut_size);
else
return i40e_get_rss_reg(vsi, seed, lut, lut_size);
@@ -12484,7 +12489,7 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
int new_rss_size;
- if (!(pf->flags & I40E_FLAG_RSS_ENABLED))
+ if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags))
return 0;
queue_count = min_t(int, queue_count, num_online_cpus());
@@ -12717,9 +12722,9 @@ static int i40e_sw_init(struct i40e_pf *pf)
u16 pow;
/* Set default capability flags */
- pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
- I40E_FLAG_MSI_ENABLED |
- I40E_FLAG_MSIX_ENABLED;
+ bitmap_zero(pf->flags, I40E_PF_FLAGS_NBITS);
+ set_bit(I40E_FLAG_MSI_ENA, pf->flags);
+ set_bit(I40E_FLAG_MSIX_ENA, pf->flags);
/* Set default ITR */
pf->rx_itr_default = I40E_ITR_RX_DEF;
@@ -12739,14 +12744,14 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->rss_size_max = min_t(int, pf->rss_size_max, pow);
if (pf->hw.func_caps.rss) {
- pf->flags |= I40E_FLAG_RSS_ENABLED;
+ set_bit(I40E_FLAG_RSS_ENA, pf->flags);
pf->alloc_rss_size = min_t(int, pf->rss_size_max,
num_online_cpus());
}
/* MFP mode enabled */
if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) {
- pf->flags |= I40E_FLAG_MFP_ENABLED;
+ set_bit(I40E_FLAG_MFP_ENA, pf->flags);
dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
if (i40e_get_partition_bw_setting(pf)) {
dev_warn(&pf->pdev->dev,
@@ -12763,84 +12768,32 @@ static int i40e_sw_init(struct i40e_pf *pf)
if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
(pf->hw.func_caps.fd_filters_best_effort > 0)) {
- pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+ set_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
- if (pf->flags & I40E_FLAG_MFP_ENABLED &&
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) &&
pf->hw.num_partitions > 1)
dev_info(&pf->pdev->dev,
"Flow Director Sideband mode Disabled in MFP mode\n");
else
- pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+ set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
pf->fdir_pf_filter_count =
pf->hw.func_caps.fd_filters_guaranteed;
pf->hw.fdir_shared_filter_count =
pf->hw.func_caps.fd_filters_best_effort;
}
- if (pf->hw.mac.type == I40E_MAC_X722) {
- pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE |
- I40E_HW_128_QP_RSS_CAPABLE |
- I40E_HW_ATR_EVICT_CAPABLE |
- I40E_HW_WB_ON_ITR_CAPABLE |
- I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE |
- I40E_HW_NO_PCI_LINK_CHECK |
- I40E_HW_USE_SET_LLDP_MIB |
- I40E_HW_GENEVE_OFFLOAD_CAPABLE |
- I40E_HW_PTP_L4_CAPABLE |
- I40E_HW_WOL_MC_MAGIC_PKT_WAKE |
- I40E_HW_OUTER_UDP_CSUM_CAPABLE);
-
-#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
- if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) !=
- I40E_FDEVICT_PCTYPE_DEFAULT) {
- dev_warn(&pf->pdev->dev,
- "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
- pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE;
- }
- } else if ((pf->hw.aq.api_maj_ver > 1) ||
- ((pf->hw.aq.api_maj_ver == 1) &&
- (pf->hw.aq.api_min_ver > 4))) {
- /* Supported in FW API version higher than 1.4 */
- pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE;
- }
-
/* Enable HW ATR eviction if possible */
- if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)
- pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
-
- if ((pf->hw.mac.type == I40E_MAC_XL710) &&
- (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
- (pf->hw.aq.fw_maj_ver < 4))) {
- pf->hw_features |= I40E_HW_RESTART_AUTONEG;
- /* No DCB support for FW < v4.33 */
- pf->hw_features |= I40E_HW_NO_DCB_SUPPORT;
- }
-
- /* Disable FW LLDP if FW < v4.3 */
- if ((pf->hw.mac.type == I40E_MAC_XL710) &&
- (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
- (pf->hw.aq.fw_maj_ver < 4)))
- pf->hw_features |= I40E_HW_STOP_FW_LLDP;
-
- /* Use the FW Set LLDP MIB API if FW > v4.40 */
- if ((pf->hw.mac.type == I40E_MAC_XL710) &&
- (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
- (pf->hw.aq.fw_maj_ver >= 5)))
- pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
-
- /* Enable PTP L4 if FW > v6.0 */
- if (pf->hw.mac.type == I40E_MAC_XL710 &&
- pf->hw.aq.fw_maj_ver >= 6)
- pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+ if (test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps))
+ set_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags);
if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
- pf->flags |= I40E_FLAG_VMDQ_ENABLED;
+ set_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
}
if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
- pf->flags |= I40E_FLAG_IWARP_ENABLED;
+ set_bit(I40E_FLAG_IWARP_ENA, pf->flags);
/* IWARP needs one extra vector for CQP just like MISC.*/
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
}
@@ -12850,14 +12803,13 @@ static int i40e_sw_init(struct i40e_pf *pf)
* if NPAR is functioning so unset this hw flag in this case.
*/
if (pf->hw.mac.type == I40E_MAC_XL710 &&
- pf->hw.func_caps.npar_enable &&
- (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
- pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+ pf->hw.func_caps.npar_enable)
+ clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps);
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
- pf->flags |= I40E_FLAG_SRIOV_ENABLED;
+ set_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
pf->num_req_vfs = min_t(int,
pf->hw.func_caps.num_vfs,
I40E_MAX_VF_COUNT);
@@ -12868,7 +12820,7 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->lan_vsi = I40E_NO_VSI;
/* By default FW has this off for performance reasons */
- pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
+ clear_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags);
/* set up queue assignment tracking */
size = sizeof(struct i40e_lump_tracking)
@@ -12887,8 +12839,8 @@ static int i40e_sw_init(struct i40e_pf *pf)
/* Link down on close must be on when total port shutdown
* is enabled for a given port
*/
- pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED |
- I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED);
+ set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
+ set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
dev_info(&pf->pdev->dev,
"total-port-shutdown was enabled, link-down-on-close is forced on\n");
}
@@ -12914,31 +12866,31 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
*/
if (features & NETIF_F_NTUPLE) {
/* Enable filters and mark for reset */
- if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+ if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags))
need_reset = true;
/* enable FD_SB only if there is MSI-X vector and no cloud
* filters exist
*/
if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
- pf->flags |= I40E_FLAG_FD_SB_ENABLED;
- pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
}
} else {
/* turn off filters, mark for reset and clear SW filter list */
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
need_reset = true;
i40e_fdir_filter_exit(pf);
}
- pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
/* reset fd counters */
pf->fd_add_err = 0;
pf->fd_atr_cnt = 0;
/* if ATR was auto disabled it can be re-enabled. */
if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
}
@@ -13087,7 +13039,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
- if (!(pf->hw_features & I40E_HW_PORT_ID_VALID))
+ if (!test_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps))
return -EOPNOTSUPP;
ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id));
@@ -13116,7 +13068,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct i40e_pf *pf = np->vsi->back;
int err = 0;
- if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
+ if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags))
return -EOPNOTSUPP;
if (vid) {
@@ -13216,9 +13168,9 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
veb->bridge_mode = mode;
/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
if (mode == BRIDGE_MODE_VEB)
- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
else
- pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+ clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
break;
}
@@ -13552,7 +13504,7 @@ static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
struct i40e_hw *hw = &pf->hw;
/* All rings in a qp belong to the same qvector. */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
else
i40e_irq_dynamic_enable_icr0(pf);
@@ -13577,7 +13529,7 @@ static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
*
* All rings in a qp belong to the same qvector.
*/
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
@@ -13762,7 +13714,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
NETIF_F_RXCSUM |
0;
- if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps))
netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;
@@ -13798,7 +13750,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
- if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags))
hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
netdev->hw_features |= hw_features | NETIF_F_LOOPBACK;
@@ -13989,7 +13941,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
* negative logic - if it's set, we need to fiddle with
* the VSI to disable source pruning.
*/
- if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+ if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) {
memset(&ctxt, 0, sizeof(ctxt));
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
@@ -14011,7 +13963,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
}
/* MFP mode setup queue map and update VSI */
- if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) &&
!(pf->hw.func_caps.iscsi)) { /* NIC type PF */
memset(&ctxt, 0, sizeof(ctxt));
ctxt.seid = pf->main_vsi_seid;
@@ -14059,7 +14011,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
ctxt.uplink_seid = vsi->uplink_seid;
ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
ctxt.flags = I40E_AQ_VSI_TYPE_PF;
- if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
+ if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) &&
(i40e_is_vsi_uplink_mode_veb(vsi))) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
@@ -14107,7 +14059,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
}
- if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |=
@@ -14323,7 +14275,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
/* In Legacy mode, we do not have to get any other vector since we
* piggyback on the misc/ICR0 for queue interrupts.
*/
- if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
return ret;
if (vsi->num_q_vectors)
vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
@@ -14490,9 +14442,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
* already enabled, in which case we can't force VEPA
* mode.
*/
- if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
veb->bridge_mode = BRIDGE_MODE_VEPA;
- pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+ clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
}
i40e_config_bridge_mode(veb);
}
@@ -14588,12 +14540,16 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
break;
}
- if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
- (vsi->type == I40E_VSI_VMDQ2)) {
+ if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) &&
+ vsi->type == I40E_VSI_VMDQ2) {
ret = i40e_vsi_config_rss(vsi);
+ if (ret)
+ goto err_config;
}
return vsi;
+err_config:
+ i40e_vsi_clear_rings(vsi);
err_rings:
i40e_vsi_free_q_vectors(vsi);
err_msix:
@@ -14831,7 +14787,7 @@ void i40e_veb_release(struct i40e_veb *veb)
static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
{
struct i40e_pf *pf = veb->pf;
- bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
+ bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags);
int ret;
ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
@@ -15131,7 +15087,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
*/
if ((pf->hw.pf_id == 0) &&
- !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
+ !test_bit(I40E_FLAG_TRUE_PROMISC_ENA, pf->flags)) {
flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
pf->last_sw_conf_flags = flags;
}
@@ -15198,7 +15154,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
/* enable RSS in the HW, even for only one queue, as the stack can use
* the hash
*/
- if ((pf->flags & I40E_FLAG_RSS_ENABLED))
+ if (test_bit(I40E_FLAG_RSS_ENA, pf->flags))
i40e_pf_config_rss(pf);
/* fill in link information and enable LSE reporting */
@@ -15240,42 +15196,42 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
queues_left = pf->hw.func_caps.num_tx_qp;
if ((queues_left == 1) ||
- !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+ !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
/* one qp for PF, no queues for anything else */
queues_left = 0;
pf->alloc_rss_size = pf->num_lan_qps = 1;
/* make sure all the fancies are disabled */
- pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
- I40E_FLAG_IWARP_ENABLED |
- I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED |
- I40E_FLAG_DCB_CAPABLE |
- I40E_FLAG_DCB_ENABLED |
- I40E_FLAG_SRIOV_ENABLED |
- I40E_FLAG_VMDQ_ENABLED);
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
- } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
- I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED |
- I40E_FLAG_DCB_CAPABLE))) {
+ clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
+ clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
+ clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
+ } else if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) &&
+ !test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
+ !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
+ !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) {
/* one qp for PF */
pf->alloc_rss_size = pf->num_lan_qps = 1;
queues_left -= pf->num_lan_qps;
- pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
- I40E_FLAG_IWARP_ENABLED |
- I40E_FLAG_FD_SB_ENABLED |
- I40E_FLAG_FD_ATR_ENABLED |
- I40E_FLAG_DCB_ENABLED |
- I40E_FLAG_VMDQ_ENABLED);
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
+ clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
+ clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
} else {
/* Not enough queues for all TCs */
- if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
- (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
- pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
- I40E_FLAG_DCB_ENABLED);
+ if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) &&
+ queues_left < I40E_MAX_TRAFFIC_CLASS) {
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
}
@@ -15288,24 +15244,24 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
queues_left -= pf->num_lan_qps;
}
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
if (queues_left > 1) {
queues_left -= 1; /* save 1 queue for FD */
} else {
- pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
+ set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
}
}
- if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
pf->num_vf_qps && pf->num_req_vfs && queues_left) {
pf->num_req_vfs = min_t(int, pf->num_req_vfs,
(queues_left / pf->num_vf_qps));
queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
}
- if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
+ if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) &&
pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
(queues_left / pf->num_vmdq_qps));
@@ -15316,7 +15272,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
dev_dbg(&pf->pdev->dev,
"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
pf->hw.func_caps.num_tx_qp,
- !!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
+ !!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags),
pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
queues_left);
@@ -15340,7 +15296,8 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
/* Flow Director is enabled */
- if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ||
+ test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
settings->enable_fdir = true;
/* Ethtype and MACVLAN filters enabled for PF */
@@ -15372,21 +15329,21 @@ static void i40e_print_features(struct i40e_pf *pf)
i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
pf->hw.func_caps.num_vsis,
pf->vsi[pf->lan_vsi]->num_queue_pairs);
- if (pf->flags & I40E_FLAG_RSS_ENABLED)
+ if (test_bit(I40E_FLAG_RSS_ENA, pf->flags))
i += scnprintf(&buf[i], REMAIN(i), " RSS");
- if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
- if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) {
i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
}
- if (pf->flags & I40E_FLAG_DCB_CAPABLE)
+ if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags))
i += scnprintf(&buf[i], REMAIN(i), " DCB");
i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
i += scnprintf(&buf[i], REMAIN(i), " Geneve");
- if (pf->flags & I40E_FLAG_PTP)
+ if (test_bit(I40E_FLAG_PTP_ENA, pf->flags))
i += scnprintf(&buf[i], REMAIN(i), " PTP");
- if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+ if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
i += scnprintf(&buf[i], REMAIN(i), " VEB");
else
i += scnprintf(&buf[i], REMAIN(i), " VEPA");
@@ -15417,22 +15374,26 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
* @fec_cfg: FEC option to set in flags
* @flags: ptr to flags in which we set FEC option
**/
-void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags)
+void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags)
{
- if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
- *flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC;
+ if (fec_cfg & I40E_AQ_SET_FEC_AUTO) {
+ set_bit(I40E_FLAG_RS_FEC, flags);
+ set_bit(I40E_FLAG_BASE_R_FEC, flags);
+ }
if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
(fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
- *flags |= I40E_FLAG_RS_FEC;
- *flags &= ~I40E_FLAG_BASE_R_FEC;
+ set_bit(I40E_FLAG_RS_FEC, flags);
+ clear_bit(I40E_FLAG_BASE_R_FEC, flags);
}
if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
(fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
- *flags |= I40E_FLAG_BASE_R_FEC;
- *flags &= ~I40E_FLAG_RS_FEC;
+ set_bit(I40E_FLAG_BASE_R_FEC, flags);
+ clear_bit(I40E_FLAG_RS_FEC, flags);
}
- if (fec_cfg == 0)
- *flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC);
+ if (fec_cfg == 0) {
+ clear_bit(I40E_FLAG_RS_FEC, flags);
+ clear_bit(I40E_FLAG_BASE_R_FEC, flags);
+ }
}
/**
@@ -15858,15 +15819,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->vendor_id, hw->device_id, hw->subsystem_vendor_id,
hw->subsystem_device_id);
- if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
+ if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR,
+ I40E_FW_MINOR_VERSION(hw) + 1))
dev_dbg(&pdev->dev,
"The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
hw->aq.api_maj_ver,
hw->aq.api_min_ver,
I40E_FW_API_VERSION_MAJOR,
I40E_FW_MINOR_VERSION(hw));
- else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
+ else if (i40e_is_aq_api_ver_lt(hw, 1, 4))
dev_info(&pdev->dev,
"The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
hw->aq.api_maj_ver,
@@ -15913,7 +15874,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* Ignore error return codes because if it was already disabled via
* hardware settings this will fail
*/
- if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
+ if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) {
dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
i40e_aq_stop_lldp(hw, true, false, NULL);
}
@@ -15930,7 +15891,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
i40e_get_port_mac_addr(hw, hw->mac.port_addr);
if (is_valid_ether_addr(hw->mac.port_addr))
- pf->hw_features |= I40E_HW_PORT_ID_VALID;
+ set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps);
i40e_ptp_alloc_pins(pf);
pci_set_drvdata(pdev, pf);
@@ -15940,10 +15901,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status);
(!status &&
lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ?
- (pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) :
- (pf->flags |= I40E_FLAG_DISABLE_FW_LLDP);
+ (clear_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) :
+ (set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags));
dev_info(&pdev->dev,
- (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
+ test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ?
"FW LLDP is disabled\n" :
"FW LLDP is enabled\n");
@@ -15953,7 +15914,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = i40e_init_pf_dcb(pf);
if (err) {
dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
- pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
+ clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
+ clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
/* Continue without DCB enabled */
}
#endif /* CONFIG_I40E_DCB */
@@ -16021,11 +15983,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_PCI_IOV
/* prep for VF support */
- if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
- (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
+ test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
!test_bit(__I40E_BAD_EEPROM, pf->state)) {
if (pci_num_vf(pdev))
- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
}
#endif
err = i40e_setup_pf_switch(pf, false, false);
@@ -16066,7 +16028,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
wr32(hw, I40E_REG_MSS, val);
}
- if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
+ if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
msleep(75);
err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
if (err)
@@ -16086,7 +16048,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* the misc functionality and queue processing is combined in
* the same vector and that gets setup at open.
*/
- if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
err = i40e_setup_misc_vector(pf);
if (err) {
dev_info(&pdev->dev,
@@ -16099,8 +16061,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_PCI_IOV
/* prep for VF support */
- if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
- (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
+ test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
!test_bit(__I40E_BAD_EEPROM, pf->state)) {
/* disable link interrupts for VFs */
val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
@@ -16120,7 +16082,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
#endif /* CONFIG_PCI_IOV */
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
pf->num_iwarp_msix,
I40E_IWARP_IRQ_PILE_ID);
@@ -16128,7 +16090,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_info(&pdev->dev,
"failed to get tracking for %d vectors for IWARP err=%d\n",
pf->num_iwarp_msix, pf->iwarp_base_vector);
- pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+ clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
}
}
@@ -16142,7 +16104,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
round_jiffies(jiffies + pf->service_timer_period));
/* add this PF to client device list and launch a client service task */
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
err = i40e_lan_add_device(pf);
if (err)
dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
@@ -16155,7 +16117,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* and will report PCI Gen 1 x 1 by default so don't bother
* checking them.
*/
- if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
+ if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) {
char speed[PCI_SPEED_SIZE] = "Unknown";
char width[PCI_WIDTH_SIZE] = "Unknown";
@@ -16209,7 +16171,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
/* set the FEC config due to the board capabilities */
- i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags);
+ i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags);
/* get the supported phy types from the fw */
err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
@@ -16236,10 +16198,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->main_vsi_seid);
if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
- (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
- pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
+ (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
+ set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps);
if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
- pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
+ set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps);
/* print a string summarizing features */
i40e_print_features(pf);
@@ -16308,10 +16270,10 @@ static void i40e_remove(struct pci_dev *pdev)
usleep_range(1000, 2000);
set_bit(__I40E_IN_REMOVE, pf->state);
- if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) {
set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
i40e_free_vfs(pf);
- pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
+ clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
}
/* no more scheduling of any task */
set_bit(__I40E_SUSPENDED, pf->state);
@@ -16366,7 +16328,7 @@ static void i40e_remove(struct pci_dev *pdev)
i40e_cloud_filter_exit(pf);
/* remove attached clients */
- if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
ret_code = i40e_lan_del_device(pf);
if (ret_code)
dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
@@ -16385,7 +16347,7 @@ static void i40e_remove(struct pci_dev *pdev)
unmap:
/* Free MSI/legacy interrupt 0 when in recovery mode. */
if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ !test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
free_irq(pf->pdev->irq, pf);
/* shutdown the adminq */
@@ -16601,7 +16563,8 @@ static void i40e_shutdown(struct pci_dev *pdev)
*/
i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
- if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
+ if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
+ pf->wol_en)
i40e_enable_mc_magic_wake(pf);
i40e_prep_for_reset(pf);
@@ -16613,7 +16576,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
/* Free MSI/legacy interrupt 0 when in recovery mode. */
if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ !test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
free_irq(pf->pdev->irq, pf);
/* Since we're going to destroy queues during the
@@ -16654,7 +16617,8 @@ static int __maybe_unused i40e_suspend(struct device *dev)
*/
i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
- if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
+ if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
+ pf->wol_en)
i40e_enable_mc_magic_wake(pf);
/* Since we're going to destroy queues during the
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 77cdbfc1..62eb348 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -291,7 +291,7 @@ static int i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
static int __i40e_read_nvm_word(struct i40e_hw *hw,
u16 offset, u16 *data)
{
- if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps))
return i40e_read_nvm_word_aq(hw, offset, data);
return i40e_read_nvm_word_srctl(hw, offset, data);
@@ -310,14 +310,14 @@ int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
{
int ret_code = 0;
- if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK)
+ if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps))
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
if (ret_code)
return ret_code;
ret_code = __i40e_read_nvm_word(hw, offset, data);
- if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK)
+ if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps))
i40e_release_nvm(hw);
return ret_code;
@@ -499,7 +499,7 @@ static int __i40e_read_nvm_buffer(struct i40e_hw *hw,
u16 offset, u16 *words,
u16 *data)
{
- if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps))
return i40e_read_nvm_buffer_aq(hw, offset, words, data);
return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
@@ -521,7 +521,7 @@ int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
{
int ret_code = 0;
- if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
+ if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) {
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
if (!ret_code) {
ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 0011620..af42693 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -501,4 +501,74 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw,
/* i40e_ddp */
int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash);
+/* Firmware and AdminQ version check helpers */
+
+/**
+ * i40e_is_aq_api_ver_ge
+ * @hw: pointer to i40e_hw structure
+ * @maj: API major value to compare
+ * @min: API minor value to compare
+ *
+ * Assert whether current HW API version is greater than or equal to provided.
+ **/
+static inline bool i40e_is_aq_api_ver_ge(struct i40e_hw *hw, u16 maj, u16 min)
+{
+ return (hw->aq.api_maj_ver > maj ||
+ (hw->aq.api_maj_ver == maj && hw->aq.api_min_ver >= min));
+}
+
+/**
+ * i40e_is_aq_api_ver_lt
+ * @hw: pointer to i40e_hw structure
+ * @maj: API major value to compare
+ * @min: API minor value to compare
+ *
+ * Assert whether current HW API version is lower than the one provided.
+ **/
+static inline bool i40e_is_aq_api_ver_lt(struct i40e_hw *hw, u16 maj, u16 min)
+{
+ return !i40e_is_aq_api_ver_ge(hw, maj, min);
+}
+
+/**
+ * i40e_is_fw_ver_ge
+ * @hw: pointer to i40e_hw structure
+ * @maj: firmware major value to compare
+ * @min: firmware minor value to compare
+ *
+ * Assert whether current firmware version is greater than or equal to provided.
+ **/
+static inline bool i40e_is_fw_ver_ge(struct i40e_hw *hw, u16 maj, u16 min)
+{
+ return (hw->aq.fw_maj_ver > maj ||
+ (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver >= min));
+}
+
+/**
+ * i40e_is_fw_ver_lt
+ * @hw: pointer to i40e_hw structure
+ * @maj: firmware major value to compare
+ * @min: firmware minor value to compare
+ *
+ * Assert whether current firmware version is lower than the one provided.
+ **/
+static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min)
+{
+ return !i40e_is_fw_ver_ge(hw, maj, min);
+}
+
+/**
+ * i40e_is_fw_ver_eq
+ * @hw: pointer to i40e_hw structure
+ * @maj: firmware major value to compare
+ * @min: firmware minor value to compare
+ *
+ * Assert whether both major and minor firmware versions equal the given ones.
+ **/
+static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min)
+{
+ return (hw->aq.fw_maj_ver == maj &&
+ hw->aq.fw_min_ver == min);
+}
+
#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 20b7739..1cf993a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -35,7 +35,7 @@ enum i40e_ptp_pin {
GPIO_4
};
-enum i40e_can_set_pins_t {
+enum i40e_can_set_pins {
CANT_DO_PINS = -1,
CAN_SET_PINS,
CAN_DO_PINS
@@ -193,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
* return CAN_DO_PINS if pins can be manipulated within a NIC or
* return CANT_DO_PINS otherwise.
**/
-static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
+static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf)
{
if (!i40e_is_ptp_pin_dev(&pf->hw)) {
dev_warn(&pf->pdev->dev,
@@ -680,7 +680,7 @@ void i40e_ptp_rx_hang(struct i40e_pf *pf)
* configured. We don't want to spuriously warn about Rx timestamp
* hangs if we don't care about the timestamps.
*/
- if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx)
return;
spin_lock_bh(&pf->ptp_rx_lock);
@@ -733,7 +733,7 @@ void i40e_ptp_tx_hang(struct i40e_pf *pf)
{
struct sk_buff *skb;
- if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx)
return;
/* Nothing to do if we're not already waiting for a timestamp */
@@ -771,7 +771,7 @@ void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf)
u32 hi, lo;
u64 ns;
- if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx)
return;
/* don't attempt to timestamp if we don't have an skb */
@@ -818,7 +818,7 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index)
/* Since we cannot turn off the Rx timestamp logic if the device is
* doing Tx timestamping, check if Rx timestamping is configured.
*/
- if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx)
return;
hw = &pf->hw;
@@ -924,7 +924,7 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
{
struct hwtstamp_config *config = &pf->tstamp_config;
- if (!(pf->flags & I40E_FLAG_PTP))
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags))
return -EOPNOTSUPP;
return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
@@ -1071,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
static int i40e_ptp_set_pins(struct i40e_pf *pf,
struct i40e_ptp_pins_settings *pins)
{
- enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf);
+ enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf);
int i = 0;
if (pin_caps == CANT_DO_PINS)
@@ -1211,7 +1211,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
- if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps))
return -ERANGE;
pf->ptp_rx = true;
tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK |
@@ -1225,7 +1225,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
- if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
+ if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps))
return -ERANGE;
fallthrough;
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
@@ -1234,7 +1234,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
pf->ptp_rx = true;
tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK |
I40E_PRTTSYN_CTL1_TSYNTYPE_V2;
- if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) {
tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
} else {
@@ -1308,7 +1308,7 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
struct hwtstamp_config config;
int err;
- if (!(pf->flags & I40E_FLAG_PTP))
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags))
return -EOPNOTSUPP;
if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
@@ -1426,7 +1426,7 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf)
void i40e_ptp_save_hw_time(struct i40e_pf *pf)
{
/* don't try to access the PTP clock if it's not enabled */
- if (!(pf->flags & I40E_FLAG_PTP))
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags))
return;
i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL);
@@ -1483,7 +1483,7 @@ void i40e_ptp_init(struct i40e_pf *pf)
pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >>
I40E_PRTTSYN_CTL0_PF_ID_SHIFT;
if (hw->pf_id != pf_id) {
- pf->flags &= ~I40E_FLAG_PTP;
+ clear_bit(I40E_FLAG_PTP_ENA, pf->flags);
dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n",
__func__,
netdev->name);
@@ -1504,7 +1504,7 @@ void i40e_ptp_init(struct i40e_pf *pf)
if (pf->hw.debug_mask & I40E_DEBUG_LAN)
dev_info(&pf->pdev->dev, "PHC enabled\n");
- pf->flags |= I40E_FLAG_PTP;
+ set_bit(I40E_FLAG_PTP_ENA, pf->flags);
/* Ensure the clocks are running. */
regval = rd32(hw, I40E_PRTTSYN_CTL0);
@@ -1539,7 +1539,7 @@ void i40e_ptp_stop(struct i40e_pf *pf)
struct i40e_hw *hw = &pf->hw;
u32 regval;
- pf->flags &= ~I40E_FLAG_PTP;
+ clear_bit(I40E_FLAG_PTP_ENA, pf->flags);
pf->ptp_tx = false;
pf->ptp_rx = false;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index f408fcf..d561687 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -899,6 +899,7 @@
#define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7
#define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT)
#define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
/* Redefined for X722 family */
#define I40E_GLGEN_STAT_CLEAR 0x00390004 /* Reset: CORER */
#endif /* _I40E_REGISTER_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index dd410b1..b82df5b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -464,7 +464,7 @@ static int i40e_add_del_fdir_tcp(struct i40e_vsi *vsi,
&pf->fd_tcp6_filter_cnt);
if (add) {
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
@@ -734,7 +734,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
* FD ATR/SB and then re-enable it when there is room.
*/
if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
!test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
pf->state))
if (I40E_DEBUG_FD & pf->hw.debug_mask)
@@ -1071,7 +1071,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
if (q_vector->arm_wb_state)
return;
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
@@ -1095,7 +1095,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
**/
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
@@ -2699,7 +2699,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
u32 intval;
/* If we don't have MSIX, then we only need to re-enable icr0 */
- if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
+ if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
i40e_irq_dynamic_enable_icr0(vsi->back);
return;
}
@@ -2888,7 +2888,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
u16 i;
/* make sure ATR is enabled */
- if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
+ if (!test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
return;
if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
@@ -2933,7 +2933,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
return;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
+ if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) {
/* HW ATR eviction will take care of removing filters on FIN
* and RST packets.
*/
@@ -2995,7 +2995,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
+ if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags))
dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
@@ -3053,7 +3053,7 @@ static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
tx_flags |= I40E_TX_FLAGS_SW_VLAN;
}
- if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
+ if (!test_bit(I40E_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
goto out;
/* Insert 802.1p priority into VLAN header */
@@ -3229,7 +3229,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
* we are not already transmitting a packet to be timestamped
*/
pf = i40e_netdev_to_pf(tx_ring->netdev);
- if (!(pf->flags & I40E_FLAG_PTP))
+ if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags))
return 0;
if (pf->ptp_tx &&
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 421fe56..abf1506 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -58,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl)
* mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
* register but instead is a special value meaning "don't update" ITR0/1/2.
*/
-enum i40e_dyn_idx_t {
+enum i40e_dyn_idx {
I40E_IDX_ITR0 = 0,
I40E_IDX_ITR1 = 1,
I40E_IDX_ITR2 = 2,
@@ -92,8 +92,8 @@ enum i40e_dyn_idx_t {
BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
#define i40e_pf_get_default_rss_hena(pf) \
- (((pf)->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
- I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
+ (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, (pf)->hw.caps) ? \
+ I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
/* Supported Rx Buffer Sizes (a multiple of 128) */
#define I40E_RXBUFFER_256 256
@@ -306,7 +306,7 @@ struct i40e_rx_queue_stats {
u64 page_busy_count;
};
-enum i40e_ring_state_t {
+enum i40e_ring_state {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
__I40E_RING_STATE_NBITS /* must be last */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index aff6dc6..de69c2e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -64,9 +64,7 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
enum i40e_mac_type {
I40E_MAC_UNKNOWN = 0,
I40E_MAC_XL710,
- I40E_MAC_VF,
I40E_MAC_X722,
- I40E_MAC_X722_VF,
I40E_MAC_GENERIC,
};
@@ -272,9 +270,7 @@ struct i40e_mac_info {
enum i40e_mac_type type;
u8 addr[ETH_ALEN];
u8 perm_addr[ETH_ALEN];
- u8 san_addr[ETH_ALEN];
u8 port_addr[ETH_ALEN];
- u16 max_fcoeq;
};
enum i40e_aq_resources_ids {
@@ -482,6 +478,36 @@ struct i40e_dcbx_config {
struct i40e_dcb_app_priority_table app[I40E_DCBX_MAX_APPS];
};
+enum i40e_hw_flags {
+ I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE,
+ I40E_HW_CAP_802_1AD,
+ I40E_HW_CAP_AQ_PHY_ACCESS,
+ I40E_HW_CAP_NVM_READ_REQUIRES_LOCK,
+ I40E_HW_CAP_FW_LLDP_STOPPABLE,
+ I40E_HW_CAP_FW_LLDP_PERSISTENT,
+ I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED,
+ I40E_HW_CAP_X722_FEC_REQUEST,
+ I40E_HW_CAP_RSS_AQ,
+ I40E_HW_CAP_128_QP_RSS,
+ I40E_HW_CAP_ATR_EVICT,
+ I40E_HW_CAP_WB_ON_ITR,
+ I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE,
+ I40E_HW_CAP_NO_PCI_LINK_CHECK,
+ I40E_HW_CAP_100M_SGMII,
+ I40E_HW_CAP_NO_DCB_SUPPORT,
+ I40E_HW_CAP_USE_SET_LLDP_MIB,
+ I40E_HW_CAP_GENEVE_OFFLOAD,
+ I40E_HW_CAP_PTP_L4,
+ I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE,
+ I40E_HW_CAP_CRT_RETIMER,
+ I40E_HW_CAP_OUTER_UDP_CSUM,
+ I40E_HW_CAP_PHY_CONTROLS_LEDS,
+ I40E_HW_CAP_STOP_FW_LLDP,
+ I40E_HW_CAP_PORT_ID_VALID,
+ I40E_HW_CAP_RESTART_AUTONEG,
+ I40E_HW_CAPS_NBITS, /* must be last: sizes the hw->caps bitmap */
+};
+
/* Port hardware description */
struct i40e_hw {
u8 __iomem *hw_addr;
@@ -546,16 +572,7 @@ struct i40e_hw {
struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
-#define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0)
-#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1)
-#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2)
-#define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
-#define I40E_HW_FLAG_FW_LLDP_STOPPABLE BIT_ULL(4)
-#define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5)
-#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6)
-#define I40E_HW_FLAG_DROP_MODE BIT_ULL(7)
-#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8)
- u64 flags;
+ DECLARE_BITMAP(caps, I40E_HW_CAPS_NBITS);
/* Used in set switch config AQ command */
u16 switch_tag;
@@ -567,12 +584,6 @@ struct i40e_hw {
char err_str[16];
};
-static inline bool i40e_is_vf(struct i40e_hw *hw)
-{
- return (hw->mac.type == I40E_MAC_VF ||
- hw->mac.type == I40E_MAC_X722_VF);
-}
-
struct i40e_driver_version {
u8 major_version;
u8 minor_version;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 3f99eb1..37cca48 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1808,7 +1808,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
if (pci_num_vf(pf->pdev) != num_alloc_vfs) {
ret = pci_enable_sriov(pf->pdev, num_alloc_vfs);
if (ret) {
- pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+ clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
pf->num_alloc_vfs = 0;
goto err_iov;
}
@@ -1919,8 +1919,8 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
}
if (num_vfs) {
- if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
+ set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
}
ret = i40e_pci_sriov_enable(pdev, num_vfs);
@@ -1929,7 +1929,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!pci_vfs_assigned(pf->pdev)) {
i40e_free_vfs(pf);
- pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+ clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
} else {
dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
@@ -2137,14 +2137,14 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
} else {
- if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
+ if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) &&
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ))
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
else
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
}
- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+ if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, pf->hw.caps)) {
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
vfres->vf_cap_flags |=
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
@@ -2153,12 +2153,12 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
- if ((pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE) &&
+ if (test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps) &&
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
dev_err(&pf->pdev->dev,
"VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
vf->vf_id);
@@ -2168,7 +2168,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
}
- if (pf->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) {
+ if (test_bit(I40E_HW_CAP_WB_ON_ITR, pf->hw.caps)) {
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
vfres->vf_cap_flags |=
VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
@@ -4843,7 +4843,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
goto out;
}
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n");
ret = -EINVAL;
goto out;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 351e0d3..cd7dcd0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -522,11 +522,18 @@ enum ice_misc_thread_tasks {
ICE_MISC_THREAD_NBITS /* must be last */
};
-struct ice_switchdev_info {
+struct ice_eswitch {
struct ice_vsi *control_vsi;
struct ice_vsi *uplink_vsi;
struct ice_esw_br_offloads *br_offloads;
+ struct xarray reprs;
bool is_running;
+ /* struct to allow cp queues management optimization */
+ struct {
+ int to_reach;
+ int value;
+ bool is_reaching;
+ } qs;
};
struct ice_agg_node {
@@ -637,7 +644,7 @@ struct ice_pf {
struct ice_link_default_override_tlv link_dflt_override;
struct ice_lag *lag; /* Link Aggregation information */
- struct ice_switchdev_info switchdev;
+ struct ice_eswitch eswitch;
struct ice_esw_br_port *br_port;
#define ICE_INVALID_AGG_NODE_ID 0
@@ -846,7 +853,7 @@ static inline struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num)
*/
static inline bool ice_is_switchdev_running(struct ice_pf *pf)
{
- return pf->switchdev.is_running;
+ return pf->eswitch.is_running;
}
#define ICE_FD_STAT_CTR_BLOCK_COUNT 256
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 80dc544..f4e24d1 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -810,6 +810,10 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node
struct ice_vf *vf;
int i;
+ if (node->rate_node)
+ /* already added, skip to the next */
+ goto traverse_children;
+
if (node->parent == tc_node) {
/* create root node */
rate_node = devl_rate_node_create(devlink, node, node->name, NULL);
@@ -831,6 +835,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node
if (rate_node && !IS_ERR(rate_node))
node->rate_node = rate_node;
+traverse_children:
for (i = 0; i < node->num_children; i++)
ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf);
}
@@ -861,6 +866,30 @@ int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *v
return 0;
}
+static void ice_clear_rate_nodes(struct ice_sched_node *node)
+{
+ node->rate_node = NULL;
+
+ for (int i = 0; i < node->num_children; i++)
+ ice_clear_rate_nodes(node->children[i]);
+}
+
+/**
+ * ice_devlink_rate_clear_tx_topology - clear node->rate_node
+ * @vsi: main vsi struct
+ *
+ * Reset rate_node to NULL, under the port's sched_lock, on the first child of
+ * the scheduler root and, recursively, on every node below it.
+ */
+void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi)
+{
+ struct ice_port_info *pi = vsi->port_info;
+
+ mutex_lock(&pi->sched_lock);
+ ice_clear_rate_nodes(pi->root->children[0]);
+ mutex_unlock(&pi->sched_lock);
+}
+
/**
* ice_set_object_tx_share - sets node scheduling parameter
* @pi: devlink struct instance
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h
index 6ec9677..d291c0e 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.h
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.h
@@ -20,5 +20,6 @@ void ice_devlink_destroy_regions(struct ice_pf *pf);
int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi);
void ice_tear_down_devlink_rate_tree(struct ice_pf *pf);
+void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi);
#endif /* _ICE_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index a655d49..3f80e20 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -11,17 +11,34 @@
#include "ice_tc_lib.h"
/**
- * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index
+ * ice_eswitch_del_sp_rules - delete adv rules added on PRs
+ * @pf: pointer to the PF struct
+ *
+ * Delete all advanced rules that were used to forward packets with the
+ * device's VSI index to the corresponding eswitch ctrl VSI queue.
+ */
+static void ice_eswitch_del_sp_rules(struct ice_pf *pf)
+{
+ struct ice_repr *repr;
+ unsigned long id;
+
+ xa_for_each(&pf->eswitch.reprs, id, repr) {
+ if (repr->sp_rule.rid)
+ ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule);
+ }
+}
+
+/**
+ * ice_eswitch_add_sp_rule - add adv rule with device's VSI index
* @pf: pointer to PF struct
- * @vf: pointer to VF struct
+ * @repr: pointer to the repr struct
*
* This function adds advanced rule that forwards packets with
- * VF's VSI index to the corresponding switchdev ctrl VSI queue.
+ * device's VSI index to the corresponding eswitch ctrl VSI queue.
*/
-static int
-ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf)
+static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr)
{
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
struct ice_adv_rule_info rule_info = { 0 };
struct ice_adv_lkup_elem *list;
struct ice_hw *hw = &pf->hw;
@@ -38,39 +55,42 @@ ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf)
rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
- ctrl_vsi->rxq_map[vf->vf_id];
+ ctrl_vsi->rxq_map[repr->q_id];
rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
rule_info.flags_info.act_valid = true;
rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
- rule_info.src_vsi = vf->lan_vsi_idx;
+ rule_info.src_vsi = repr->src_vsi->idx;
err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
- &vf->repr->sp_rule);
+ &repr->sp_rule);
if (err)
- dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d",
- vf->vf_id);
+ dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d",
+ repr->id);
kfree(list);
return err;
}
-/**
- * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index
- * @vf: pointer to the VF struct
- *
- * Delete the advanced rule that was used to forward packets with the VF's VSI
- * index to the corresponding switchdev ctrl VSI queue.
- */
-static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf)
+static int
+ice_eswitch_add_sp_rules(struct ice_pf *pf)
{
- if (!vf->repr)
- return;
+ struct ice_repr *repr;
+ unsigned long id;
+ int err;
- ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule);
+ xa_for_each(&pf->eswitch.reprs, id, repr) {
+ err = ice_eswitch_add_sp_rule(pf, repr);
+ if (err) {
+ ice_eswitch_del_sp_rules(pf);
+ return err;
+ }
+ }
+
+ return 0;
}
/**
- * ice_eswitch_setup_env - configure switchdev HW filters
+ * ice_eswitch_setup_env - configure eswitch HW filters
* @pf: pointer to PF struct
*
* This function adds HW filters configuration specific for switchdev
@@ -78,18 +98,18 @@ static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf)
*/
static int ice_eswitch_setup_env(struct ice_pf *pf)
{
- struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
- struct net_device *uplink_netdev = uplink_vsi->netdev;
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
+ struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
+ struct net_device *netdev = uplink_vsi->netdev;
struct ice_vsi_vlan_ops *vlan_ops;
bool rule_added = false;
ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);
- netif_addr_lock_bh(uplink_netdev);
- __dev_uc_unsync(uplink_netdev, NULL);
- __dev_mc_unsync(uplink_netdev, NULL);
- netif_addr_unlock_bh(uplink_netdev);
+ netif_addr_lock_bh(netdev);
+ __dev_uc_unsync(netdev, NULL);
+ __dev_mc_unsync(netdev, NULL);
+ netif_addr_unlock_bh(netdev);
if (ice_vsi_add_vlan_zero(uplink_vsi))
goto err_def_rx;
@@ -132,19 +152,20 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
}
/**
- * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI
- * @pf: pointer to PF struct
+ * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI
+ * @eswitch: pointer to eswitch struct
*
- * In switchdev number of allocated Tx/Rx rings is equal.
+ * In eswitch mode the number of allocated Tx and Rx rings is equal.
*
* This function fills q_vectors structures associated with representor and
* move each ring pairs to port representor netdevs. Each port representor
* will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to
* number of VFs.
*/
-static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
+static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch)
{
- struct ice_vsi *vsi = pf->switchdev.control_vsi;
+ struct ice_vsi *vsi = eswitch->control_vsi;
+ unsigned long repr_id = 0;
int q_id;
ice_for_each_txq(vsi, q_id) {
@@ -152,13 +173,14 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
struct ice_repr *repr;
- struct ice_vf *vf;
- vf = ice_get_vf_by_id(pf, q_id);
- if (WARN_ON(!vf))
- continue;
+ repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX,
+ XA_PRESENT);
+ if (!repr)
+ break;
- repr = vf->repr;
+ repr_id += 1;
+ repr->q_id = q_id;
q_vector = repr->q_vector;
tx_ring = vsi->tx_rings[q_id];
rx_ring = vsi->rx_rings[q_id];
@@ -181,136 +203,96 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
rx_ring->q_vector = q_vector;
rx_ring->next = NULL;
rx_ring->netdev = repr->netdev;
-
- ice_put_vf(vf);
}
}
/**
- * ice_eswitch_release_reprs - clear PR VSIs configuration
+ * ice_eswitch_release_repr - clear PR VSI configuration
* @pf: poiner to PF struct
- * @ctrl_vsi: pointer to switchdev control VSI
+ * @repr: pointer to PR
*/
static void
-ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
+ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr)
{
- struct ice_vf *vf;
- unsigned int bkt;
+ struct ice_vsi *vsi = repr->src_vsi;
- lockdep_assert_held(&pf->vfs.table_lock);
+ /* Skip representors that aren't configured */
+ if (!repr->dst)
+ return;
- ice_for_each_vf(pf, bkt, vf) {
- struct ice_vsi *vsi = vf->repr->src_vsi;
+ ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+ metadata_dst_free(repr->dst);
+ repr->dst = NULL;
+ ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
+ ICE_FWD_TO_VSI);
- /* Skip VFs that aren't configured */
- if (!vf->repr->dst)
- continue;
-
- ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
- metadata_dst_free(vf->repr->dst);
- vf->repr->dst = NULL;
- ice_eswitch_del_vf_sp_rule(vf);
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
- ICE_FWD_TO_VSI);
-
- netif_napi_del(&vf->repr->q_vector->napi);
- }
+ netif_napi_del(&repr->q_vector->napi);
}
/**
- * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode
+ * ice_eswitch_setup_repr - configure PR to run in switchdev mode
* @pf: pointer to PF struct
+ * @repr: pointer to PR struct
*/
-static int ice_eswitch_setup_reprs(struct ice_pf *pf)
+static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr)
{
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
- int max_vsi_num = 0;
- struct ice_vf *vf;
- unsigned int bkt;
+ struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
+ struct ice_vsi *vsi = repr->src_vsi;
+ struct metadata_dst *dst;
- lockdep_assert_held(&pf->vfs.table_lock);
+ ice_remove_vsi_fltr(&pf->hw, vsi->idx);
+ repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+ GFP_KERNEL);
+ if (!repr->dst)
+ goto err_add_mac_fltr;
- ice_for_each_vf(pf, bkt, vf) {
- struct ice_vsi *vsi = vf->repr->src_vsi;
+ if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof))
+ goto err_dst_free;
- ice_remove_vsi_fltr(&pf->hw, vsi->idx);
- vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
- GFP_KERNEL);
- if (!vf->repr->dst) {
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
- ICE_FWD_TO_VSI);
- goto err;
- }
+ if (ice_vsi_add_vlan_zero(vsi))
+ goto err_update_security;
- if (ice_eswitch_add_vf_sp_rule(pf, vf)) {
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
- ICE_FWD_TO_VSI);
- goto err;
- }
+ netif_napi_add(repr->netdev, &repr->q_vector->napi,
+ ice_napi_poll);
- if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) {
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
- ICE_FWD_TO_VSI);
- ice_eswitch_del_vf_sp_rule(vf);
- metadata_dst_free(vf->repr->dst);
- vf->repr->dst = NULL;
- goto err;
- }
+ netif_keep_dst(repr->netdev);
- if (ice_vsi_add_vlan_zero(vsi)) {
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
- ICE_FWD_TO_VSI);
- ice_eswitch_del_vf_sp_rule(vf);
- metadata_dst_free(vf->repr->dst);
- vf->repr->dst = NULL;
- ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
- goto err;
- }
-
- if (max_vsi_num < vsi->vsi_num)
- max_vsi_num = vsi->vsi_num;
-
- netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi,
- ice_napi_poll);
-
- netif_keep_dst(vf->repr->netdev);
- }
-
- ice_for_each_vf(pf, bkt, vf) {
- struct ice_repr *repr = vf->repr;
- struct ice_vsi *vsi = repr->src_vsi;
- struct metadata_dst *dst;
-
- dst = repr->dst;
- dst->u.port_info.port_id = vsi->vsi_num;
- dst->u.port_info.lower_dev = repr->netdev;
- ice_repr_set_traffic_vsi(repr, ctrl_vsi);
- }
+ dst = repr->dst;
+ dst->u.port_info.port_id = vsi->vsi_num;
+ dst->u.port_info.lower_dev = repr->netdev;
+ ice_repr_set_traffic_vsi(repr, ctrl_vsi);
return 0;
-err:
- ice_eswitch_release_reprs(pf, ctrl_vsi);
+err_update_security:
+ ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+err_dst_free:
+ metadata_dst_free(repr->dst);
+ repr->dst = NULL;
+err_add_mac_fltr:
+ ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI);
return -ENODEV;
}
/**
- * ice_eswitch_update_repr - reconfigure VF port representor
- * @vsi: VF VSI for which port representor is configured
+ * ice_eswitch_update_repr - reconfigure port representor
+ * @repr_id: representor ID
+ * @vsi: VSI for which port representor is configured
*/
-void ice_eswitch_update_repr(struct ice_vsi *vsi)
+void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
struct ice_repr *repr;
- struct ice_vf *vf;
int ret;
if (!ice_is_switchdev_running(pf))
return;
- vf = vsi->vf;
- repr = vf->repr;
+ repr = xa_load(&pf->eswitch.reprs, repr_id);
+ if (!repr)
+ return;
+
repr->src_vsi = vsi;
repr->dst->u.port_info.port_id = vsi->vsi_num;
@@ -319,9 +301,10 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi)
ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
if (ret) {
- ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI);
- dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor",
- vsi->vf->vf_id);
+ ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
+ ICE_FWD_TO_VSI);
+ dev_err(ice_pf_to_dev(pf), "Failed to update VSI of port representor %d",
+ repr->id);
}
}
@@ -353,13 +336,13 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
skb_dst_drop(skb);
dst_hold((struct dst_entry *)repr->dst);
skb_dst_set(skb, (struct dst_entry *)repr->dst);
- skb->queue_mapping = repr->vf->vf_id;
+ skb->queue_mapping = repr->q_id;
return ice_start_xmit(skb, netdev);
}
/**
- * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor
+ * ice_eswitch_set_target_vsi - set eswitch context in Tx context descriptor
* @skb: pointer to send buffer
* @off: pointer to offload struct
*/
@@ -382,7 +365,7 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb,
}
/**
- * ice_eswitch_release_env - clear switchdev HW filters
+ * ice_eswitch_release_env - clear eswitch HW filters
* @pf: pointer to PF struct
*
* This function removes HW filters configuration specific for switchdev
@@ -390,8 +373,8 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb,
*/
static void ice_eswitch_release_env(struct ice_pf *pf)
{
- struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
+ struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
struct ice_vsi_vlan_ops *vlan_ops;
vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
@@ -407,7 +390,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
}
/**
- * ice_eswitch_vsi_setup - configure switchdev control VSI
+ * ice_eswitch_vsi_setup - configure eswitch control VSI
* @pf: pointer to PF structure
* @pi: pointer to port_info structure
*/
@@ -424,48 +407,29 @@ ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
}
/**
- * ice_eswitch_napi_del - remove NAPI handle for all port representors
- * @pf: pointer to PF structure
- */
-static void ice_eswitch_napi_del(struct ice_pf *pf)
-{
- struct ice_vf *vf;
- unsigned int bkt;
-
- lockdep_assert_held(&pf->vfs.table_lock);
-
- ice_for_each_vf(pf, bkt, vf)
- netif_napi_del(&vf->repr->q_vector->napi);
-}
-
-/**
* ice_eswitch_napi_enable - enable NAPI for all port representors
- * @pf: pointer to PF structure
+ * @reprs: xarray of reprs
*/
-static void ice_eswitch_napi_enable(struct ice_pf *pf)
+static void ice_eswitch_napi_enable(struct xarray *reprs)
{
- struct ice_vf *vf;
- unsigned int bkt;
+ struct ice_repr *repr;
+ unsigned long id;
- lockdep_assert_held(&pf->vfs.table_lock);
-
- ice_for_each_vf(pf, bkt, vf)
- napi_enable(&vf->repr->q_vector->napi);
+ xa_for_each(reprs, id, repr)
+ napi_enable(&repr->q_vector->napi);
}
/**
* ice_eswitch_napi_disable - disable NAPI for all port representors
- * @pf: pointer to PF structure
+ * @reprs: xarray of reprs
*/
-static void ice_eswitch_napi_disable(struct ice_pf *pf)
+static void ice_eswitch_napi_disable(struct xarray *reprs)
{
- struct ice_vf *vf;
- unsigned int bkt;
+ struct ice_repr *repr;
+ unsigned long id;
- lockdep_assert_held(&pf->vfs.table_lock);
-
- ice_for_each_vf(pf, bkt, vf)
- napi_disable(&vf->repr->q_vector->napi);
+ xa_for_each(reprs, id, repr)
+ napi_disable(&repr->q_vector->napi);
}
/**
@@ -486,39 +450,26 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
return -EINVAL;
}
- pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
- if (!pf->switchdev.control_vsi)
+ pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
+ if (!pf->eswitch.control_vsi)
return -ENODEV;
- ctrl_vsi = pf->switchdev.control_vsi;
- pf->switchdev.uplink_vsi = uplink_vsi;
+ ctrl_vsi = pf->eswitch.control_vsi;
+ /* cp VSI is created with 1 queue as default */
+ pf->eswitch.qs.value = 1;
+ pf->eswitch.uplink_vsi = uplink_vsi;
if (ice_eswitch_setup_env(pf))
goto err_vsi;
- if (ice_repr_add_for_all_vfs(pf))
- goto err_repr_add;
-
- if (ice_eswitch_setup_reprs(pf))
- goto err_setup_reprs;
-
- ice_eswitch_remap_rings_to_vectors(pf);
-
- if (ice_vsi_open(ctrl_vsi))
- goto err_setup_reprs;
-
if (ice_eswitch_br_offloads_init(pf))
goto err_br_offloads;
- ice_eswitch_napi_enable(pf);
+ pf->eswitch.is_running = true;
return 0;
err_br_offloads:
- ice_vsi_close(ctrl_vsi);
-err_setup_reprs:
- ice_repr_rem_from_all_vfs(pf);
-err_repr_add:
ice_eswitch_release_env(pf);
err_vsi:
ice_vsi_release(ctrl_vsi);
@@ -526,19 +477,19 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
}
/**
- * ice_eswitch_disable_switchdev - disable switchdev resources
+ * ice_eswitch_disable_switchdev - disable eswitch resources
* @pf: pointer to PF structure
*/
static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
{
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+ struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
- ice_eswitch_napi_disable(pf);
ice_eswitch_br_offloads_deinit(pf);
ice_eswitch_release_env(pf);
- ice_eswitch_release_reprs(pf, ctrl_vsi);
ice_vsi_release(ctrl_vsi);
- ice_repr_rem_from_all_vfs(pf);
+
+ pf->eswitch.is_running = false;
+ pf->eswitch.qs.is_reaching = false;
}
/**
@@ -566,6 +517,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
case DEVLINK_ESWITCH_MODE_LEGACY:
dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy",
pf->hw.pf_id);
+ xa_destroy(&pf->eswitch.reprs);
NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy");
break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
@@ -578,6 +530,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
pf->hw.pf_id);
+ xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC);
NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
break;
}
@@ -616,55 +569,19 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
}
/**
- * ice_eswitch_release - cleanup eswitch
- * @pf: pointer to PF structure
- */
-void ice_eswitch_release(struct ice_pf *pf)
-{
- if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
- return;
-
- ice_eswitch_disable_switchdev(pf);
- pf->switchdev.is_running = false;
-}
-
-/**
- * ice_eswitch_configure - configure eswitch
- * @pf: pointer to PF structure
- */
-int ice_eswitch_configure(struct ice_pf *pf)
-{
- int status;
-
- if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running)
- return 0;
-
- status = ice_eswitch_enable_switchdev(pf);
- if (status)
- return status;
-
- pf->switchdev.is_running = true;
- return 0;
-}
-
-/**
* ice_eswitch_start_all_tx_queues - start Tx queues of all port representors
* @pf: pointer to PF structure
*/
static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
{
- struct ice_vf *vf;
- unsigned int bkt;
-
- lockdep_assert_held(&pf->vfs.table_lock);
+ struct ice_repr *repr;
+ unsigned long id;
if (test_bit(ICE_DOWN, pf->state))
return;
- ice_for_each_vf(pf, bkt, vf) {
- if (vf->repr)
- ice_repr_start_tx_queues(vf->repr);
- }
+ xa_for_each(&pf->eswitch.reprs, id, repr)
+ ice_repr_start_tx_queues(repr);
}
/**
@@ -673,17 +590,145 @@ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
*/
void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
{
- struct ice_vf *vf;
- unsigned int bkt;
-
- lockdep_assert_held(&pf->vfs.table_lock);
+ struct ice_repr *repr;
+ unsigned long id;
if (test_bit(ICE_DOWN, pf->state))
return;
- ice_for_each_vf(pf, bkt, vf) {
- if (vf->repr)
- ice_repr_stop_tx_queues(vf->repr);
+ xa_for_each(&pf->eswitch.reprs, id, repr)
+ ice_repr_stop_tx_queues(repr);
+}
+
+/**
+ * ice_eswitch_stop_reprs - quiesce all port representors
+ * @pf: pointer to PF structure
+ *
+ * Calls ice_eswitch_del_sp_rules(), then stops the Tx queues of every
+ * representor and finally disables NAPI for every representor stored in
+ * pf->eswitch.reprs. ice_eswitch_start_reprs() performs the opposite steps
+ * in reverse order.
+ */
+static void ice_eswitch_stop_reprs(struct ice_pf *pf)
+{
+ ice_eswitch_del_sp_rules(pf);
+ ice_eswitch_stop_all_tx_queues(pf);
+ ice_eswitch_napi_disable(&pf->eswitch.reprs);
+}
+
+/**
+ * ice_eswitch_start_reprs - bring all port representors back up
+ * @pf: pointer to PF structure
+ *
+ * Enables NAPI for every representor stored in pf->eswitch.reprs, starts
+ * the representor Tx queues and then calls ice_eswitch_add_sp_rules().
+ * This is the reverse of ice_eswitch_stop_reprs().
+ */
+static void ice_eswitch_start_reprs(struct ice_pf *pf)
+{
+ ice_eswitch_napi_enable(&pf->eswitch.reprs);
+ ice_eswitch_start_all_tx_queues(pf);
+ ice_eswitch_add_sp_rules(pf);
+}
+
+/**
+ * ice_eswitch_cp_change_queues - adjust queue count of the control plane VSI
+ * @eswitch: eswitch state (control plane VSI and the qs bookkeeping)
+ * @change: number of queues being added (> 0) or removed (< 0); 0 means the
+ * control plane VSI only has to be opened
+ *
+ * Decides whether the control plane VSI has to be rebuilt with a new queue
+ * count, performs the rebuild if so, then adds @change to qs.value and
+ * remaps rings to vectors.
+ */
+static void
+ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change)
+{
+ struct ice_vsi *cp = eswitch->control_vsi;
+ /* 0 means "do not rebuild the VSI" */
+ int queues = 0;
+
+ if (eswitch->qs.is_reaching) {
+ /* a target was set by ice_eswitch_reserve_cp_queues(): rebuild
+ * straight to qs.to_reach once, but only while the target is
+ * not below the count this step would produce
+ */
+ if (eswitch->qs.to_reach >= eswitch->qs.value + change) {
+ queues = eswitch->qs.to_reach;
+ eswitch->qs.is_reaching = false;
+ } else {
+ queues = 0;
+ }
+ } else if ((change > 0 && cp->alloc_txq <= eswitch->qs.value) ||
+ change < 0) {
+ /* no target pending: grow only when the allocated queues do not
+ * exceed the ones in use, always shrink
+ */
+ queues = cp->alloc_txq + change;
+ }
+
+ if (queues) {
+ /* request the same number of Tx and Rx queues and rebuild */
+ cp->req_txq = queues;
+ cp->req_rxq = queues;
+ ice_vsi_close(cp);
+ ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT);
+ ice_vsi_open(cp);
+ } else if (!change) {
+ /* change == 0 means that VSI wasn't open, open it here */
+ ice_vsi_open(cp);
+ }
+
+ eswitch->qs.value += change;
+ ice_eswitch_remap_rings_to_vectors(eswitch);
+}
+
+/**
+ * ice_eswitch_attach - attach a VF to the eswitch
+ * @pf: pointer to PF structure
+ * @vf: VF for which a port representor is created
+ *
+ * Does nothing in legacy eswitch mode. If no representor exists yet,
+ * switchdev is enabled first. All existing representors are stopped while
+ * the new one is created, set up and stored in pf->eswitch.reprs, and are
+ * started again afterwards.
+ *
+ * Return: 0 on success or in legacy mode, negative error code on failure.
+ */
+int
+ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf)
+{
+ struct ice_repr *repr;
+ int change = 1;
+ int err;
+
+ if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
+ return 0;
+
+ if (xa_empty(&pf->eswitch.reprs)) {
+ err = ice_eswitch_enable_switchdev(pf);
+ if (err)
+ return err;
+ /* Control plane VSI is created with 1 queue as default */
+ pf->eswitch.qs.to_reach -= 1;
+ change = 0;
+ }
+
+ ice_eswitch_stop_reprs(pf);
+
+ repr = ice_repr_add_vf(vf);
+ if (IS_ERR(repr)) {
+ /* err is not set on this path unless switchdev was just
+ * enabled, so take the error code from the returned pointer
+ */
+ err = PTR_ERR(repr);
+ goto err_create_repr;
+ }
+
+ err = ice_eswitch_setup_repr(pf, repr);
+ if (err)
+ goto err_setup_repr;
+
+ err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr,
+ XA_LIMIT(1, INT_MAX), GFP_KERNEL);
+ if (err)
+ goto err_xa_alloc;
+
+ vf->repr_id = repr->id;
+
+ ice_eswitch_cp_change_queues(&pf->eswitch, change);
+ ice_eswitch_start_reprs(pf);
+
+ return 0;
+
+err_xa_alloc:
+ ice_eswitch_release_repr(pf, repr);
+err_setup_repr:
+ ice_repr_rem_vf(repr);
+err_create_repr:
+ /* undo the switchdev enable if this was the first representor */
+ if (xa_empty(&pf->eswitch.reprs))
+ ice_eswitch_disable_switchdev(pf);
+ ice_eswitch_start_reprs(pf);
+
+ return err;
+}
+
+/**
+ * ice_eswitch_detach - detach a VF from the eswitch
+ * @pf: pointer to PF structure
+ * @vf: VF whose port representor should be removed
+ *
+ * Looks the representor up by vf->repr_id and returns early when there is
+ * none. Otherwise the representor is erased from pf->eswitch.reprs,
+ * released and freed. If it was the last one, switchdev is disabled and the
+ * devlink rate nodes are destroyed; otherwise
+ * ice_eswitch_cp_change_queues() is called with a change of -1 and the
+ * remaining representors are started again.
+ */
+void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf)
+{
+ struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id);
+ struct devlink *devlink = priv_to_devlink(pf);
+
+ if (!repr)
+ return;
+
+ /* stop all representors while the set is being modified */
+ ice_eswitch_stop_reprs(pf);
+ xa_erase(&pf->eswitch.reprs, repr->id);
+
+ if (xa_empty(&pf->eswitch.reprs))
+ ice_eswitch_disable_switchdev(pf);
+ else
+ ice_eswitch_cp_change_queues(&pf->eswitch, -1);
+
+ ice_eswitch_release_repr(pf, repr);
+ ice_repr_rem_vf(repr);
+
+ if (xa_empty(&pf->eswitch.reprs)) {
+ /* since all port representors are destroyed, there is
+ * no point in keeping the nodes
+ */
+ ice_devlink_rate_clear_tx_topology(ice_get_main_vsi(pf));
+ devl_lock(devlink);
+ devl_rate_nodes_destroy(devlink);
+ devl_unlock(devlink);
+ } else {
+ ice_eswitch_start_reprs(pf);
}
}
@@ -693,30 +738,35 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
*/
int ice_eswitch_rebuild(struct ice_pf *pf)
{
- struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
- int status;
+ struct ice_repr *repr;
+ unsigned long id;
+ int err;
- ice_eswitch_napi_disable(pf);
- ice_eswitch_napi_del(pf);
+ if (!ice_is_switchdev_running(pf))
+ return 0;
- status = ice_eswitch_setup_env(pf);
- if (status)
- return status;
+ err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT);
+ if (err)
+ return err;
- status = ice_eswitch_setup_reprs(pf);
- if (status)
- return status;
-
- ice_eswitch_remap_rings_to_vectors(pf);
-
- ice_replay_tc_fltrs(pf);
-
- status = ice_vsi_open(ctrl_vsi);
- if (status)
- return status;
-
- ice_eswitch_napi_enable(pf);
- ice_eswitch_start_all_tx_queues(pf);
+ xa_for_each(&pf->eswitch.reprs, id, repr)
+ ice_eswitch_detach(pf, repr->vf);
return 0;
}
+
+/**
+ * ice_eswitch_reserve_cp_queues - reserve control plane VSI queues
+ * @pf: pointer to PF structure
+ * @change: how many more (or fewer) queues are needed
+ *
+ * Remember to call ice_eswitch_attach/detach() the "change" times.
+ */
+void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change)
+{
+ if (pf->eswitch.qs.value + change < 0)
+ return;
+
+ pf->eswitch.qs.to_reach = pf->eswitch.qs.value + change;
+ pf->eswitch.qs.is_reaching = true;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h
index b18bf83..1a288a0 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.h
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h
@@ -7,8 +7,9 @@
#include <net/devlink.h>
#ifdef CONFIG_ICE_SWITCHDEV
-void ice_eswitch_release(struct ice_pf *pf);
-int ice_eswitch_configure(struct ice_pf *pf);
+void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf);
+int
+ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf);
int ice_eswitch_rebuild(struct ice_pf *pf);
int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode);
@@ -17,7 +18,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf);
-void ice_eswitch_update_repr(struct ice_vsi *vsi);
+void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi);
void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf);
@@ -25,8 +26,15 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb,
struct ice_tx_offload_params *off);
netdev_tx_t
ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change);
#else /* CONFIG_ICE_SWITCHDEV */
-static inline void ice_eswitch_release(struct ice_pf *pf) { }
+static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { }
+
+static inline int
+ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf)
+{
+ return -EOPNOTSUPP;
+}
static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { }
@@ -34,7 +42,8 @@ static inline void
ice_eswitch_set_target_vsi(struct sk_buff *skb,
struct ice_tx_offload_params *off) { }
-static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { }
+static inline void
+ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) { }
static inline int ice_eswitch_configure(struct ice_pf *pf)
{
@@ -68,5 +77,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
return NETDEV_TX_BUSY;
}
+
+static inline void
+ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) { }
#endif /* CONFIG_ICE_SWITCHDEV */
#endif /* _ICE_ESWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
index 6ae0269..ac5beec 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
@@ -893,10 +893,14 @@ ice_eswitch_br_port_deinit(struct ice_esw_br *bridge,
ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry);
}
- if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back)
+ if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back) {
vsi->back->br_port = NULL;
- else if (vsi->vf && vsi->vf->repr)
- vsi->vf->repr->br_port = NULL;
+ } else {
+ struct ice_repr *repr = ice_repr_get_by_vsi(vsi);
+
+ if (repr)
+ repr->br_port = NULL;
+ }
xa_erase(&bridge->ports, br_port->vsi_idx);
ice_eswitch_br_port_vlans_flush(br_port);
@@ -947,7 +951,7 @@ ice_eswitch_br_vf_repr_port_init(struct ice_esw_br *bridge,
static int
ice_eswitch_br_uplink_port_init(struct ice_esw_br *bridge, struct ice_pf *pf)
{
- struct ice_vsi *vsi = pf->switchdev.uplink_vsi;
+ struct ice_vsi *vsi = pf->eswitch.uplink_vsi;
struct ice_esw_br_port *br_port;
int err;
@@ -1185,7 +1189,7 @@ ice_eswitch_br_port_event(struct notifier_block *nb,
static void
ice_eswitch_br_offloads_dealloc(struct ice_pf *pf)
{
- struct ice_esw_br_offloads *br_offloads = pf->switchdev.br_offloads;
+ struct ice_esw_br_offloads *br_offloads = pf->eswitch.br_offloads;
ASSERT_RTNL();
@@ -1194,7 +1198,7 @@ ice_eswitch_br_offloads_dealloc(struct ice_pf *pf)
ice_eswitch_br_deinit(br_offloads, br_offloads->bridge);
- pf->switchdev.br_offloads = NULL;
+ pf->eswitch.br_offloads = NULL;
kfree(br_offloads);
}
@@ -1205,14 +1209,14 @@ ice_eswitch_br_offloads_alloc(struct ice_pf *pf)
ASSERT_RTNL();
- if (pf->switchdev.br_offloads)
+ if (pf->eswitch.br_offloads)
return ERR_PTR(-EEXIST);
br_offloads = kzalloc(sizeof(*br_offloads), GFP_KERNEL);
if (!br_offloads)
return ERR_PTR(-ENOMEM);
- pf->switchdev.br_offloads = br_offloads;
+ pf->eswitch.br_offloads = br_offloads;
br_offloads->pf = pf;
return br_offloads;
@@ -1223,7 +1227,7 @@ ice_eswitch_br_offloads_deinit(struct ice_pf *pf)
{
struct ice_esw_br_offloads *br_offloads;
- br_offloads = pf->switchdev.br_offloads;
+ br_offloads = pf->eswitch.br_offloads;
if (!br_offloads)
return;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 4b1e563..d826b5a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -212,11 +212,18 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
vsi->alloc_txq));
break;
case ICE_VSI_SWITCHDEV_CTRL:
- /* The number of queues for ctrl VSI is equal to number of VFs.
+ /* The number of queues for ctrl VSI is equal to number of PRs
* Each ring is associated to the corresponding VF_PR netdev.
+ * Tx and Rx rings are always equal
*/
- vsi->alloc_txq = ice_get_num_vfs(pf);
- vsi->alloc_rxq = vsi->alloc_txq;
+ if (vsi->req_txq && vsi->req_rxq) {
+ vsi->alloc_txq = vsi->req_txq;
+ vsi->alloc_rxq = vsi->req_rxq;
+ } else {
+ vsi->alloc_txq = 1;
+ vsi->alloc_rxq = 1;
+ }
+
vsi->num_q_vectors = 1;
break;
case ICE_VSI_VF:
@@ -519,16 +526,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d
{
struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
struct ice_pf *pf = q_vector->vsi->back;
- struct ice_vf *vf;
- unsigned int bkt;
+ struct ice_repr *repr;
+ unsigned long id;
if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
return IRQ_HANDLED;
- rcu_read_lock();
- ice_for_each_vf_rcu(pf, bkt, vf)
- napi_schedule(&vf->repr->q_vector->napi);
- rcu_read_unlock();
+ xa_for_each(&pf->eswitch.reprs, id, repr)
+ napi_schedule(&repr->q_vector->napi);
return IRQ_HANDLED;
}
@@ -3071,27 +3076,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
}
/**
- * ice_vsi_realloc_stat_arrays - Frees unused stat structures
+ * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones
* @vsi: VSI pointer
- * @prev_txq: Number of Tx rings before ring reallocation
- * @prev_rxq: Number of Rx rings before ring reallocation
+ *
+ * Return: 0 on success, -ENOMEM when one of the ring stats arrays cannot be
+ * reallocated (the previous array pointer is kept in that case).
*/
-static void
-ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+static int
+ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi)
{
+ u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq;
+ u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq;
+ struct ice_ring_stats **tx_ring_stats;
+ struct ice_ring_stats **rx_ring_stats;
struct ice_vsi_stats *vsi_stat;
struct ice_pf *pf = vsi->back;
+ u16 prev_txq = vsi->alloc_txq;
+ u16 prev_rxq = vsi->alloc_rxq;
int i;
- if (!prev_txq || !prev_rxq)
- return;
- if (vsi->type == ICE_VSI_CHNL)
- return;
-
vsi_stat = pf->vsi_stats[vsi->idx];
- if (vsi->num_txq < prev_txq) {
- for (i = vsi->num_txq; i < prev_txq; i++) {
+ if (req_txq < prev_txq) {
+ for (i = req_txq; i < prev_txq; i++) {
if (vsi_stat->tx_ring_stats[i]) {
kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
@@ -3099,14 +3103,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
}
}
- if (vsi->num_rxq < prev_rxq) {
- for (i = vsi->num_rxq; i < prev_rxq; i++) {
+ /* keep the old Tx array so it can be restored if the realloc fails */
+ tx_ring_stats = vsi_stat->tx_ring_stats;
+ vsi_stat->tx_ring_stats =
+ krealloc_array(vsi_stat->tx_ring_stats, req_txq,
+ sizeof(*vsi_stat->tx_ring_stats),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vsi_stat->tx_ring_stats) {
+ vsi_stat->tx_ring_stats = tx_ring_stats;
+ return -ENOMEM;
+ }
+
+ if (req_rxq < prev_rxq) {
+ for (i = req_rxq; i < prev_rxq; i++) {
if (vsi_stat->rx_ring_stats[i]) {
kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
}
}
}
+
+ rx_ring_stats = vsi_stat->rx_ring_stats;
+ vsi_stat->rx_ring_stats =
+ krealloc_array(vsi_stat->rx_ring_stats, req_rxq,
+ sizeof(*vsi_stat->rx_ring_stats),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vsi_stat->rx_ring_stats) {
+ vsi_stat->rx_ring_stats = rx_ring_stats;
+ return -ENOMEM;
+ }
+
+ return 0;
}
/**
@@ -3123,9 +3149,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
{
struct ice_vsi_cfg_params params = {};
struct ice_coalesce_stored *coalesce;
- int ret, prev_txq, prev_rxq;
int prev_num_q_vectors = 0;
struct ice_pf *pf;
+ int ret;
if (!vsi)
return -EINVAL;
@@ -3144,8 +3170,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
- prev_txq = vsi->num_txq;
- prev_rxq = vsi->num_rxq;
+ ret = ice_vsi_realloc_stat_arrays(vsi);
+ if (ret)
+ goto err_vsi_cfg;
ice_vsi_decfg(vsi);
ret = ice_vsi_cfg_def(vsi, ¶ms);
@@ -3163,8 +3190,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
return ice_schedule_reset(pf, ICE_RESET_PFR);
}
- ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
-
ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
kfree(coalesce);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index fb9c93f..43ba3e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4702,6 +4702,8 @@ static void ice_deinit_features(struct ice_pf *pf)
ice_ptp_release(pf);
if (test_bit(ICE_FLAG_DPLL, pf->flags))
ice_dpll_deinit(pf);
+ if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ xa_destroy(&pf->eswitch.reprs);
}
static void ice_init_wakeup(struct ice_pf *pf)
@@ -7401,9 +7403,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
goto err_vsi_rebuild;
}
- err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
+ err = ice_eswitch_rebuild(pf);
if (err) {
- dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
+ dev_err(dev, "Switchdev rebuild failed: %d\n", err);
goto err_vsi_rebuild;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index c686ac0..5f30fb1 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -14,7 +14,7 @@
*/
static int ice_repr_get_sw_port_id(struct ice_repr *repr)
{
- return repr->vf->pf->hw.port_info->lport;
+ return repr->src_vsi->back->hw.port_info->lport;
}
/**
@@ -35,7 +35,7 @@ ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len)
return -EOPNOTSUPP;
res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr),
- repr->vf->vf_id);
+ repr->id);
if (res <= 0)
return -EOPNOTSUPP;
return 0;
@@ -278,25 +278,67 @@ ice_repr_reg_netdev(struct net_device *netdev)
return register_netdev(netdev);
}
+/**
+ * ice_repr_remove_node - destroy the devlink rate leaf of a devlink port
+ * @devlink_port: devlink port whose rate leaf is destroyed
+ *
+ * devl_rate_leaf_destroy() is called with the lock of the devlink instance
+ * owning the port held.
+ */
+static void ice_repr_remove_node(struct devlink_port *devlink_port)
+{
+ devl_lock(devlink_port->devlink);
+ devl_rate_leaf_destroy(devlink_port);
+ devl_unlock(devlink_port->devlink);
+}
+
/**
- * ice_repr_add - add representor for VF
- * @vf: pointer to VF structure
+ * ice_repr_rem - remove representor from VF
+ * @repr: pointer to representor structure
*/
-static int ice_repr_add(struct ice_vf *vf)
+static void ice_repr_rem(struct ice_repr *repr)
+{
+ kfree(repr->q_vector);
+ free_netdev(repr->netdev);
+ kfree(repr);
+}
+
+/**
+ * ice_repr_rem_vf - remove representor from VF
+ * @repr: pointer to representor structure
+ *
+ * Destroys the devlink rate leaf of the VF devlink port, unregisters the
+ * representor netdev, destroys the VF devlink port, calls
+ * ice_virtchnl_set_dflt_ops() for the VF and finally frees the representor
+ * via ice_repr_rem(). @repr must not be used after this call.
+ */
+void ice_repr_rem_vf(struct ice_repr *repr)
+{
+ ice_repr_remove_node(&repr->vf->devlink_port);
+ unregister_netdev(repr->netdev);
+ ice_devlink_destroy_vf_port(repr->vf);
+ ice_virtchnl_set_dflt_ops(repr->vf);
+ ice_repr_rem(repr);
+}
+
+/**
+ * ice_repr_set_tx_topology - initialise the devlink rate Tx topology
+ * @pf: pointer to PF structure
+ *
+ * Calls ice_devlink_rate_init_tx_topology() for the main VSI, unless ADQ or
+ * DCB is active or switchdev is not running.
+ */
+static void ice_repr_set_tx_topology(struct ice_pf *pf)
+{
+ struct devlink *devlink;
+
+ /* only export if ADQ and DCB disabled and eswitch enabled */
+ if (ice_is_adq_active(pf) || ice_is_dcb_active(pf) ||
+ !ice_is_switchdev_running(pf))
+ return;
+
+ devlink = priv_to_devlink(pf);
+ ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf));
+}
+
+/**
+ * ice_repr_add - add representor for generic VSI
+ * @pf: pointer to PF structure
+ * @src_vsi: pointer to VSI structure of device to represent
+ * @parent_mac: device MAC address
+ */
+static struct ice_repr *
+ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac)
{
struct ice_q_vector *q_vector;
struct ice_netdev_priv *np;
struct ice_repr *repr;
- struct ice_vsi *vsi;
int err;
- vsi = ice_get_vf_vsi(vf);
- if (!vsi)
- return -EINVAL;
-
repr = kzalloc(sizeof(*repr), GFP_KERNEL);
if (!repr)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv));
if (!repr->netdev) {
@@ -304,9 +346,7 @@ static int ice_repr_add(struct ice_vf *vf)
goto err_alloc;
}
- repr->src_vsi = vsi;
- repr->vf = vf;
- vf->repr = repr;
+ repr->src_vsi = src_vsi;
np = netdev_priv(repr->netdev);
np->repr = repr;
@@ -316,10 +356,40 @@ static int ice_repr_add(struct ice_vf *vf)
goto err_alloc_q_vector;
}
repr->q_vector = q_vector;
+ repr->q_id = repr->id;
+
+ ether_addr_copy(repr->parent_mac, parent_mac);
+
+ return repr;
+
+err_alloc_q_vector:
+ free_netdev(repr->netdev);
+err_alloc:
+ kfree(repr);
+ return ERR_PTR(err);
+}
+
+struct ice_repr *ice_repr_add_vf(struct ice_vf *vf)
+{
+ struct ice_repr *repr;
+ struct ice_vsi *vsi;
+ int err;
+
+ vsi = ice_get_vf_vsi(vf);
+ if (!vsi)
+ return ERR_PTR(-ENOENT);
err = ice_devlink_create_vf_port(vf);
if (err)
- goto err_devlink;
+ return ERR_PTR(err);
+
+ repr = ice_repr_add(vf->pf, vsi, vf->hw_lan_addr);
+ if (IS_ERR(repr)) {
+ err = PTR_ERR(repr);
+ goto err_repr_add;
+ }
+
+ repr->vf = vf;
repr->netdev->min_mtu = ETH_MIN_MTU;
repr->netdev->max_mtu = ICE_MAX_MTU;
@@ -331,100 +401,23 @@ static int ice_repr_add(struct ice_vf *vf)
goto err_netdev;
ice_virtchnl_set_repr_ops(vf);
+ ice_repr_set_tx_topology(vf->pf);
- return 0;
+ return repr;
err_netdev:
+ ice_repr_rem(repr);
+err_repr_add:
ice_devlink_destroy_vf_port(vf);
-err_devlink:
- kfree(repr->q_vector);
- vf->repr->q_vector = NULL;
-err_alloc_q_vector:
- free_netdev(repr->netdev);
- repr->netdev = NULL;
-err_alloc:
- kfree(repr);
- vf->repr = NULL;
- return err;
+ return ERR_PTR(err);
}
-/**
- * ice_repr_rem - remove representor from VF
- * @vf: pointer to VF structure
- */
-static void ice_repr_rem(struct ice_vf *vf)
+struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi)
{
- if (!vf->repr)
- return;
+ if (!vsi->vf)
+ return NULL;
- kfree(vf->repr->q_vector);
- vf->repr->q_vector = NULL;
- unregister_netdev(vf->repr->netdev);
- ice_devlink_destroy_vf_port(vf);
- free_netdev(vf->repr->netdev);
- vf->repr->netdev = NULL;
- kfree(vf->repr);
- vf->repr = NULL;
-
- ice_virtchnl_set_dflt_ops(vf);
-}
-
-/**
- * ice_repr_rem_from_all_vfs - remove port representor for all VFs
- * @pf: pointer to PF structure
- */
-void ice_repr_rem_from_all_vfs(struct ice_pf *pf)
-{
- struct devlink *devlink;
- struct ice_vf *vf;
- unsigned int bkt;
-
- lockdep_assert_held(&pf->vfs.table_lock);
-
- ice_for_each_vf(pf, bkt, vf)
- ice_repr_rem(vf);
-
- /* since all port representors are destroyed, there is
- * no point in keeping the nodes
- */
- devlink = priv_to_devlink(pf);
- devl_lock(devlink);
- devl_rate_nodes_destroy(devlink);
- devl_unlock(devlink);
-}
-
-/**
- * ice_repr_add_for_all_vfs - add port representor for all VFs
- * @pf: pointer to PF structure
- */
-int ice_repr_add_for_all_vfs(struct ice_pf *pf)
-{
- struct devlink *devlink;
- struct ice_vf *vf;
- unsigned int bkt;
- int err;
-
- lockdep_assert_held(&pf->vfs.table_lock);
-
- ice_for_each_vf(pf, bkt, vf) {
- err = ice_repr_add(vf);
- if (err)
- goto err;
- }
-
- /* only export if ADQ and DCB disabled */
- if (ice_is_adq_active(pf) || ice_is_dcb_active(pf))
- return 0;
-
- devlink = priv_to_devlink(pf);
- ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf));
-
- return 0;
-
-err:
- ice_repr_rem_from_all_vfs(pf);
-
- return err;
+ return xa_load(&vsi->back->eswitch.reprs, vsi->vf->repr_id);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h
index e1ee2d2..f9aede3 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.h
+++ b/drivers/net/ethernet/intel/ice/ice_repr.h
@@ -13,14 +13,17 @@ struct ice_repr {
struct net_device *netdev;
struct metadata_dst *dst;
struct ice_esw_br_port *br_port;
+ int q_id;
+ u32 id;
+ u8 parent_mac[ETH_ALEN];
#ifdef CONFIG_ICE_SWITCHDEV
/* info about slow path rule */
struct ice_rule_query_data sp_rule;
#endif
};
-int ice_repr_add_for_all_vfs(struct ice_pf *pf);
-void ice_repr_rem_from_all_vfs(struct ice_pf *pf);
+struct ice_repr *ice_repr_add_vf(struct ice_vf *vf);
+void ice_repr_rem_vf(struct ice_repr *repr);
void ice_repr_start_tx_queues(struct ice_repr *repr);
void ice_repr_stop_tx_queues(struct ice_repr *repr);
@@ -29,4 +32,6 @@ void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi);
struct ice_repr *ice_netdev_to_repr(struct net_device *netdev);
bool ice_is_port_repr_netdev(const struct net_device *netdev);
+
+struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi);
#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 2a5e661..5a45bd5 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -172,13 +172,14 @@ void ice_free_vfs(struct ice_pf *pf)
else
dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
- mutex_lock(&vfs->table_lock);
+ ice_eswitch_reserve_cp_queues(pf, -ice_get_num_vfs(pf));
- ice_eswitch_release(pf);
+ mutex_lock(&vfs->table_lock);
ice_for_each_vf(pf, bkt, vf) {
mutex_lock(&vf->cfg_lock);
+ ice_eswitch_detach(pf, vf);
ice_dis_vf_qs(vf);
if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
@@ -614,6 +615,14 @@ static int ice_start_vfs(struct ice_pf *pf)
goto teardown;
}
+ retval = ice_eswitch_attach(pf, vf);
+ if (retval) {
+ dev_err(ice_pf_to_dev(pf), "Failed to attach VF %d to eswitch, error %d",
+ vf->vf_id, retval);
+ ice_vf_vsi_release(vf);
+ goto teardown;
+ }
+
set_bit(ICE_VF_STATE_INIT, vf->vf_states);
ice_ena_vf_mappings(vf);
wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
@@ -923,6 +932,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
goto err_unroll_sriov;
}
+ ice_eswitch_reserve_cp_queues(pf, num_vfs);
ret = ice_start_vfs(pf);
if (ret) {
dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret);
@@ -932,12 +942,6 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
clear_bit(ICE_VF_DIS, pf->state);
- ret = ice_eswitch_configure(pf);
- if (ret) {
- dev_err(dev, "Failed to configure eswitch, err %d\n", ret);
- goto err_unroll_sriov;
- }
-
/* rearm global interrupts */
if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state))
ice_irq_dynamic_ena(hw, NULL, NULL);
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index dd03cb6..08d3bbf 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -653,7 +653,7 @@ static int ice_tc_setup_redirect_action(struct net_device *filter_dev,
ice_tc_is_dev_uplink(target_dev)) {
repr = ice_netdev_to_repr(filter_dev);
- fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi;
+ fltr->dest_vsi = repr->src_vsi->back->eswitch.uplink_vsi;
fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
} else if (ice_tc_is_dev_uplink(filter_dev) &&
ice_is_port_repr_netdev(target_dev)) {
@@ -765,7 +765,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
rule_info.sw_act.src = hw->pf_id;
rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
} else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
- fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) {
+ fltr->dest_vsi == vsi->back->eswitch.uplink_vsi) {
/* VF to Uplink */
rule_info.sw_act.flag |= ICE_FLTR_TX;
rule_info.sw_act.src = vsi->idx;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index aca1f2e..d2a99a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -760,6 +760,7 @@ void ice_reset_all_vfs(struct ice_pf *pf)
ice_for_each_vf(pf, bkt, vf) {
mutex_lock(&vf->cfg_lock);
+ ice_eswitch_detach(pf, vf);
vf->driver_caps = 0;
ice_vc_set_default_allowlist(vf);
@@ -775,13 +776,11 @@ void ice_reset_all_vfs(struct ice_pf *pf)
ice_vf_rebuild_vsi(vf);
ice_vf_post_vsi_rebuild(vf);
+ ice_eswitch_attach(pf, vf);
+
mutex_unlock(&vf->cfg_lock);
}
- if (ice_is_eswitch_mode_switchdev(pf))
- if (ice_eswitch_rebuild(pf))
- dev_warn(dev, "eswitch rebuild failed\n");
-
ice_flush(hw);
clear_bit(ICE_VF_DIS, pf->state);
@@ -928,7 +927,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
goto out_unlock;
}
- ice_eswitch_update_repr(vsi);
+ ice_eswitch_update_repr(vf->repr_id, vsi);
/* if the VF has been reset allow it to come up again */
ice_mbx_clear_malvf(&vf->mbx_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 93c774f..3586655 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -130,7 +130,7 @@ struct ice_vf {
struct ice_mdd_vf_events mdd_tx_events;
DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
- struct ice_repr *repr;
+ unsigned long repr_id;
const struct ice_virtchnl_ops *virtchnl_ops;
const struct ice_vf_ops *vf_ops;
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index f48f82d..ac7c861 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -81,6 +81,21 @@ struct igc_tx_timestamp_request {
u32 flags; /* flags that should be added to the tx_buffer */
};
+struct igc_inline_rx_tstamps {
+ /* Overlay for the timestamps found at the start of an Rx packet buffer.
+ * Timestamps are saved in little endian at the beginning of the packet
+ * buffer following the layout:
+ *
+ * DWORD: | 0 | 1 | 2 | 3 |
+ * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH |
+ *
+ * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds
+ * part of the timestamp.
+ */
+ __le32 timer1[2]; /* DWORDs 0-1: timer 1 SYSTIML, SYSTIMH */
+ __le32 timer0[2]; /* DWORDs 2-3: timer 0 SYSTIML, SYSTIMH */
+};
+
struct igc_ring_container {
struct igc_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
@@ -261,6 +276,8 @@ struct igc_adapter {
unsigned int ptp_flags;
/* System time value lock */
spinlock_t tmreg_lock;
+ /* Free-running timer lock */
+ spinlock_t free_timer_lock;
struct cyclecounter cc;
struct timecounter tc;
struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
@@ -469,6 +486,8 @@ enum igc_tx_flags {
IGC_TX_FLAGS_TSTAMP_1 = 0x100,
IGC_TX_FLAGS_TSTAMP_2 = 0x200,
IGC_TX_FLAGS_TSTAMP_3 = 0x400,
+
+ IGC_TX_FLAGS_TSTAMP_TIMER_1 = 0x800,
};
enum igc_boards {
@@ -531,7 +550,7 @@ struct igc_rx_buffer {
struct igc_xdp_buff {
struct xdp_buff xdp;
union igc_adv_rx_desc *rx_desc;
- ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
+ struct igc_inline_rx_tstamps *rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
};
struct igc_q_vector {
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index f7d6491d..bf8cdfb 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -37,6 +37,10 @@ struct igc_adv_tx_context_desc {
#define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* Select register 1 for timestamp */
#define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* Select register 2 for timestamp */
#define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* Select register 3 for timestamp */
+#define IGC_ADVTXD_TSTAMP_TIMER_1 0x00010000 /* Select timer 1 for timestamp */
+#define IGC_ADVTXD_TSTAMP_TIMER_2 0x00020000 /* Select timer 2 for timestamp */
+#define IGC_ADVTXD_TSTAMP_TIMER_3 0x00030000 /* Select timer 3 for timestamp */
+
#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index b303701..5f92b3c 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -317,6 +317,8 @@
#define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
#define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */
+#define IGC_TXD_PTP2_TIMER_1 0x00000020
+
/* IPSec Encrypt Enable */
#define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
#define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e9bb403..61db1d3 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1299,14 +1299,16 @@ static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
/* insert L4 checksum */
- olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
- ((IGC_TXD_POPTS_TXSM << 8) /
- IGC_TX_FLAGS_CSUM);
+ olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM,
+ (IGC_TXD_POPTS_TXSM << 8));
/* insert IPv4 checksum */
- olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
- (((IGC_TXD_POPTS_IXSM << 8)) /
- IGC_TX_FLAGS_IPV4);
+ olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4,
+ (IGC_TXD_POPTS_IXSM << 8));
+
+ /* Use the second timer (free running, in general) for the timestamp */
+ olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1,
+ IGC_TXD_PTP2_TIMER_1);
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
}
@@ -1651,6 +1653,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_USE_CYCLES)
+ tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1;
} else {
adapter->tx_hwtstamp_skipped++;
}
@@ -1963,9 +1967,9 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
struct igc_rx_buffer *rx_buffer,
- struct xdp_buff *xdp,
- ktime_t timestamp)
+ struct igc_xdp_buff *ctx)
{
+ struct xdp_buff *xdp = &ctx->xdp;
unsigned int metasize = xdp->data - xdp->data_meta;
unsigned int size = xdp->data_end - xdp->data;
unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
@@ -1982,8 +1986,10 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
if (unlikely(!skb))
return NULL;
- if (timestamp)
- skb_hwtstamps(skb)->hwtstamp = timestamp;
+ if (ctx->rx_ts) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
+ skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
+ }
/* Determine available headroom for copy */
headlen = size;
@@ -2583,11 +2589,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
int xdp_status = 0, rx_buffer_pgcnt;
while (likely(total_packets < budget)) {
- union igc_adv_rx_desc *rx_desc;
+ struct igc_xdp_buff ctx = { .rx_ts = NULL };
struct igc_rx_buffer *rx_buffer;
+ union igc_adv_rx_desc *rx_desc;
unsigned int size, truesize;
- struct igc_xdp_buff ctx;
- ktime_t timestamp = 0;
int pkt_offset = 0;
void *pktbuf;
@@ -2614,9 +2619,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
- timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
- pktbuf);
- ctx.rx_ts = timestamp;
+ ctx.rx_ts = pktbuf;
pkt_offset = IGC_TS_HDR_LEN;
size -= IGC_TS_HDR_LEN;
}
@@ -2653,8 +2656,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
else if (ring_uses_build_skb(rx_ring))
skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
else
- skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
- timestamp);
+ skb = igc_construct_skb(rx_ring, rx_buffer, &ctx);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -2803,9 +2805,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
ctx->rx_desc = desc;
if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
- timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
- bi->xdp->data);
- ctx->rx_ts = timestamp;
+ ctx->rx_ts = bi->xdp->data;
bi->xdp->data += IGC_TS_HDR_LEN;
@@ -6562,6 +6562,24 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
return 0;
}
+/* Convert the inline Rx timestamp referenced by @hwtstamps->netdev_data to
+ * ktime_t: the timer 1 pair when @cycles is set, the timer 0 pair otherwise.
+ */
+static ktime_t igc_get_tstamp(struct net_device *dev,
+			      const struct skb_shared_hwtstamps *hwtstamps,
+			      bool cycles)
+{
+	struct igc_inline_rx_tstamps *inline_ts = hwtstamps->netdev_data;
+	__le32 *raw = cycles ? inline_ts->timer1 : inline_ts->timer0;
+
+	return igc_ptp_rx_pktstamp(netdev_priv(dev), raw);
+}
+
static const struct net_device_ops igc_netdev_ops = {
.ndo_open = igc_open,
.ndo_stop = igc_close,
@@ -6579,6 +6597,7 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_bpf = igc_bpf,
.ndo_xdp_xmit = igc_xdp_xmit,
.ndo_xsk_wakeup = igc_xsk_wakeup,
+ .ndo_get_tstamp = igc_get_tstamp,
};
/* PCIe configuration access */
@@ -6682,9 +6701,11 @@ static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
{
const struct igc_xdp_buff *ctx = (void *)_ctx;
+ struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev);
+ struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts;
if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) {
- *timestamp = ctx->rx_ts;
+ *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0);
return 0;
}
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 928f387..885faaa 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -459,12 +459,10 @@ static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
/**
* igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer
* @adapter: Pointer to adapter the packet buffer belongs to
- * @buf: Pointer to packet buffer
+ * @buf: Pointer to start of timestamp in HW format (2 32-bit words)
*
- * This function retrieves the timestamp saved in the beginning of packet
- * buffer. While two timestamps are available, one in timer0 reference and the
- * other in timer1 reference, this function considers only the timestamp in
- * timer0 reference.
+ * This function retrieves and converts the timestamp stored at @buf
+ * to ktime_t, adjusting for hardware latencies.
*
* Returns timestamp value.
*/
@@ -474,17 +472,8 @@ ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf)
u32 secs, nsecs;
int adjust;
- /* Timestamps are saved in little endian at the beginning of the packet
- * buffer following the layout:
- *
- * DWORD: | 0 | 1 | 2 | 3 |
- * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH |
- *
- * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds
- * part of the timestamp.
- */
- nsecs = le32_to_cpu(buf[2]);
- secs = le32_to_cpu(buf[3]);
+ nsecs = le32_to_cpu(buf[0]);
+ secs = le32_to_cpu(buf[1]);
timestamp = ktime_set(secs, nsecs);
@@ -542,10 +531,11 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_rx_queues; i++) {
val = rd32(IGC_SRRCTL(i));
- /* FIXME: For now, only support retrieving RX timestamps from
- * timer 0.
+ /* Enable retrieving timestamps from timer 0, the
+ * "adjustable clock" and timer 1 the "free running
+ * clock".
*/
- val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) |
+ val |= IGC_SRRCTL_TIMER1SEL(1) | IGC_SRRCTL_TIMER0SEL(0) |
IGC_SRRCTL_TIMESTAMP;
wr32(IGC_SRRCTL(i), val);
}
@@ -1035,6 +1025,26 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
adapter, &adapter->snapshot, cts);
}
+/* Read timer 1 (IGC_SYSTIML_1 / IGC_SYSTIMH_1) into @ts while holding
+ * free_timer_lock, recording system timestamps around the reads in @sts.
+ * Always returns 0.
+ */
+static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp,
+				struct timespec64 *ts,
+				struct ptp_system_timestamp *sts)
+{
+	struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
+						   ptp_caps);
+	/* NOTE(review): 'hw' is presumably picked up implicitly by rd32();
+	 * keep the name.
+	 */
+	struct igc_hw *hw = &adapter->hw;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&adapter->free_timer_lock, irq_flags);
+
+	/* Low word first, then high word, bracketed by the system stamps */
+	ptp_read_system_prets(sts);
+	ts->tv_nsec = rd32(IGC_SYSTIML_1);
+	ts->tv_sec = rd32(IGC_SYSTIMH_1);
+	ptp_read_system_postts(sts);
+
+	spin_unlock_irqrestore(&adapter->free_timer_lock, irq_flags);
+
+	return 0;
+}
+
/**
* igc_ptp_init - Initialize PTP functionality
* @adapter: Board private structure
@@ -1088,6 +1098,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225;
adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225;
adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225;
+ adapter->ptp_caps.getcyclesx64 = igc_ptp_getcyclesx64;
adapter->ptp_caps.settime64 = igc_ptp_settime_i225;
adapter->ptp_caps.enable = igc_ptp_feature_enable_i225;
adapter->ptp_caps.pps = 1;
@@ -1108,6 +1119,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
}
spin_lock_init(&adapter->ptp_tx_lock);
+ spin_lock_init(&adapter->free_timer_lock);
spin_lock_init(&adapter->tmreg_lock);
adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 20e17f5..d38c87d 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -243,6 +243,11 @@
#define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */
#define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */
+#define IGC_SYSTIML_1 0x0B688 /* System time register Low - RO (timer 1) */
+#define IGC_SYSTIMH_1 0x0B68C /* System time register High - RO (timer 1) */
+#define IGC_SYSTIMR_1 0x0B684 /* System time register Residue (timer 1) */
+#define IGC_TIMINCA_1 0x0B690 /* Increment attributes register - RW (timer 1) */
+
/* TX Timestamp Low */
#define IGC_TXSTMPL_0 0x0B618
#define IGC_TXSTMPL_1 0x0B698
diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile
index 2026c81..02a4a21 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/Makefile
+++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile
@@ -6,4 +6,5 @@
obj-$(CONFIG_OCTEON_EP) += octeon_ep.o
octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \
- octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o
+ octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o \
+ octep_cnxk_pf.o
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c
new file mode 100644
index 0000000..abb03e9
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c
@@ -0,0 +1,886 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+#include "octep_regs_cnxk_pf.h"
+
+/* We will support 128 pf's in control mbox */
+#define CTRL_MBOX_MAX_PF 128
+#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF))
+
+/* Names of Hardware non-queue generic interrupts.
+ *
+ * Installed as conf->msix_cfg.non_ioq_msix_names by
+ * octep_init_config_cnxk_pf(). Entries are positional, so the order must
+ * not change. Each reserved slot carries a distinct "epf_rsvdN" name so
+ * that no two vectors share a label.
+ */
+static char *cnxk_non_ioq_msix_names[] = {
+	"epf_ire_rint",
+	"epf_ore_rint",
+	"epf_vfire_rint",
+	"epf_rsvd0",
+	"epf_vfore_rint",
+	"epf_rsvd1",
+	"epf_mbox_rint",
+	"epf_rsvd2_0",
+	"epf_rsvd2_1",
+	"epf_dma_rint",
+	"epf_dma_vf_rint",
+	"epf_rsvd3",
+	"epf_pp_vf_rint",
+	"epf_rsvd4",
+	"epf_misc_rint",
+	"epf_rsvd5",
+	/* Next 16 are for OEI_RINT */
+	"epf_oei_rint0",
+	"epf_oei_rint1",
+	"epf_oei_rint2",
+	"epf_oei_rint3",
+	"epf_oei_rint4",
+	"epf_oei_rint5",
+	"epf_oei_rint6",
+	"epf_oei_rint7",
+	"epf_oei_rint8",
+	"epf_oei_rint9",
+	"epf_oei_rint10",
+	"epf_oei_rint11",
+	"epf_oei_rint12",
+	"epf_oei_rint13",
+	"epf_oei_rint14",
+	"epf_oei_rint15",
+	/* IOQ interrupt */
+	"octeon_ep"
+};
+
+/* Dump useful hardware CSRs for debug purpose.
+ *
+ * Logs, via dev_info(), the offset and current 64-bit value of each input
+ * (IQ) and output (OQ) ring CSR for ring @qno, followed by R_ERR_TYPE.
+ * @qno is handed to the register macros unchanged; no PF starting ring
+ * number is added in this function.
+ */
+static void cnxk_dump_regs(struct octep_device *oct, int qno)
+{
+ struct device *dev = &oct->pdev->dev;
+
+ dev_info(dev, "IQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_INSTR_DBELL(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(qno)));
+ dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_CONTROL(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(qno)));
+ dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_ENABLE(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_INSTR_BADDR(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_INSTR_RSIZE(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(qno)));
+ dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_CNTS(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_CNTS(qno)));
+ dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_INT_LEVELS(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_PKT_CNT(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_IN_BYTE_CNT(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(qno)));
+
+ dev_info(dev, "OQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_SLIST_DBELL(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(qno)));
+ dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_CONTROL(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(qno)));
+ dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_ENABLE(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_SLIST_BADDR(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_SLIST_RSIZE(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(qno)));
+ dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_CNTS(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_CNTS(qno)));
+ dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_INT_LEVELS(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_PKT_CNT(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_OUT_BYTE_CNT(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_OUT_BYTE_CNT(qno)));
+ dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n",
+ qno, CNXK_SDP_R_ERR_TYPE(qno),
+ octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(qno)));
+}
+
+/* Reset Hardware Tx queue.
+ *
+ * @q_no is relative to the PF and is converted to an absolute ring number
+ * by adding conf->pf_ring_cfg.srn. The ring is disabled first, then its
+ * count, interrupt-level, base-address and size CSRs are written with 0,
+ * and finally 0xFFFFFFFF is written to the doorbell CSR.
+ *
+ * Always returns 0.
+ */
+static int cnxk_reset_iq(struct octep_device *oct, int q_no)
+{
+ struct octep_config *conf = oct->conf;
+ u64 val = 0ULL;
+
+ dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no);
+
+ /* Get absolute queue number */
+ q_no += conf->pf_ring_cfg.srn;
+
+ /* Disable the Tx/Instruction Ring */
+ octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(q_no), val);
+
+ /* Zero the Instruction Ring count, interrupt level, base address and
+ * ring size CSRs
+ */
+ octep_write_csr64(oct, CNXK_SDP_R_IN_CNTS(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(q_no), val);
+
+ val = 0xFFFFFFFF; /* NOTE(review): presumably clears the doorbell count - confirm */
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(q_no), val);
+
+ return 0;
+}
+
+/* Reset Hardware Rx queue.
+ *
+ * @q_no is relative to the PF; CFG_GET_PORTS_PF_SRN() is added to obtain
+ * the ring number used for the CSR macros. The ring is disabled and its
+ * base-address, size and interrupt-level CSRs are zeroed before the
+ * counters and the doorbell are written.
+ */
+static void cnxk_reset_oq(struct octep_device *oct, int q_no)
+{
+ u64 val = 0ULL;
+
+ q_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+ /* Disable Output (Rx) Ring */
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(q_no), val);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(q_no), val);
+
+ /* Clear count CSRs: the value read from OUT_CNTS is written straight
+ * back (NOTE(review): presumably write-to-subtract/clear - confirm)
+ */
+ val = octep_read_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no));
+ octep_write_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no), val);
+
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); /* 36-bit all-ones value */
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF);
+}
+
+/* Reset every active PF IO ring; for each ring the Tx queue is reset
+ * before the Rx queue.
+ */
+static void octep_reset_io_queues_cnxk_pf(struct octep_device *oct)
+{
+	int ring = 0;
+
+	dev_dbg(&oct->pdev->dev, "Reset OCTEP_CNXK PF IO Queues\n");
+
+	while (ring < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf)) {
+		cnxk_reset_iq(oct, ring);
+		cnxk_reset_oq(oct, ring);
+		ring++;
+	}
+}
+
+/* Record the BAR0 addresses of the four window registers (write address,
+ * read address, write data, read data) in oct->pci_win_regs.
+ */
+static void octep_setup_pci_window_regs_cnxk_pf(struct octep_device *oct)
+{
+	u8 __iomem *bar0 = oct->mmio[0].hw_addr;
+
+	oct->pci_win_regs.pci_win_wr_addr = bar0 + CNXK_SDP_WIN_WR_ADDR64;
+	oct->pci_win_regs.pci_win_rd_addr = bar0 + CNXK_SDP_WIN_RD_ADDR64;
+	oct->pci_win_regs.pci_win_wr_data = bar0 + CNXK_SDP_WIN_WR_DATA64;
+	oct->pci_win_regs.pci_win_rd_data = bar0 + CNXK_SDP_WIN_RD_DATA64;
+}
+
+/* Program SDP_EPVF_RING for every active PF ring, then read each register
+ * back and log it at debug level.
+ */
+static void octep_configure_ring_mapping_cnxk_pf(struct octep_device *oct)
+{
+	u64 first_ring = CFG_GET_PORTS_PF_SRN(oct->conf);
+	struct device *dev = &oct->pdev->dev;
+	int i;
+
+	for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+		u64 regval = 0;
+
+		/* A non-zero PCIe port selects function value 8 */
+		if (oct->pcie_port)
+			regval = 8 << CNXK_SDP_FUNC_SEL_EPF_BIT_POS;
+
+		octep_write_csr64(oct, CNXK_SDP_EPVF_RING(first_ring + i),
+				  regval);
+		regval = octep_read_csr64(oct,
+					  CNXK_SDP_EPVF_RING(first_ring + i));
+
+		dev_dbg(dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n",
+			CNXK_SDP_EPVF_RING(first_ring + i), regval);
+	}
+}
+
+/* Initialize configuration limits and initial active config.
+ *
+ * Fills oct->conf from CNXK_SDP_EPF_RINFO (SR-IOV ring layout) and
+ * CNXK_SDP_MAC_PF_RING_CTL (PF ring layout), applies the driver default
+ * IQ/OQ, MSI-X and firmware heartbeat settings, and computes the control
+ * mailbox address inside mmio[2]. Every "active" value starts out equal to
+ * the corresponding maximum read from hardware.
+ */
+static void octep_init_config_cnxk_pf(struct octep_device *oct)
+{
+ struct octep_config *conf = oct->conf;
+ struct pci_dev *pdev = oct->pdev;
+ u8 link = 0;
+ u64 val;
+ int pos;
+
+ /* Read ring configuration:
+ * PF ring count, number of VFs and rings per VF supported
+ */
+ val = octep_read_csr64(oct, CNXK_SDP_EPF_RINFO);
+ dev_info(&pdev->dev, "SDP_EPF_RINFO[0x%x]:0x%llx\n", CNXK_SDP_EPF_RINFO, val);
+ conf->sriov_cfg.max_rings_per_vf = CNXK_SDP_EPF_RINFO_RPVF(val);
+ conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf;
+ conf->sriov_cfg.max_vfs = CNXK_SDP_EPF_RINFO_NVFS(val);
+ conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs;
+ conf->sriov_cfg.vf_srn = CNXK_SDP_EPF_RINFO_SRN(val);
+
+ val = octep_read_csr64(oct, CNXK_SDP_MAC_PF_RING_CTL(oct->pcie_port));
+ dev_info(&pdev->dev, "SDP_MAC_PF_RING_CTL[%d]:0x%llx\n", oct->pcie_port, val);
+ conf->pf_ring_cfg.srn = CNXK_SDP_MAC_PF_RING_CTL_SRN(val);
+ conf->pf_ring_cfg.max_io_rings = CNXK_SDP_MAC_PF_RING_CTL_RPPF(val);
+ conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings;
+ dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n",
+ conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf,
+ conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings);
+
+ conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS;
+ conf->iq.instr_type = OCTEP_64BYTE_INSTR;
+ conf->iq.db_min = OCTEP_DB_MIN;
+ conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD;
+
+ conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS;
+ conf->oq.buf_size = OCTEP_OQ_BUF_SIZE;
+ conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD;
+ conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD;
+ conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD;
+
+ conf->msix_cfg.non_ioq_msix = CNXK_NUM_NON_IOQ_INTR;
+ conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings; /* one IOQ vector per active PF ring */
+ conf->msix_cfg.non_ioq_msix_names = cnxk_non_ioq_msix_names;
+
+ pos = pci_find_ext_capability(oct->pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (pos) {
+ pci_read_config_byte(oct->pdev,
+ pos + PCI_SRIOV_FUNC_LINK,
+ &link);
+ link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link); /* combine this device's slot with the SR-IOV link byte */
+ }
+ /* link stays 0 when the SR-IOV capability is absent; it selects one
+ * CTRL_MBOX_SZ-sized slot past CNXK_PEM_BAR4_INDEX_OFFSET in mmio[2]
+ */
+ conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr +
+ CNXK_PEM_BAR4_INDEX_OFFSET +
+ (link * CTRL_MBOX_SZ);
+
+ conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL;
+ conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT;
+}
+
+/* Setup registers for a hardware Tx Queue.
+ *
+ * @iq_no indexes oct->iq[] on entry and is then offset by
+ * CFG_GET_PORTS_PF_SRN() to address the ring CSRs. Waits for the ring to
+ * report idle, programs R_IN_CONTROL, the descriptor ring base and size,
+ * caches the doorbell/count/interrupt-level register addresses in the
+ * octep_iq, and programs the interrupt threshold.
+ */
+static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no)
+{
+ struct octep_iq *iq = oct->iq[iq_no];
+ u32 reset_instr_cnt;
+ u64 reg_val;
+
+ iq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+ reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no));
+
+ /* wait for IDLE to set to 1
+ * NOTE(review): this poll has no timeout; it spins until the bit is set
+ */
+ if (!(reg_val & CNXK_R_IN_CTL_IDLE)) {
+ do {
+ reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no));
+ } while (!(reg_val & CNXK_R_IN_CTL_IDLE));
+ }
+
+ reg_val |= CNXK_R_IN_CTL_RDSIZE;
+ reg_val |= CNXK_R_IN_CTL_IS_64B;
+ reg_val |= CNXK_R_IN_CTL_ESR;
+ octep_write_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no), reg_val);
+
+ /* Write the start of the input queue's ring and its size */
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(iq_no),
+ iq->desc_ring_dma);
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(iq_no),
+ iq->max_count);
+
+ /* Remember the doorbell & instruction count register addr
+ * for this queue
+ */
+ iq->doorbell_reg = oct->mmio[0].hw_addr +
+ CNXK_SDP_R_IN_INSTR_DBELL(iq_no);
+ iq->inst_cnt_reg = oct->mmio[0].hw_addr +
+ CNXK_SDP_R_IN_CNTS(iq_no);
+ iq->intr_lvl_reg = oct->mmio[0].hw_addr +
+ CNXK_SDP_R_IN_INT_LEVELS(iq_no);
+
+ /* Read the current instruction counter and write the same value back
+ * (NOTE(review): presumably this resets the counter - confirm)
+ */
+ reset_instr_cnt = readl(iq->inst_cnt_reg);
+ writel(reset_instr_cnt, iq->inst_cnt_reg);
+
+ /* Program the low 32 bits of the configured IQ interrupt threshold.
+ * NOTE(review): a value of 0xFFFFFFFF is said to disable the interrupt;
+ * confirm the configured default.
+ */
+ reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff;
+ octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+}
+
+/* Setup registers for a hardware Rx Queue.
+ *
+ * @oq_no indexes oct->oq[] on entry and is then offset by
+ * CFG_GET_PORTS_PF_SRN() to address the ring CSRs. Waits for the ring to
+ * report idle, programs R_OUT_CONTROL, the descriptor ring base and size
+ * and the buffer size, caches the packet-count and credit register
+ * addresses in the octep_oq, and programs the interrupt levels.
+ */
+static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
+{
+ u64 reg_val;
+ u64 oq_ctl = 0ULL;
+ u32 time_threshold = 0;
+ struct octep_oq *oq = oct->oq[oq_no];
+
+ oq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+ reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));
+
+ /* wait for IDLE to set to 1
+ * NOTE(review): this poll has no timeout; it spins until the bit is set
+ */
+ if (!(reg_val & CNXK_R_OUT_CTL_IDLE)) {
+ do {
+ reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));
+ } while (!(reg_val & CNXK_R_OUT_CTL_IDLE));
+ }
+
+ reg_val &= ~(CNXK_R_OUT_CTL_IMODE);
+ reg_val &= ~(CNXK_R_OUT_CTL_ROR_P);
+ reg_val &= ~(CNXK_R_OUT_CTL_NSR_P);
+ reg_val &= ~(CNXK_R_OUT_CTL_ROR_I);
+ reg_val &= ~(CNXK_R_OUT_CTL_NSR_I);
+ reg_val &= ~(CNXK_R_OUT_CTL_ES_I);
+ reg_val &= ~(CNXK_R_OUT_CTL_ROR_D);
+ reg_val &= ~(CNXK_R_OUT_CTL_NSR_D);
+ reg_val &= ~(CNXK_R_OUT_CTL_ES_D);
+ reg_val |= (CNXK_R_OUT_CTL_ES_P); /* the only control bit set here; all others above are cleared */
+
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no),
+ oq->desc_ring_dma);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no),
+ oq->max_count);
+
+ oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));
+
+ /* Clear the ISIZE and BSIZE (22-0) */
+ oq_ctl &= ~0x7fffffULL;
+
+ /* Populate the BSIZE (15-0) */
+ oq_ctl |= (oq->buffer_size & 0xffff);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), oq_ctl);
+
+ /* Get the mapped address of the pkt_sent and pkts_credit regs */
+ oq->pkts_sent_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_OUT_CNTS(oq_no);
+ oq->pkts_credit_reg = oct->mmio[0].hw_addr +
+ CNXK_SDP_R_OUT_SLIST_DBELL(oq_no);
+
+ /* Interrupt levels: time threshold in the upper 32 bits, OR-ed with
+ * the packet threshold
+ */
+ time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf);
+ reg_val = ((u64)time_threshold << 32) |
+ CFG_GET_OQ_INTR_PKT(oct->conf);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+}
+
+/* Fill in the register addresses of PF mailbox @q_no: the shared mailbox
+ * interrupt register plus the per-queue PF->VF (written by the PF) and
+ * VF->PF (read by the PF) data registers.
+ */
+static void octep_setup_mbox_regs_cnxk_pf(struct octep_device *oct, int q_no)
+{
+	u8 __iomem *bar0 = oct->mmio[0].hw_addr;
+	struct octep_mbox *mbox = oct->mbox[q_no];
+
+	mbox->q_no = q_no;
+	mbox->mbox_int_reg = bar0 + CNXK_SDP_EPF_MBOX_RINT(0);
+	mbox->mbox_write_reg = bar0 + CNXK_SDP_R_MBOX_PF_VF_DATA(q_no);
+	mbox->mbox_read_reg = bar0 + CNXK_SDP_R_MBOX_VF_PF_DATA(q_no);
+}
+
+/* Service pending OEI events: queue the control mailbox work on a mailbox
+ * event and zero the heartbeat-miss counter on a heartbeat event.
+ */
+static void octep_poll_oei_cnxk_pf(struct octep_device *oct)
+{
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_OEI_RINT);
+
+	if (!pending)
+		return;
+
+	/* Write the observed bits back before acting on them */
+	octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT, pending);
+
+	if (pending & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
+		queue_work(octep_wq, &oct->ctrl_mbox_task);
+	if (pending & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
+		atomic_set(&oct->hb_miss_cnt, 0);
+}
+
+/* OEI interrupt handler: @dev is the struct octep_device; delegates to the
+ * OEI poll routine and always reports the interrupt as handled.
+ */
+static irqreturn_t octep_oei_intr_handler_cnxk_pf(void *dev)
+{
+	octep_poll_oei_cnxk_pf(dev);
+
+	return IRQ_HANDLED;
+}
+
+/* Process non-ioq interrupts required to keep pf interface running.
+ * OEI_RINT is needed for control mailbox
+ * MBOX_RINT is needed for pfvf mailbox
+ *
+ * NOTE(review): only the OEI events are polled here; no MBOX_RINT
+ * handling is performed by this function.
+ */
+static void octep_poll_non_ioq_interrupts_cnxk_pf(struct octep_device *oct)
+{
+ octep_poll_oei_cnxk_pf(oct);
+}
+
+/* Input ring error interrupt: log and write back IRERR_RINT, then do the
+ * same for every active ring's non-zero R_ERR_TYPE. Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t octep_ire_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	struct device *device = &oct->pdev->dev;
+	u64 status, err;
+	int ring;
+
+	/* Check for IRERR INTR */
+	status = octep_read_csr64(oct, CNXK_SDP_EPF_IRERR_RINT);
+	if (!status)
+		return IRQ_HANDLED;
+
+	dev_info(device, "received IRERR_RINT intr: 0x%llx\n", status);
+	octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT, status);
+
+	for (ring = 0; ring < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); ring++) {
+		err = octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(ring));
+		if (!err)
+			continue;
+
+		dev_info(device, "Received err type on IQ-%d: 0x%llx\n",
+			 ring, err);
+		octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(ring), err);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Output ring error interrupt: log and write back ORERR_RINT, then do the
+ * same for every active ring's non-zero R_ERR_TYPE. Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t octep_ore_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	struct device *device = &oct->pdev->dev;
+	u64 status, err;
+	int ring;
+
+	/* Check for ORERR INTR */
+	status = octep_read_csr64(oct, CNXK_SDP_EPF_ORERR_RINT);
+	if (!status)
+		return IRQ_HANDLED;
+
+	dev_info(device, "Received ORERR_RINT intr: 0x%llx\n", status);
+	octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT, status);
+
+	for (ring = 0; ring < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); ring++) {
+		err = octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(ring));
+		if (!err)
+			continue;
+
+		dev_info(device, "Received err type on OQ-%d: 0x%llx\n",
+			 ring, err);
+		octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(ring), err);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* VF input ring error (VFIRE) interrupt handler.
+ *
+ * Reads element 0 of CNXK_SDP_EPF_VFIRE_RINT; a non-zero value is logged and
+ * written back to the same register. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_vfire_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0));
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	dev_info(&oct->pdev->dev,
+		 "Received VFIRE_RINT intr: 0x%llx\n", pending);
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0), pending);
+
+	return IRQ_HANDLED;
+}
+
+/* VF output ring error (VFORE) interrupt handler.
+ *
+ * Reads element 0 of CNXK_SDP_EPF_VFORE_RINT; a non-zero value is logged and
+ * written back to the same register. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_vfore_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0));
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	dev_info(&oct->pdev->dev,
+		 "Received VFORE_RINT intr: 0x%llx\n", pending);
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0), pending);
+
+	return IRQ_HANDLED;
+}
+
+/* DPI DMA interrupt handler.
+ *
+ * If CNXK_SDP_EPF_DMA_RINT reads non-zero, the value is written back to the
+ * same register. Nothing is logged. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_dma_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_RINT);
+
+	if (pending)
+		octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT, pending);
+
+	return IRQ_HANDLED;
+}
+
+/* DPI DMA transaction error interrupt handler for VFs.
+ *
+ * Reads CNXK_SDP_EPF_DMA_VF_RINT(0); a non-zero value is logged and written
+ * back to the same register. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_dma_vf_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0));
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	dev_info(&oct->pdev->dev,
+		 "Received DMA_VF_RINT intr: 0x%llx\n", pending);
+	octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0), pending);
+
+	return IRQ_HANDLED;
+}
+
+/* PP transaction error interrupt handler for VFs.
+ *
+ * Reads CNXK_SDP_EPF_PP_VF_RINT(0); a non-zero value is logged and written
+ * back to the same register. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_pp_vf_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0));
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	dev_info(&oct->pdev->dev,
+		 "Received PP_VF_RINT intr: 0x%llx\n", pending);
+	octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0), pending);
+
+	return IRQ_HANDLED;
+}
+
+/* MISC (mac related) interrupt handler.
+ *
+ * Reads CNXK_SDP_EPF_MISC_RINT; a non-zero value is logged and written back
+ * to the same register. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t octep_misc_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+	u64 pending = octep_read_csr64(oct, CNXK_SDP_EPF_MISC_RINT);
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	dev_info(&oct->pdev->dev,
+		 "Received MISC_RINT intr: 0x%llx\n", pending);
+	octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT, pending);
+
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for all reserved interrupts: logs the event and reports
+ * it as handled; no register is touched.
+ */
+static irqreturn_t octep_rsvd_intr_handler_cnxk_pf(void *dev)
+{
+	struct octep_device *oct = dev;
+
+	dev_info(&oct->pdev->dev, "Reserved interrupts raised; Ignore\n");
+
+	return IRQ_HANDLED;
+}
+
+/* Tx/Rx queue interrupt handler: @data is the queue's octep_ioq_vector;
+ * schedules the NAPI context of the vector's output queue.
+ */
+static irqreturn_t octep_ioq_intr_handler_cnxk_pf(void *data)
+{
+	struct octep_ioq_vector *ioq_vector = data;
+
+	napi_schedule_irqoff(ioq_vector->oq->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* Soft reset of the Octeon device.
+ *
+ * Sequence: program the PCI window write mask, set the firmware status CSR
+ * to FW_STATUS_RUNNING, set the chip domain reset bit, busy-wait 10 ms and
+ * program the window write mask again.
+ *
+ * Always returns 0.
+ */
+static int octep_soft_reset_cnxk_pf(struct octep_device *oct)
+{
+ dev_info(&oct->pdev->dev, "CNXKXX: Doing soft reset\n");
+
+ octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF);
+
+ /* Firmware status CSR is supposed to be cleared by
+ * core domain reset, but due to a hw bug, it is not.
+ * Set it to RUNNING right before reset so that it is not
+ * left in READY (1) state after a reset. This is required
+ * in addition to the early setting to handle the case where
+ * the OcteonTX is unexpectedly reset, reboots, and then
+ * the module is removed.
+ */
+ OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL),
+ FW_STATUS_RUNNING);
+
+ /* Set chip domain reset bit */
+ OCTEP_PCI_WIN_WRITE(oct, CNXK_RST_CHIP_DOMAIN_W1S, 1);
+ /* Wait till Octeon resets. */
+ mdelay(10);
+ /* restore the reset value
+ * NOTE(review): this writes the same 0xFF that was written before the
+ * reset - confirm that 0xFF is also the intended post-reset value.
+ */
+ octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF);
+
+ return 0;
+}
+
+/* Re-initialize Octeon hardware registers.
+ *
+ * Goes through the hw_ops callbacks in a fixed order: IQ registers for every
+ * active ring, then OQ registers for every active ring, then interrupts are
+ * enabled, then the IO queues are enabled. Finally each OQ's max_count is
+ * written to its pkts_credit_reg.
+ */
+static void octep_reinit_regs_cnxk_pf(struct octep_device *oct)
+{
+ u32 i;
+
+ /* Tx (input) ring registers first ... */
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_iq_regs(oct, i);
+
+ /* ... then Rx (output) ring registers */
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_oq_regs(oct, i);
+
+ oct->hw_ops.enable_interrupts(oct);
+ oct->hw_ops.enable_io_queues(oct);
+
+ /* Hand max_count credits to every output queue */
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/* Enable all interrupts.
+ *
+ * Builds a mask with one bit per active PF ring (bits srn .. srn + rings - 1)
+ * and writes it to the IRERR, ORERR, MISC and DMA enable (W1S) registers.
+ * The OEI, VFIRE, VFORE, DMA_VF and PP_VF enable registers are written with
+ * all ones.
+ */
+static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct)
+{
+	const int first = CFG_GET_PORTS_PF_SRN(oct->conf);
+	const int last = first + CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+	u64 ring_bits = 0ULL;
+	int bit;
+
+	for (bit = first; bit < last; bit++)
+		ring_bits |= 0x1ULL << bit;
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1S, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1S, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL);
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1S, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1S, ring_bits);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL);
+	octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL);
+}
+
+/* Disable all interrupts.
+ *
+ * Mirror of the enable path: the per-ring mask (bits srn .. srn + rings - 1)
+ * is written to the IRERR, ORERR, MISC and DMA enable-clear (W1C) registers,
+ * and all ones to the OEI, VFIRE, VFORE, DMA_VF and PP_VF ones.
+ */
+static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct)
+{
+	const int first = CFG_GET_PORTS_PF_SRN(oct->conf);
+	const int last = first + CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+	u64 ring_bits = 0ULL;
+	int bit;
+
+	for (bit = first; bit < last; bit++)
+		ring_bits |= 0x1ULL << bit;
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL);
+	octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1C, ring_bits);
+	octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1C, ring_bits);
+
+	octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL);
+	octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL);
+}
+
+/* Get new Octeon Read Index: index of descriptor that Octeon reads next.
+ *
+ * Reads the instruction count register, records it in iq->pkt_in_done and
+ * returns iq->octep_read_index advanced by the number of instructions
+ * completed since the previous call, modulo iq->max_count.
+ */
+static u32 octep_update_iq_read_index_cnxk_pf(struct octep_iq *iq)
+{
+	u32 hw_done = readl(iq->inst_cnt_reg);
+	u32 newly_done = hw_done - iq->pkt_in_done;
+
+	iq->pkt_in_done = hw_done;
+
+	return (iq->octep_read_index + newly_done) % iq->max_count;
+}
+
+/* Enable a hardware Tx Queue.
+ *
+ * @oct: Octeon device private data structure.
+ * @iq_no: PF-relative queue number; the PF starting ring number is added
+ *         before any register is accessed.
+ *
+ * Writes 0xFFFFFFFF to the ring's instruction doorbell register and polls it
+ * until it reads back zero, sleeping one jiffy between polls (at most HZ
+ * sleeps). A timeout is reported with a warning instead of being silently
+ * ignored; the queue is still enabled afterwards, as before. Then bit 62 of
+ * the ring's IN_INT_LEVELS register and bit 0 of its IN_ENABLE register are
+ * set.
+ */
+static void octep_enable_iq_cnxk_pf(struct octep_device *oct, int iq_no)
+{
+	u64 retries = HZ;
+	u64 reg_val;
+
+	iq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+	octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF);
+
+	while (octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no))) {
+		if (!retries--) {
+			dev_warn(&oct->pdev->dev,
+				 "ring %d: instruction doorbell did not clear\n",
+				 iq_no);
+			break;
+		}
+		schedule_timeout_interruptible(1);
+	}
+
+	reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no));
+	reg_val |= (0x1ULL << 62);
+	octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+
+	reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no));
+	reg_val |= 0x1ULL;
+	octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Enable a hardware Rx Queue.
+ *
+ * @oq_no is PF-relative; the PF starting ring number is added to it. Sets
+ * bit 62 of the ring's OUT_INT_LEVELS register, writes 0xFFFFFFFF to its
+ * SLIST doorbell, then sets bit 0 of its OUT_ENABLE register.
+ */
+static void octep_enable_oq_cnxk_pf(struct octep_device *oct, int oq_no)
+{
+	const int ring = oq_no + CFG_GET_PORTS_PF_SRN(oct->conf);
+	u64 val;
+
+	val = octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(ring));
+	val |= (0x1ULL << 62);
+	octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(ring), val);
+
+	octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(ring), 0xFFFFFFFF);
+
+	val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(ring));
+	val |= 0x1ULL;
+	octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(ring), val);
+}
+
+/* Enable all hardware Tx/Rx Queues assigned to PF: for every active ring the
+ * Tx queue is enabled first, then the Rx queue.
+ */
+static void octep_enable_io_queues_cnxk_pf(struct octep_device *oct)
+{
+	u8 ring;
+
+	for (ring = 0; ring < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); ring++) {
+		octep_enable_iq_cnxk_pf(oct, ring);
+		octep_enable_oq_cnxk_pf(oct, ring);
+	}
+}
+
+/* Disable a hardware Tx Queue assigned to PF: clears bit 0 of the ring's
+ * IN_ENABLE register. @iq_no is PF-relative; the PF starting ring number is
+ * added to it.
+ */
+static void octep_disable_iq_cnxk_pf(struct octep_device *oct, int iq_no)
+{
+	const int ring = iq_no + CFG_GET_PORTS_PF_SRN(oct->conf);
+	u64 val;
+
+	val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(ring));
+	val &= ~0x1ULL;
+	octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(ring), val);
+}
+
+/* Disable a hardware Rx Queue assigned to PF: clears bit 0 of the ring's
+ * OUT_ENABLE register. @oq_no is PF-relative; the PF starting ring number is
+ * added to it.
+ */
+static void octep_disable_oq_cnxk_pf(struct octep_device *oct, int oq_no)
+{
+	const int ring = oq_no + CFG_GET_PORTS_PF_SRN(oct->conf);
+	u64 val;
+
+	val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(ring));
+	val &= ~0x1ULL;
+	octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(ring), val);
+}
+
+/* Disable all hardware Tx/Rx Queues assigned to PF: for every active ring
+ * the Tx queue is disabled first, then the Rx queue.
+ */
+static void octep_disable_io_queues_cnxk_pf(struct octep_device *oct)
+{
+	int ring;
+
+	for (ring = 0; ring < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); ring++) {
+		octep_disable_iq_cnxk_pf(oct, ring);
+		octep_disable_oq_cnxk_pf(oct, ring);
+	}
+}
+
+/* Dump hardware registers (including Tx/Rx queues) for debugging: calls
+ * cnxk_dump_regs() for every ring from the PF starting ring number up to,
+ * but not including, starting ring + number of active rings.
+ */
+static void octep_dump_registers_cnxk_pf(struct octep_device *oct)
+{
+	u8 first = CFG_GET_PORTS_PF_SRN(oct->conf);
+	u8 count = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+	u8 ring;
+
+	for (ring = first; ring < first + count; ring++)
+		cnxk_dump_regs(oct, ring);
+}
+
+/**
+ * octep_device_setup_cnxk_pf() - Setup Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * - initialize hardware operations.
+ * - get target side pcie port number for the device.
+ * - setup window access to hardware registers.
+ * - set initial configuration and max limits.
+ * - setup hardware mapping of rings to the PF device.
+ * - set the firmware status CSR to FW_STATUS_RUNNING.
+ *
+ * The hw_ops table is filled in first; the register accesses follow it.
+ */
+void octep_device_setup_cnxk_pf(struct octep_device *oct)
+{
+ /* Ring and mailbox register setup callbacks */
+ oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cnxk_pf;
+ oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cnxk_pf;
+ oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cnxk_pf;
+
+ /* One handler per interrupt source, plus reset/re-init hooks */
+ oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cnxk_pf;
+ oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cnxk_pf;
+ oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cnxk_pf;
+ oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cnxk_pf;
+ oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cnxk_pf;
+ oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cnxk_pf;
+ oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cnxk_pf;
+ oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cnxk_pf;
+ oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cnxk_pf;
+ oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cnxk_pf;
+ oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cnxk_pf;
+ oct->hw_ops.soft_reset = octep_soft_reset_cnxk_pf;
+ oct->hw_ops.reinit_regs = octep_reinit_regs_cnxk_pf;
+
+ /* Interrupt enable/disable and polling of non-IOQ interrupts */
+ oct->hw_ops.enable_interrupts = octep_enable_interrupts_cnxk_pf;
+ oct->hw_ops.disable_interrupts = octep_disable_interrupts_cnxk_pf;
+ oct->hw_ops.poll_non_ioq_interrupts = octep_poll_non_ioq_interrupts_cnxk_pf;
+
+ oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cnxk_pf;
+
+ /* Queue enable/disable/reset */
+ oct->hw_ops.enable_iq = octep_enable_iq_cnxk_pf;
+ oct->hw_ops.enable_oq = octep_enable_oq_cnxk_pf;
+ oct->hw_ops.enable_io_queues = octep_enable_io_queues_cnxk_pf;
+
+ oct->hw_ops.disable_iq = octep_disable_iq_cnxk_pf;
+ oct->hw_ops.disable_oq = octep_disable_oq_cnxk_pf;
+ oct->hw_ops.disable_io_queues = octep_disable_io_queues_cnxk_pf;
+ oct->hw_ops.reset_io_queues = octep_reset_io_queues_cnxk_pf;
+
+ oct->hw_ops.dump_registers = octep_dump_registers_cnxk_pf;
+
+ octep_setup_pci_window_regs_cnxk_pf(oct);
+
+ /* Only the low 8 bits of CNXK_SDP_MAC_NUMBER are kept as the port */
+ oct->pcie_port = octep_read_csr64(oct, CNXK_SDP_MAC_NUMBER) & 0xff;
+ dev_info(&oct->pdev->dev,
+ "Octeon device using PCIE Port %d\n", oct->pcie_port);
+
+ octep_init_config_cnxk_pf(oct);
+ octep_configure_ring_mapping_cnxk_pf(oct);
+
+ /* Firmware status CSR is supposed to be cleared by
+ * core domain reset, but due to IPBUPEM-38842, it is not.
+ * Set it to RUNNING early in boot, so that unexpected resets
+ * leave it in a state that is not READY (1).
+ */
+ OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL),
+ FW_STATUS_RUNNING);
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index 1622a6eb..91cfa19 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -13,9 +13,10 @@
#define OCTEP_64BYTE_INSTR 64
/* Tx Queue: maximum descriptors per ring */
+/* This needs to be a power of 2 */
#define OCTEP_IQ_MAX_DESCRIPTORS 1024
/* Minimum input (Tx) requests to be enqueued to ring doorbell */
-#define OCTEP_DB_MIN 1
+#define OCTEP_DB_MIN 8
/* Packet threshold for Tx queue interrupt */
#define OCTEP_IQ_INTR_THRESHOLD 0x0
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 552970c..2da00a7 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -24,6 +24,10 @@ struct workqueue_struct *octep_wq;
static const struct pci_device_id octep_pci_id_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)},
{PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_PF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_PF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_PF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_PF)},
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_PF)},
{0, },
};
MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl);
@@ -777,17 +781,24 @@ static int octep_stop(struct net_device *netdev)
*/
static inline int octep_iq_full_check(struct octep_iq *iq)
{
- if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ if (likely((IQ_INSTR_SPACE(iq)) >
OCTEP_WAKE_QUEUE_THRESHOLD))
return 0;
/* Stop the queue if unable to send */
netif_stop_subqueue(iq->netdev, iq->q_no);
+ /* Allow for pending updates in write index
+ * from iq_process_completion in other cpus
+ * to reflect, in case queue gets free
+ * entries.
+ */
+ smp_mb();
+
/* check again and restart the queue, in case NAPI has just freed
* enough Tx ring entries.
*/
- if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ if (unlikely(IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD)) {
netif_start_subqueue(iq->netdev, iq->q_no);
iq->stats.restart_cnt++;
@@ -818,8 +829,12 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
struct octep_iq *iq;
skb_frag_t *frag;
u16 nr_frags, si;
+ int xmit_more;
u16 q_no, wi;
+ if (skb_put_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
q_no = skb_get_queue_mapping(skb);
if (q_no >= oct->num_iqs) {
netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);
@@ -827,10 +842,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
}
iq = oct->iq[q_no];
- if (octep_iq_full_check(iq)) {
- iq->stats.tx_busy++;
- return NETDEV_TX_BUSY;
- }
shinfo = skb_shinfo(skb);
nr_frags = shinfo->nr_frags;
@@ -869,9 +880,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
if (dma_mapping_error(iq->dev, dma))
goto dma_map_err;
- dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
- OCTEP_SGLIST_SIZE_PER_PKT,
- DMA_TO_DEVICE);
memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
sglist[0].len[3] = len;
sglist[0].dma_ptr[0] = dma;
@@ -891,26 +899,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
frag++;
si++;
}
- dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
- OCTEP_SGLIST_SIZE_PER_PKT,
- DMA_TO_DEVICE);
-
hw_desc->dptr = tx_buffer->sglist_dma;
}
- netdev_tx_sent_queue(iq->netdev_q, skb->len);
+ xmit_more = netdev_xmit_more();
+
+ __netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more);
+
skb_tx_timestamp(skb);
- atomic_inc(&iq->instr_pending);
+ iq->fill_cnt++;
wi++;
- if (wi == iq->max_count)
- wi = 0;
- iq->host_write_index = wi;
+ iq->host_write_index = wi & iq->ring_size_mask;
+
+ /* octep_iq_full_check stops the queue and returns
+ * true if so, in case the queue has become full
+ * by inserting current packet. If so, we can
+ * go ahead and ring doorbell.
+ */
+ if (!octep_iq_full_check(iq) && xmit_more &&
+ iq->fill_cnt < iq->fill_threshold)
+ return NETDEV_TX_OK;
+
/* Flush the hw descriptor before writing to doorbell */
wmb();
-
- /* Ring Doorbell to notify the NIC there is a new packet */
- writel(1, iq->doorbell_reg);
- iq->stats.instr_posted++;
+ /* Ring Doorbell to notify the NIC of new packets */
+ writel(iq->fill_cnt, iq->doorbell_reg);
+ iq->stats.instr_posted += iq->fill_cnt;
+ iq->fill_cnt = 0;
return NETDEV_TX_OK;
dma_map_sg_err:
@@ -1136,6 +1151,14 @@ static const char *octep_devid_to_str(struct octep_device *oct)
return "CN93XX";
case OCTEP_PCI_DEVICE_ID_CNF95N_PF:
return "CNF95N";
+ case OCTEP_PCI_DEVICE_ID_CN10KA_PF:
+ return "CN10KA";
+ case OCTEP_PCI_DEVICE_ID_CNF10KA_PF:
+ return "CNF10KA";
+ case OCTEP_PCI_DEVICE_ID_CNF10KB_PF:
+ return "CNF10KB";
+ case OCTEP_PCI_DEVICE_ID_CN10KB_PF:
+ return "CN10KB";
default:
return "Unsupported";
}
@@ -1181,6 +1204,14 @@ int octep_device_setup(struct octep_device *oct)
OCTEP_MINOR_REV(oct));
octep_device_setup_cn93_pf(oct);
break;
+ case OCTEP_PCI_DEVICE_ID_CNF10KA_PF:
+ case OCTEP_PCI_DEVICE_ID_CN10KA_PF:
+ case OCTEP_PCI_DEVICE_ID_CNF10KB_PF:
+ case OCTEP_PCI_DEVICE_ID_CN10KB_PF:
+ dev_info(&pdev->dev, "Setting up OCTEON %s PF PASS%d.%d\n",
+ octep_devid_to_str(oct), OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct));
+ octep_device_setup_cnxk_pf(oct);
+ break;
default:
dev_err(&pdev->dev,
"%s: unsupported device\n", __func__);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index 6df902e..e2fe8b2 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -23,6 +23,11 @@
#define OCTEP_PCI_DEVICE_ID_CNF95N_PF 0xB400 //95N PF
+#define OCTEP_PCI_DEVICE_ID_CN10KA_PF 0xB900 //CN10KA PF
+#define OCTEP_PCI_DEVICE_ID_CNF10KA_PF 0xBA00 //CNF10KA PF
+#define OCTEP_PCI_DEVICE_ID_CNF10KB_PF 0xBC00 //CNF10KB PF
+#define OCTEP_PCI_DEVICE_ID_CN10KB_PF 0xBD00 //CN10KB PF
+
#define OCTEP_MAX_QUEUES 63
#define OCTEP_MAX_IQ OCTEP_MAX_QUEUES
#define OCTEP_MAX_OQ OCTEP_MAX_QUEUES
@@ -40,6 +45,15 @@
#define OCTEP_OQ_INTR_RESEND_BIT 59
#define OCTEP_MMIO_REGIONS 3
+
+/* Number of ring entries between flush_index and host_write_index, computed
+ * as (host_write_index - flush_index) masked with ring_size_mask. The result
+ * is 0 whenever the two indices are equal. The argument is copied into a
+ * local first, so it is evaluated only once.
+ */
+#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
+ ((iq__)->host_write_index - (iq__)->flush_index) & \
+ (iq__)->ring_size_mask; \
+ })
+/* max_count minus IQ_INSTR_PENDING(). The temporary is named iq_ rather than
+ * iq__ so that it does not collide with the local declared inside the nested
+ * IQ_INSTR_PENDING() expansion.
+ */
+#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
+ (iq_)->max_count - IQ_INSTR_PENDING(iq_); \
+ })
+
/* PCI address space mapping information.
* Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
* Octeon gets mapped to different physical address spaces in
@@ -377,6 +391,7 @@ int octep_setup_oqs(struct octep_device *oct);
void octep_free_oqs(struct octep_device *oct);
void octep_oq_dbell_init(struct octep_device *oct);
void octep_device_setup_cn93_pf(struct octep_device *oct);
+void octep_device_setup_cnxk_pf(struct octep_device *oct);
int octep_iq_process_completions(struct octep_iq *iq, u16 budget);
int octep_oq_process_rx(struct octep_oq *oq, int budget);
void octep_set_ethtool_ops(struct net_device *netdev);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h
new file mode 100644
index 0000000..abe02df
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_REGS_CNXK_PF_H_
+#define _OCTEP_REGS_CNXK_PF_H_
+
+/* ############################ RST ######################### */
+#define CNXK_RST_BOOT 0x000087E006001600ULL
+#define CNXK_RST_CHIP_DOMAIN_W1S 0x000087E006001810ULL
+#define CNXK_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL
+#define CNXK_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL
+
+#define CNXK_CONFIG_XPANSION_BAR 0x38
+#define CNXK_CONFIG_PCIE_CAP 0x70
+#define CNXK_CONFIG_PCIE_DEVCAP 0x74
+#define CNXK_CONFIG_PCIE_DEVCTL 0x78
+#define CNXK_CONFIG_PCIE_LINKCAP 0x7C
+#define CNXK_CONFIG_PCIE_LINKCTL 0x80
+#define CNXK_CONFIG_PCIE_SLOTCAP 0x84
+#define CNXK_CONFIG_PCIE_SLOTCTL 0x88
+
+#define CNXK_PCIE_SRIOV_FDL 0x188 /* 0x98 */
+#define CNXK_PCIE_SRIOV_FDL_BIT_POS 0x10
+#define CNXK_PCIE_SRIOV_FDL_MASK 0xFF
+
+#define CNXK_CONFIG_PCIE_FLTMSK 0x720
+
+/* ################# Offsets of RING, EPF, MAC ######################### */
+#define CNXK_RING_OFFSET (0x1ULL << 17)
+#define CNXK_EPF_OFFSET (0x1ULL << 25)
+#define CNXK_MAC_OFFSET (0x1ULL << 4)
+#define CNXK_BIT_ARRAY_OFFSET (0x1ULL << 4)
+#define CNXK_EPVF_RING_OFFSET (0x1ULL << 4)
+
+/* ################# Scratch Registers ######################### */
+#define CNXK_SDP_EPF_SCRATCH 0x209E0
+
+/* ################# Window Registers ######################### */
+#define CNXK_SDP_WIN_WR_ADDR64 0x20000
+#define CNXK_SDP_WIN_RD_ADDR64 0x20010
+#define CNXK_SDP_WIN_WR_DATA64 0x20020
+#define CNXK_SDP_WIN_WR_MASK_REG 0x20030
+#define CNXK_SDP_WIN_RD_DATA64 0x20040
+
+#define CNXK_SDP_MAC_NUMBER 0x2C100
+
+/* ################# Global Privileged registers ######################### */
+#define CNXK_SDP_EPF_RINFO 0x209F0
+
+#define CNXK_SDP_EPF_RINFO_SRN(val) ((val) & 0x7F)
+#define CNXK_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
+#define CNXK_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F)
+
+/* SDP Function select */
+#define CNXK_SDP_FUNC_SEL_EPF_BIT_POS 7
+#define CNXK_SDP_FUNC_SEL_FUNC_BIT_POS 0
+
+/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */
+#define CNXK_SDP_R_IN_CONTROL_START 0x10000
+#define CNXK_SDP_R_IN_ENABLE_START 0x10010
+#define CNXK_SDP_R_IN_INSTR_BADDR_START 0x10020
+#define CNXK_SDP_R_IN_INSTR_RSIZE_START 0x10030
+#define CNXK_SDP_R_IN_INSTR_DBELL_START 0x10040
+#define CNXK_SDP_R_IN_CNTS_START 0x10050
+#define CNXK_SDP_R_IN_INT_LEVELS_START 0x10060
+#define CNXK_SDP_R_IN_PKT_CNT_START 0x10080
+#define CNXK_SDP_R_IN_BYTE_CNT_START 0x10090
+
+#define CNXK_SDP_R_IN_CONTROL(ring) \
+ (CNXK_SDP_R_IN_CONTROL_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_ENABLE(ring) \
+ (CNXK_SDP_R_IN_ENABLE_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INSTR_BADDR(ring) \
+ (CNXK_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INSTR_RSIZE(ring) \
+ (CNXK_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INSTR_DBELL(ring) \
+ (CNXK_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_CNTS(ring) \
+ (CNXK_SDP_R_IN_CNTS_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INT_LEVELS(ring) \
+ (CNXK_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_PKT_CNT(ring) \
+ (CNXK_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_BYTE_CNT(ring) \
+ (CNXK_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET))
+
+/* Rings per Virtual Function */
+#define CNXK_R_IN_CTL_RPVF_MASK (0xF)
+#define CNXK_R_IN_CTL_RPVF_POS (48)
+
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ */
+#define CNXK_R_IN_CTL_IDLE (0x1ULL << 28)
+#define CNXK_R_IN_CTL_RDSIZE (0x3ULL << 25)
+#define CNXK_R_IN_CTL_IS_64B (0x1ULL << 24)
+#define CNXK_R_IN_CTL_D_NSR (0x1ULL << 8)
+#define CNXK_R_IN_CTL_D_ESR (0x1ULL << 6)
+#define CNXK_R_IN_CTL_D_ROR (0x1ULL << 5)
+#define CNXK_R_IN_CTL_NSR (0x1ULL << 3)
+#define CNXK_R_IN_CTL_ESR (0x1ULL << 1)
+#define CNXK_R_IN_CTL_ROR (0x1ULL << 0)
+
+#define CNXK_R_IN_CTL_MASK (CNXK_R_IN_CTL_RDSIZE | CNXK_R_IN_CTL_IS_64B)
+
+/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */
+#define CNXK_SDP_R_OUT_CNTS_START 0x10100
+#define CNXK_SDP_R_OUT_INT_LEVELS_START 0x10110
+#define CNXK_SDP_R_OUT_SLIST_BADDR_START 0x10120
+#define CNXK_SDP_R_OUT_SLIST_RSIZE_START 0x10130
+#define CNXK_SDP_R_OUT_SLIST_DBELL_START 0x10140
+#define CNXK_SDP_R_OUT_CONTROL_START 0x10150
+#define CNXK_SDP_R_OUT_WMARK_START 0x10160
+#define CNXK_SDP_R_OUT_ENABLE_START 0x10170
+#define CNXK_SDP_R_OUT_PKT_CNT_START 0x10180
+#define CNXK_SDP_R_OUT_BYTE_CNT_START 0x10190
+
+#define CNXK_SDP_R_OUT_CONTROL(ring) \
+ (CNXK_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_ENABLE(ring) \
+ (CNXK_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_SLIST_BADDR(ring) \
+ (CNXK_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_SLIST_RSIZE(ring) \
+ (CNXK_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_SLIST_DBELL(ring) \
+ (CNXK_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_CNTS(ring) \
+ (CNXK_SDP_R_OUT_CNTS_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_INT_LEVELS(ring) \
+ (CNXK_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_PKT_CNT(ring) \
+ (CNXK_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_BYTE_CNT(ring) \
+ (CNXK_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET))
+
+/*------------------ R_OUT Masks ----------------*/
+#define CNXK_R_OUT_INT_LEVELS_BMODE BIT_ULL(63)
+#define CNXK_R_OUT_INT_LEVELS_TIMET (32)
+
+#define CNXK_R_OUT_CTL_IDLE BIT_ULL(40)
+#define CNXK_R_OUT_CTL_ES_I BIT_ULL(34)
+#define CNXK_R_OUT_CTL_NSR_I BIT_ULL(33)
+#define CNXK_R_OUT_CTL_ROR_I BIT_ULL(32)
+#define CNXK_R_OUT_CTL_ES_D BIT_ULL(30)
+#define CNXK_R_OUT_CTL_NSR_D BIT_ULL(29)
+#define CNXK_R_OUT_CTL_ROR_D BIT_ULL(28)
+#define CNXK_R_OUT_CTL_ES_P BIT_ULL(26)
+#define CNXK_R_OUT_CTL_NSR_P BIT_ULL(25)
+#define CNXK_R_OUT_CTL_ROR_P BIT_ULL(24)
+#define CNXK_R_OUT_CTL_IMODE BIT_ULL(23)
+
+/* ############### Interrupt Moderation Registers ############### */
+#define CNXK_SDP_R_IN_INT_MDRT_CTL0_START 0x10280
+#define CNXK_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0
+#define CNXK_SDP_R_IN_INT_MDRT_DBG_START 0x102C0
+
+#define CNXK_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380
+#define CNXK_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0
+#define CNXK_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0
+
+#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510
+#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520
+
+#define CNXK_SDP_R_IN_INT_MDRT_CTL0(ring) \
+ (CNXK_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INT_MDRT_CTL1(ring) \
+ (CNXK_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_INT_MDRT_DBG(ring) \
+ (CNXK_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_INT_MDRT_CTL0(ring) \
+ (CNXK_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_INT_MDRT_CTL1(ring) \
+ (CNXK_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_INT_MDRT_DBG(ring) \
+ (CNXK_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \
+ (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_IN_CNTS_ISM(ring) \
+ (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET))
+
+/* ##################### Mail Box Registers ########################## */
+/* INT register for VF. When a MBOX write from the PF to a VF happens, the
+ * corresponding bit is set in this register as well as in the
+ * PF_VF_INT register.
+ *
+ * This is a RO register; the interrupt can be cleared by writing 1 to PF_VF_INT.
+ */
+/* Basically first 3 are from PF to VF. The last one is data from VF to PF */
+#define CNXK_SDP_R_MBOX_PF_VF_DATA_START 0x10210
+#define CNXK_SDP_R_MBOX_PF_VF_INT_START 0x10220
+#define CNXK_SDP_R_MBOX_VF_PF_DATA_START 0x10230
+
+#define CNXK_SDP_R_MBOX_PF_VF_DATA(ring) \
+ (CNXK_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_MBOX_PF_VF_INT(ring) \
+ (CNXK_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_MBOX_VF_PF_DATA(ring) \
+ (CNXK_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_RING_OFFSET))
+
+/* ##################### Interrupt Registers ########################## */
+#define CNXK_SDP_R_ERR_TYPE_START 0x10400
+
+#define CNXK_SDP_R_ERR_TYPE(ring) \
+ (CNXK_SDP_R_ERR_TYPE_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_R_MBOX_ISM_START 0x10500
+
+/* CNXK_SDP_R_OUT_CNTS_ISM_START, CNXK_SDP_R_IN_CNTS_ISM_START and their
+ * per-ring accessors are defined once, in the Interrupt Moderation Registers
+ * section of this header; they are not repeated here.
+ */
+#define CNXK_SDP_R_MBOX_ISM(ring) \
+	(CNXK_SDP_R_MBOX_ISM_START + ((ring) * CNXK_RING_OFFSET))
+
+#define CNXK_SDP_EPF_MBOX_RINT_START 0x20100
+#define CNXK_SDP_EPF_MBOX_RINT_W1S_START 0x20120
+#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140
+#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160
+
+#define CNXK_SDP_EPF_VFIRE_RINT_START 0x20180
+#define CNXK_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0
+#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0
+#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0
+
+#define CNXK_SDP_EPF_IRERR_RINT 0x20200
+#define CNXK_SDP_EPF_IRERR_RINT_W1S 0x20210
+#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220
+#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230
+
+#define CNXK_SDP_EPF_VFORE_RINT_START 0x20240
+#define CNXK_SDP_EPF_VFORE_RINT_W1S_START 0x20260
+#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280
+#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0
+
+#define CNXK_SDP_EPF_ORERR_RINT 0x20320
+#define CNXK_SDP_EPF_ORERR_RINT_W1S 0x20330
+#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340
+#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350
+
+#define CNXK_SDP_EPF_OEI_RINT 0x20400
+#define CNXK_SDP_EPF_OEI_RINT_W1S 0x20500
+#define CNXK_SDP_EPF_OEI_RINT_ENA_W1C 0x20600
+#define CNXK_SDP_EPF_OEI_RINT_ENA_W1S 0x20700
+
+#define CNXK_SDP_EPF_DMA_RINT 0x20800
+#define CNXK_SDP_EPF_DMA_RINT_W1S 0x20810
+#define CNXK_SDP_EPF_DMA_RINT_ENA_W1C 0x20820
+#define CNXK_SDP_EPF_DMA_RINT_ENA_W1S 0x20830
+
+#define CNXK_SDP_EPF_DMA_INT_LEVEL_START 0x20840
+#define CNXK_SDP_EPF_DMA_CNT_START 0x20860
+#define CNXK_SDP_EPF_DMA_TIM_START 0x20880
+
+#define CNXK_SDP_EPF_MISC_RINT 0x208A0
+#define CNXK_SDP_EPF_MISC_RINT_W1S 0x208B0
+#define CNXK_SDP_EPF_MISC_RINT_ENA_W1C 0x208C0
+#define CNXK_SDP_EPF_MISC_RINT_ENA_W1S 0x208D0
+
+#define CNXK_SDP_EPF_DMA_VF_RINT_START 0x208E0
+#define CNXK_SDP_EPF_DMA_VF_RINT_W1S_START 0x20900
+#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20920
+#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20940
+
+#define CNXK_SDP_EPF_PP_VF_RINT_START 0x20960
+#define CNXK_SDP_EPF_PP_VF_RINT_W1S_START 0x20980
+#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x209A0
+#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x209C0
+
+#define CNXK_SDP_EPF_MBOX_RINT(index) \
+ (CNXK_SDP_EPF_MBOX_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_MBOX_RINT_W1S(index) \
+ (CNXK_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C(index) \
+ (CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S(index) \
+ (CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+
+#define CNXK_SDP_EPF_VFIRE_RINT(index) \
+ (CNXK_SDP_EPF_VFIRE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFIRE_RINT_W1S(index) \
+ (CNXK_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \
+ (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \
+ (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+
+#define CNXK_SDP_EPF_VFORE_RINT(index) \
+ (CNXK_SDP_EPF_VFORE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFORE_RINT_W1S(index) \
+ (CNXK_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(index) \
+ (CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(index) \
+ (CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+
+#define CNXK_SDP_EPF_DMA_VF_RINT(index) \
+ (CNXK_SDP_EPF_DMA_VF_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_DMA_VF_RINT_W1S(index) \
+ (CNXK_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \
+ (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \
+ (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+
+#define CNXK_SDP_EPF_PP_VF_RINT(index) \
+ (CNXK_SDP_EPF_PP_VF_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_PP_VF_RINT_W1S(index) \
+ (CNXK_SDP_EPF_PP_VF_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \
+ (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \
+ (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET))
+
+/*------------------ Interrupt Masks ----------------*/
+#define CNXK_INTR_R_SEND_ISM BIT_ULL(63)
+#define CNXK_INTR_R_OUT_INT BIT_ULL(62)
+#define CNXK_INTR_R_IN_INT BIT_ULL(61)
+#define CNXK_INTR_R_MBOX_INT BIT_ULL(60)
+#define CNXK_INTR_R_RESEND BIT_ULL(59)
+#define CNXK_INTR_R_CLR_TIM BIT_ULL(58)
+
+/* ####################### Ring Mapping Registers ################################## */
+#define CNXK_SDP_EPVF_RING_START 0x26000
+#define CNXK_SDP_IN_RING_TB_MAP_START 0x28000
+#define CNXK_SDP_IN_RATE_LIMIT_START 0x2A000
+#define CNXK_SDP_MAC_PF_RING_CTL_START 0x2C000
+
+#define CNXK_SDP_EPVF_RING(ring) \
+ (CNXK_SDP_EPVF_RING_START + ((ring) * CNXK_EPVF_RING_OFFSET))
+#define CNXK_SDP_IN_RING_TB_MAP(ring) \
+ (CNXK_SDP_IN_RING_TB_MAP_START + ((ring) * CNXK_EPVF_RING_OFFSET))
+#define CNXK_SDP_IN_RATE_LIMIT(ring) \
+ (CNXK_SDP_IN_RATE_LIMIT_START + ((ring) * CNXK_EPVF_RING_OFFSET))
+#define CNXK_SDP_MAC_PF_RING_CTL(mac) \
+ (CNXK_SDP_MAC_PF_RING_CTL_START + ((mac) * CNXK_MAC_OFFSET))
+
+#define CNXK_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0x3)
+#define CNXK_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0x7F)
+#define CNXK_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F)
+
+/* Number of non-queue interrupts in CNXKxx */
+#define CNXK_NUM_NON_IOQ_INTR 32
+
+/* bit 0 for control mbox interrupt */
+#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0)
+/* bit 1 for firmware heartbeat interrupt */
+#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1)
+#define FW_STATUS_RUNNING 2ULL
+#define CNXK_PEMX_PFX_CSX_PFCFGX(pem, pf, offset) ({ typeof(offset) _off = (offset); \
+ ((0x8e0000008000 | \
+ (uint64_t)(pem) << 36 \
+ | (pf) << 18 \
+ | ((_off >> 16) & 1) << 16 \
+ | (_off >> 3) << 3) \
+ + (((_off >> 2) & 1) << 2)); \
+ })
+
+/* Register defines for use with CNXK_PEMX_PFX_CSX_PFCFGX */
+#define CNXK_PCIEEP_VSECST_CTL 0x418
+
+#define CNXK_PEM_BAR4_INDEX 7
+#define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL
+#define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE)
+
+#endif /* _OCTEP_REGS_CNXK_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
index d0adb82..06851b7 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq)
iq->flush_index = 0;
iq->pkts_processed = 0;
iq->pkt_in_done = 0;
- atomic_set(&iq->instr_pending, 0);
}
/**
@@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
}
iq->pkts_processed += compl_pkts;
- atomic_sub(compl_pkts, &iq->instr_pending);
iq->stats.instr_completed += compl_pkts;
iq->stats.bytes_sent += compl_bytes;
iq->stats.sgentry_sent += compl_sg;
@@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);
if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
- ((iq->max_count - atomic_read(&iq->instr_pending)) >
+ (IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD))
netif_wake_subqueue(iq->netdev, iq->q_no);
return !budget;
@@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq)
dev_kfree_skb_any(skb);
}
- atomic_set(&iq->instr_pending, 0);
iq->flush_index = fi;
netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
index 86c98b1..1ba4ff6 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
@@ -172,9 +172,6 @@ struct octep_iq {
/* Statistics for this input queue. */
struct octep_iq_stats stats;
- /* This field keeps track of the instructions pending in this queue. */
- atomic_t instr_pending;
-
/* Pointer to the Virtual Base addr of the input ring. */
struct octep_tx_desc_hw *desc_ring;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3cf6589..a6e9157 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1159,15 +1159,18 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
phy_ring_tail = eth->phy_scratch_ring + soc->txrx.txd_size * (cnt - 1);
for (i = 0; i < cnt; i++) {
+ dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE;
struct mtk_tx_dma_v2 *txd;
txd = eth->scratch_ring + i * soc->txrx.txd_size;
- txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE;
+ txd->txd1 = addr;
if (i < cnt - 1)
txd->txd2 = eth->phy_scratch_ring +
(i + 1) * soc->txrx.txd_size;
txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
+ if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA))
+ txd->txd3 |= TX_DMA_PREP_ADDR64(addr);
txd->txd4 = 0;
if (mtk_is_netsys_v2_or_greater(eth)) {
txd->txd5 = 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 9a6744c..c895e26 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -670,7 +670,7 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
void *buf;
int s;
- page = __dev_alloc_pages(GFP_KERNEL, 0);
+ page = __dev_alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
@@ -691,10 +691,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
struct mtk_wdma_desc *desc = desc_ptr;
+ u32 ctrl;
desc->buf0 = cpu_to_le32(buf_phys);
if (!mtk_wed_is_v3_or_greater(dev->hw)) {
- u32 txd_size, ctrl;
+ u32 txd_size;
txd_size = dev->wlan.init_buf(buf, buf_phys,
token++);
@@ -708,11 +709,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 |
FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2,
MTK_WED_BUF_SIZE - txd_size);
- desc->ctrl = cpu_to_le32(ctrl);
desc->info = 0;
} else {
- desc->ctrl = cpu_to_le32(token << 16);
+ ctrl = token << 16 | TX_DMA_PREP_ADDR64(buf_phys);
}
+ desc->ctrl = cpu_to_le32(ctrl);
desc_ptr += desc_size;
buf += MTK_WED_BUF_SIZE;
@@ -811,6 +812,7 @@ mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev)
buf_phys = page_phys;
for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) {
desc->buf0 = cpu_to_le32(buf_phys);
+ desc->token = cpu_to_le32(RX_DMA_PREP_ADDR64(buf_phys));
buf_phys += MTK_WED_PAGE_BUF_SIZE;
desc++;
}
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
index 3bd51a3d..7ffbd4f 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
@@ -142,7 +142,8 @@ mtk_wed_wo_queue_refill(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q,
dma_addr_t addr;
void *buf;
- buf = page_frag_alloc(&q->cache, q->buf_size, GFP_ATOMIC);
+ buf = page_frag_alloc(&q->cache, q->buf_size,
+ GFP_ATOMIC | GFP_DMA32);
if (!buf)
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
index 28d0274..7659ad2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -55,7 +55,10 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET,
MLX5_VSC_LOCK);
if (ret) {
- mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+ if (ret == -EBUSY)
+ mlx5_core_info(dev, "SW reset semaphore is already in use\n");
+ else
+ mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
goto unlock_gw;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index b12fe3c..a55452c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -147,6 +147,20 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
}
}
+static void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ u64 dbytes;
+ u64 dpkts;
+
+ dpkts = priv->stats.rep_stats.vport_rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
+ dbytes = priv->stats.rep_stats.vport_rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
+ mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, &priv->stats.rep_stats);
+ flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
+ FLOW_ACTION_HW_STATS_DELAYED);
+}
+
static
int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
struct tc_cls_matchall_offload *ma)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
index 368a95f..b14cd62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -48,7 +48,8 @@ mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
struct pedit_headers_action *hdrs,
struct netlink_ext_ack *extack)
{
- u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? TCA_PEDIT_KEY_EX_CMD_SET :
+ TCA_PEDIT_KEY_EX_CMD_ADD;
u8 htype = act->mangle.htype;
int err = -EOPNOTSUPP;
u32 mask, val, offset;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ea58c69..3aecdf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5244,7 +5244,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
netdev->hw_features |= NETIF_F_GSO_UDP_L4;
- netdev->features |= NETIF_F_GSO_UDP_L4;
mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3ab682b..fe0726c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -112,8 +112,18 @@ static const struct counter_desc vport_rep_stats_desc[] = {
tx_vport_rdma_multicast_bytes) },
};
+static const struct counter_desc vport_rep_loopback_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ vport_loopback_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ vport_loopback_bytes) },
+};
+
#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc)
+#define NUM_VPORT_REP_LOOPBACK_COUNTERS(dev) \
+ (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? \
+ ARRAY_SIZE(vport_rep_loopback_stats_desc) : 0)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep)
{
@@ -157,7 +167,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep)
{
- return NUM_VPORT_REP_HW_COUNTERS;
+ return NUM_VPORT_REP_HW_COUNTERS +
+ NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep)
@@ -166,6 +177,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep)
for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format);
+ for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vport_rep_loopback_stats_desc[i].format);
return idx;
}
@@ -176,6 +190,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep)
for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++)
data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats,
vport_rep_stats_desc, i);
+ for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats,
+ vport_rep_loopback_stats_desc, i);
return idx;
}
@@ -247,6 +264,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
rep_stats->tx_vport_rdma_multicast_bytes =
MLX5_GET_CTR(out, received_ib_multicast.octets);
+ if (MLX5_CAP_GEN(priv->mdev, vport_counter_local_loopback)) {
+ rep_stats->vport_loopback_packets =
+ MLX5_GET_CTR(out, local_loopback.packets);
+ rep_stats->vport_loopback_bytes =
+ MLX5_GET_CTR(out, local_loopback.octets);
+ }
+
out:
kvfree(out);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 477c547..12b3607 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -476,6 +476,8 @@ struct mlx5e_rep_stats {
u64 tx_vport_rdma_multicast_packets;
u64 rx_vport_rdma_multicast_bytes;
u64 tx_vport_rdma_multicast_bytes;
+ u64 vport_loopback_packets;
+ u64 vport_loopback_bytes;
};
struct mlx5e_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 7ca9e5b..9dca280 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3206,10 +3206,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
- set_masks = &hdrs[0].masks;
- add_masks = &hdrs[1].masks;
- set_vals = &hdrs[0].vals;
- add_vals = &hdrs[1].vals;
+ set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks;
+ add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks;
+ set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals;
+ add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals;
for (i = 0; i < ARRAY_SIZE(fields); i++) {
bool skip;
@@ -5011,22 +5011,6 @@ int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
return apply_police_params(priv, 0, extack);
}
-void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
- struct tc_cls_matchall_offload *ma)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct rtnl_link_stats64 cur_stats;
- u64 dbytes;
- u64 dpkts;
-
- mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats);
- dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
- dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
- rpriv->prev_vf_vport_stats = cur_stats;
- flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
- FLOW_ACTION_HW_STATS_DELAYED);
-}
-
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index adb39e3..c24bda5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -203,8 +203,6 @@ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
struct tc_cls_matchall_offload *f);
int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
struct tc_cls_matchall_offload *f);
-void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
- struct tc_cls_matchall_offload *ma);
struct mlx5e_encap_entry;
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 4aed176..78eb6b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -181,7 +181,7 @@ struct mlx5_flow_rule {
struct mlx5_flow_handle {
int num_rules;
- struct mlx5_flow_rule *rule[];
+ struct mlx5_flow_rule *rule[] __counted_by(num_rules);
};
/* Type of children is mlx5_flow_group */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 17fe30a..0c26d70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -539,7 +539,7 @@ struct mlx5_fc_bulk {
u32 base_id;
int bulk_len;
unsigned long *bitmask;
- struct mlx5_fc fcs[];
+ struct mlx5_fc fcs[] __counted_by(bulk_len);
};
static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index b568988..4b8cb12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -325,6 +325,25 @@ static void mlx5_fw_live_patch_event(struct work_struct *work)
mlx5_core_err(dev, "Failed to reload FW tracer\n");
}
+static const struct pci_device_id mgt_ifc_device_ids[] = {
+ { PCI_VDEVICE(MELLANOX, 0xc2d2) }, /* BlueField1 MGT interface device ID */
+ { PCI_VDEVICE(MELLANOX, 0xc2d3) }, /* BlueField2 MGT interface device ID */
+ { PCI_VDEVICE(MELLANOX, 0xc2d4) }, /* BlueField3-Lx MGT interface device ID */
+ { PCI_VDEVICE(MELLANOX, 0xc2d5) }, /* BlueField3 MGT interface device ID */
+ { PCI_VDEVICE(MELLANOX, 0xc2d6) }, /* BlueField4 MGT interface device ID */
+};
+
+static bool mlx5_is_mgt_ifc_pci_device(struct mlx5_core_dev *dev, u16 dev_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mgt_ifc_device_ids); ++i)
+ if (mgt_ifc_device_ids[i].device == dev_id)
+ return true;
+
+ return false;
+}
+
static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id)
{
struct pci_bus *bridge_bus = dev->pdev->bus;
@@ -339,10 +358,15 @@ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id)
err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
if (err)
return pcibios_err_to_errno(err);
- if (sdev_id != dev_id) {
- mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id);
- return -EPERM;
- }
+
+ if (sdev_id == dev_id)
+ continue;
+
+ if (mlx5_is_mgt_ifc_pci_device(dev, sdev_id))
+ continue;
+
+ mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id);
+ return -EPERM;
}
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0c83ef1..0361741 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -266,9 +266,6 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
{
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
- if (!mlx5_modify_mtutc_allowed(mdev))
- return 0;
-
if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX ||
ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC)
return -EINVAL;
@@ -286,12 +283,15 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
- int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
- err = mlx5_ptp_settime_real_time(mdev, ts);
- if (err)
- return err;
+
+ if (mlx5_modify_mtutc_allowed(mdev)) {
+ int err = mlx5_ptp_settime_real_time(mdev, ts);
+
+ if (err)
+ return err;
+ }
write_seqlock_irqsave(&clock->lock, flags);
timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts));
@@ -341,9 +341,6 @@ static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta)
{
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
- if (!mlx5_modify_mtutc_allowed(mdev))
- return 0;
-
/* HW time adjustment range is checked. If out of range, settime instead */
if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) {
struct timespec64 ts;
@@ -367,13 +364,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
- int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
- err = mlx5_ptp_adjtime_real_time(mdev, delta);
- if (err)
- return err;
+ if (mlx5_modify_mtutc_allowed(mdev)) {
+ int err = mlx5_ptp_adjtime_real_time(mdev, delta);
+
+ if (err)
+ return err;
+ }
+
write_seqlock_irqsave(&clock->lock, flags);
timecounter_adjtime(&timer->tc, delta);
mlx5_update_clock_info_page(mdev);
@@ -396,15 +396,14 @@ static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_p
{
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
- if (!mlx5_modify_mtutc_allowed(mdev))
- return 0;
-
MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC);
- if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) {
+ if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units) &&
+ scaled_ppm <= S32_MAX && scaled_ppm >= S32_MIN) {
+ /* HW scaled_ppm support on mlx5 devices only supports a 32-bit value */
MLX5_SET(mtutc_reg, in, freq_adj_units,
MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM);
- MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm);
+ MLX5_SET(mtutc_reg, in, freq_adjustment, (s32)scaled_ppm);
} else {
MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB);
MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm));
@@ -420,13 +419,15 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
struct mlx5_core_dev *mdev;
unsigned long flags;
u32 mult;
- int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
- err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
- if (err)
- return err;
+ if (mlx5_modify_mtutc_allowed(mdev)) {
+ int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
+
+ if (err)
+ return err;
+ }
mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm);
@@ -1004,14 +1005,38 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
info->frac = timer->tc.frac;
}
+/* Set ptp_info.max_adj from the MTUTC log_max_freq_adjustment field, if non-zero. */
+static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
+ u8 log_max_freq_adjustment = 0;
+
+ if (!mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTUTC, 0, 0))
+ log_max_freq_adjustment =
+ MLX5_GET(mtutc_reg, out, log_max_freq_adjustment);
+
+ /* 1 << n overflows a signed int from n = 31 on, so saturate there. */
+ if (log_max_freq_adjustment)
+ clock->ptp_info.max_adj = log_max_freq_adjustment < 31 ?
+ 1 << log_max_freq_adjustment : S32_MAX;
+}
+
static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
{
struct mlx5_clock *clock = &mdev->clock;
+ /* Configure the PHC */
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ if (MLX5_CAP_MCAM_REG(mdev, mtutc))
+ mlx5_init_timer_max_freq_adjustment(mdev);
+
mlx5_timecounter_init(mdev);
mlx5_init_clock_info(mdev);
mlx5_init_overflow_period(clock);
- clock->ptp_info = mlx5_ptp_clock_info;
if (mlx5_real_time_mode(mdev)) {
struct timespec64 ts;
@@ -1042,11 +1067,10 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
}
seqlock_init(&clock->lock);
- mlx5_init_timer_clock(mdev);
INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
- /* Configure the PHC */
- clock->ptp_info = mlx5_ptp_clock_info;
+ /* Initialize the device clock */
+ mlx5_init_timer_clock(mdev);
/* Initialize 1PPS data structures */
mlx5_init_pps(mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a17152c..bccf6e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -219,7 +219,6 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
driver_version);
u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
- int remaining_size = driver_ver_sz;
char *string;
if (!MLX5_CAP_GEN(dev, driver_version))
@@ -227,22 +226,9 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
- strncpy(string, "Linux", remaining_size);
-
- remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
- strncat(string, ",", remaining_size);
-
- remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
- strncat(string, KBUILD_MODNAME, remaining_size);
-
- remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
- strncat(string, ",", remaining_size);
-
- remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
-
- snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
- LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
- LINUX_VERSION_SUBLEVEL);
+ snprintf(string, driver_ver_sz, "Linux,%s,%u.%u.%u",
+ KBUILD_MODNAME, LINUX_VERSION_MAJOR,
+ LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL);
/*Send the command*/
MLX5_SET(set_driver_version_in, in, opcode,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index e827c78..e3271c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -282,6 +282,12 @@ MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8);
*/
MLXSW_ITEM32(cmd_mbox, query_fw, lag_mode_support, 0x18, 1, 1);
+/* cmd_mbox_query_fw_cff_support
+ * 0: CONFIG_PROFILE.flood_mode = 5 (CFF) is not supported by FW
+ * 1: CONFIG_PROFILE.flood_mode = 5 (CFF) is supported by FW
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, cff_support, 0x18, 2, 1);
+
/* cmd_mbox_query_fw_clr_int_base_offset
* Clear Interrupt register's offset from clr_int_bar register
* in PCI address space.
@@ -779,6 +785,11 @@ enum mlxsw_cmd_mbox_config_profile_flood_mode {
* used.
*/
MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4,
+ /* CFF - Compressed FID Flood (CFF) mode.
+ * Reserved when legacy bridge model is used.
+ * Supported only by Spectrum-2+.
+ */
+ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF = 5,
};
/* cmd_mbox_config_profile_flood_mode
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index f23421f..e4d7739 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -211,6 +211,13 @@ mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core)
}
EXPORT_SYMBOL(mlxsw_core_lag_mode);
+enum mlxsw_cmd_mbox_config_profile_flood_mode
+mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->bus->flood_mode(mlxsw_core->bus_priv);
+}
+EXPORT_SYMBOL(mlxsw_core_flood_mode);
+
void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core)
{
return mlxsw_core->driver_priv;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 764d14b..6d11225 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -38,6 +38,8 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);
int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag);
enum mlxsw_cmd_mbox_config_profile_lag_mode
mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core);
+enum mlxsw_cmd_mbox_config_profile_flood_mode
+mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core);
void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
@@ -322,7 +324,12 @@ struct mlxsw_config_profile {
u16 max_regions;
u8 max_flood_tables;
u8 max_vid_flood_tables;
+
+ /* Flood mode to use if used_flood_mode. If flood_mode_prefer_cff,
+ * the backup flood mode (if any) when CFF unsupported.
+ */
u8 flood_mode;
+
u8 max_fid_offset_flood_tables;
u16 fid_offset_flood_table_size;
u8 max_fid_flood_tables;
@@ -338,6 +345,7 @@ struct mlxsw_config_profile {
u8 kvd_hash_double_parts;
u8 cqe_time_stamp_type;
bool lag_mode_prefer_sw;
+ bool flood_mode_prefer_cff;
struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
};
@@ -489,6 +497,7 @@ struct mlxsw_bus {
u32 (*read_utc_sec)(void *bus_priv);
u32 (*read_utc_nsec)(void *bus_priv);
enum mlxsw_cmd_mbox_config_profile_lag_mode (*lag_mode)(void *bus_priv);
+ enum mlxsw_cmd_mbox_config_profile_flood_mode (*flood_mode)(void *priv);
u8 features;
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index e4b25e1..0d58f13 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -106,7 +106,9 @@ struct mlxsw_pci {
u64 utc_sec_offset;
u64 utc_nsec_offset;
bool lag_mode_support;
+ bool cff_support;
enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode;
+ enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode;
struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
u32 doorbell_offset;
struct mlxsw_core *core;
@@ -130,6 +132,7 @@ struct mlxsw_pci {
const struct pci_device_id *id;
enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
u8 num_sdq_cqs; /* Number of CQs used for SDQs */
+ bool skip_reset;
};
static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -1245,11 +1248,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set(
mbox, profile->fid_flood_table_size);
}
- if (profile->used_flood_mode) {
+ if (profile->flood_mode_prefer_cff && mlxsw_pci->cff_support) {
+ enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode =
+ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF;
+
+ mlxsw_cmd_mbox_config_profile_set_flood_mode_set(mbox, 1);
+ mlxsw_cmd_mbox_config_profile_flood_mode_set(mbox, flood_mode);
+ mlxsw_pci->flood_mode = flood_mode;
+ } else if (profile->used_flood_mode) {
mlxsw_cmd_mbox_config_profile_set_flood_mode_set(
mbox, 1);
mlxsw_cmd_mbox_config_profile_flood_mode_set(
mbox, profile->flood_mode);
+ mlxsw_pci->flood_mode = profile->flood_mode;
+ } else {
+ WARN_ON(1);
+ return -EINVAL;
}
if (profile->used_max_ib_mc) {
mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set(
@@ -1476,11 +1490,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
return -EBUSY;
}
-static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
- const struct pci_device_id *id)
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ int err;
+
+ mlxsw_reg_mrsr_pack(mrsr_pl,
+ MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+
+ device_lock_assert(&pdev->dev);
+
+ pci_cfg_access_lock(pdev);
+ pci_save_state(pdev);
+
+ err = __pci_reset_function_locked(pdev);
+ if (err)
+ pci_err(pdev, "PCI function reset failed with %d\n", err);
+
+ pci_restore_state(pdev);
+ pci_cfg_access_unlock(pdev);
+
+ return err;
+}
+
+static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
+{
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+
+ mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET);
+ return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+}
+
+static int
+mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ char mcam_pl[MLXSW_REG_MCAM_LEN];
+ bool pci_reset_supported;
u32 sys_status;
int err;
@@ -1491,11 +1541,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
return err;
}
- mlxsw_reg_mrsr_pack(mrsr_pl);
- err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ /* PCI core already issued a PCI reset, do not issue another reset. */
+ if (mlxsw_pci->skip_reset)
+ return 0;
+
+ mlxsw_reg_mcam_pack(mcam_pl,
+ MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
+ err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
if (err)
return err;
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+ &pci_reset_supported);
+
+	/* Pick the reset flow according to the capability reported by MCAM. */
+	if (pci_reset_supported) {
+		pci_dbg(pdev, "Starting PCI reset flow\n");
+		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+	} else {
+		pci_dbg(pdev, "Starting software reset flow\n");
+		err = mlxsw_pci_reset_sw(mlxsw_pci);
+	}
+	/* Bail out if the reset request itself failed; otherwise the error
+	 * would be overwritten by the system-ready wait that follows and
+	 * silently lost.
+	 */
+	if (err)
+		return err;
+
err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
if (err) {
dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
@@ -1537,9 +1603,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (!mbox)
return -ENOMEM;
- err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
+ err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
if (err)
- goto err_sw_reset;
+ goto err_reset;
err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
if (err < 0) {
@@ -1601,6 +1667,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
mlxsw_pci->lag_mode_support =
mlxsw_cmd_mbox_query_fw_lag_mode_support_get(mbox);
+ mlxsw_pci->cff_support =
+ mlxsw_cmd_mbox_query_fw_cff_support_get(mbox);
+
num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
if (err)
@@ -1672,7 +1741,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
err_query_fw:
mlxsw_pci_free_irq_vectors(mlxsw_pci);
err_alloc_irq:
-err_sw_reset:
+err_reset:
mbox_put:
mlxsw_cmd_mbox_free(mbox);
return err;
@@ -1917,6 +1986,14 @@ mlxsw_pci_lag_mode(void *bus_priv)
return mlxsw_pci->lag_mode;
}
+/* Bus operation: report the flood mode recorded for this PCI device. */
+static enum mlxsw_cmd_mbox_config_profile_flood_mode
+mlxsw_pci_flood_mode(void *bus_priv)
+{
+	const struct mlxsw_pci *priv = bus_priv;
+
+	return priv->flood_mode;
+}
+
static const struct mlxsw_bus mlxsw_pci_bus = {
.kind = "pci",
.init = mlxsw_pci_init,
@@ -1929,6 +2006,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
.read_utc_sec = mlxsw_pci_read_utc_sec,
.read_utc_nsec = mlxsw_pci_read_utc_nsec,
.lag_mode = mlxsw_pci_lag_mode,
+ .flood_mode = mlxsw_pci_flood_mode,
.features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
};
@@ -2059,11 +2137,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
kfree(mlxsw_pci);
}
+/* reset_prepare hook of the PCI error handlers: unregister the core bus
+ * device that belongs to this PCI function.
+ */
+static void mlxsw_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct mlxsw_pci *priv = pci_get_drvdata(pdev);
+	struct mlxsw_core *core = priv->core;
+
+	mlxsw_core_bus_device_unregister(core, false);
+}
+
+/* reset_done hook of the PCI error handlers: register the core bus
+ * device again after the reset.
+ */
+static void mlxsw_pci_reset_done(struct pci_dev *pdev)
+{
+ struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
+
+ /* While skip_reset is set, mlxsw_pci_reset() returns before issuing a
+ * reset. Presumably the registration below reaches it through the bus
+ * .init callback - confirm against mlxsw_core.
+ */
+ mlxsw_pci->skip_reset = true;
+ /* NOTE(review): the return value of the registration is not checked. */
+ mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus,
+ mlxsw_pci, false, NULL, NULL);
+ mlxsw_pci->skip_reset = false;
+}
+
+/* PCI reset callbacks: the core bus device is unregistered in
+ * reset_prepare and registered again in reset_done.
+ */
+static const struct pci_error_handlers mlxsw_pci_err_handler = {
+ .reset_prepare = mlxsw_pci_reset_prepare,
+ .reset_done = mlxsw_pci_reset_done,
+};
+
int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
{
pci_driver->probe = mlxsw_pci_probe;
pci_driver->remove = mlxsw_pci_remove;
pci_driver->shutdown = mlxsw_pci_remove;
+ pci_driver->err_handler = &mlxsw_pci_err_handler;
return pci_register_driver(pci_driver);
}
EXPORT_SYMBOL(mlxsw_pci_driver_register);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 25b294f..3aae446 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1024,6 +1024,8 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port,
* ------------------------------------------
* The following register controls the association of flooding tables and MIDs
* to packet types used for flooding.
+ *
+ * Reserved when CONFIG_PROFILE.flood_mode = CFF.
*/
#define MLXSW_REG_SFGC_ID 0x2011
#define MLXSW_REG_SFGC_LEN 0x14
@@ -1862,6 +1864,7 @@ MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16);
* Access: RW
*
* Note: Reserved when legacy bridge model is used.
+ * Reserved when CONFIG_PROFILE.flood_mode = CFF.
*/
MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1);
@@ -1872,6 +1875,7 @@ MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1);
* Access: RW
*
* Note: Reserved when legacy bridge model is used and when flood_rsp=1.
+ * Reserved when CONFIG_PROFILE.flood_mode = CFF.
*/
MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1);
@@ -1880,6 +1884,8 @@ MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1);
* Used to point into the flooding table selected by SFGC register if
* the table is of type FID-Offset. Otherwise, this field is reserved.
* Access: RW
+ *
+ * Note: Reserved when CONFIG_PROFILE.flood_mode = CFF.
*/
MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16);
@@ -1938,6 +1944,26 @@ MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1);
*/
MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16);
+/* reg_sfmr_cff_mid_base
+ * Pointer to PGT table.
+ * Range: 0..(cap_max_pgt-1)
+ * Access: RW
+ *
+ * Note: Reserved when SwitchX/-2 and Spectrum-1.
+ * Supported when CONFIG_PROFILE.flood_mode = CFF.
+ */
+MLXSW_ITEM32(reg, sfmr, cff_mid_base, 0x20, 0, 16);
+
+/* reg_sfmr_cff_prf_id
+ * Compressed FID Flooding profile ID.
+ * Range: 0..(max_cap_nve_flood_prf-1)
+ * Access: RW
+ *
+ * Note: Reserved when SwitchX/-2 and Spectrum-1.
+ * Supported only when CONFIG_PROFILE.flood_mode = CFF.
+ */
+MLXSW_ITEM32(reg, sfmr, cff_prf_id, 0x24, 0, 2);
+
/* reg_sfmr_smpe_valid
* SMPE is valid.
* Access: RW
@@ -1959,18 +1985,11 @@ MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16);
static inline void mlxsw_reg_sfmr_pack(char *payload,
enum mlxsw_reg_sfmr_op op, u16 fid,
- u16 fid_offset, bool flood_rsp,
- enum mlxsw_reg_bridge_type bridge_type,
bool smpe_valid, u16 smpe)
{
MLXSW_REG_ZERO(sfmr, payload);
mlxsw_reg_sfmr_op_set(payload, op);
mlxsw_reg_sfmr_fid_set(payload, fid);
- mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset);
- mlxsw_reg_sfmr_vtfp_set(payload, false);
- mlxsw_reg_sfmr_vv_set(payload, false);
- mlxsw_reg_sfmr_flood_rsp_set(payload, flood_rsp);
- mlxsw_reg_sfmr_flood_bridge_type_set(payload, bridge_type);
mlxsw_reg_sfmr_smpe_valid_set(payload, smpe_valid);
mlxsw_reg_sfmr_smpe_set(payload, smpe);
}
@@ -2168,6 +2187,50 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u16 local_port, bool et1,
mlxsw_reg_spvc_et0_set(payload, et0);
}
+/* SFFP - Switch FID Flooding Profiles Register
+ * --------------------------------------------
+ * The SFFP register populates the fid flooding profile tables used for the NVE
+ * flooding and Compressed-FID Flooding (CFF).
+ *
+ * Reserved on Spectrum-1.
+ */
+#define MLXSW_REG_SFFP_ID 0x2029
+#define MLXSW_REG_SFFP_LEN 0x0C
+
+MLXSW_REG_DEFINE(sffp, MLXSW_REG_SFFP_ID, MLXSW_REG_SFFP_LEN);
+
+/* reg_sffp_profile_id
+ * Profile ID a.k.a. SFMR.nve_flood_prf_id or SFMR.cff_prf_id
+ * Range 0..max_cap_nve_flood_prf-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sffp, profile_id, 0x00, 16, 2);
+
+/* reg_sffp_type
+ * The traffic type to reach the flooding table.
+ * Same as SFGC.type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sffp, type, 0x00, 0, 4);
+
+/* reg_sffp_flood_offset
+ * Flood offset. Offset to add to SFMR.cff_mid_base to get the final PGT address
+ * for FID flood; or offset to add to SFMR.nve_tunnel_flood_ptr to get KVD
+ * pointer for NVE underlay.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sffp, flood_offset, 0x04, 0, 3);
+
+/* Build an SFFP payload: clear it, then fill in the two index fields
+ * (profile ID and traffic type) and the flood offset stored for them.
+ */
+static inline void mlxsw_reg_sffp_pack(char *payload, u8 profile_id,
+				       enum mlxsw_reg_sfgc_type type,
+				       u8 flood_offset)
+{
+	MLXSW_REG_ZERO(sffp, payload);
+
+	/* Index fields. */
+	mlxsw_reg_sffp_type_set(payload, type);
+	mlxsw_reg_sffp_profile_id_set(payload, profile_id);
+
+	/* Value stored for that index. */
+	mlxsw_reg_sffp_flood_offset_set(payload, flood_offset);
+}
+
/* SPEVET - Switch Port Egress VLAN EtherType
* ------------------------------------------
* The switch port egress VLAN EtherType configures which EtherType to push at
@@ -10122,6 +10185,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,
MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
+enum mlxsw_reg_mrsr_command {
+ /* Switch soft reset, does not reset PCI firmware. */
+ MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1,
+ /* The reset is performed when the PCI link is disabled.
+ * This command also resets the PCI firmware.
+ */
+ MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6,
+};
+
/* reg_mrsr_command
* Reset/shutdown command
* 0 - do nothing
@@ -10130,10 +10202,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
*/
MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);
-static inline void mlxsw_reg_mrsr_pack(char *payload)
+static inline void mlxsw_reg_mrsr_pack(char *payload,
+ enum mlxsw_reg_mrsr_command command)
{
MLXSW_REG_ZERO(mrsr, payload);
- mlxsw_reg_mrsr_command_set(payload, 1);
+ mlxsw_reg_mrsr_command_set(payload, command);
}
/* MLCR - Management LED Control Register
@@ -10584,6 +10657,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
MLXSW_REG_MCAM_MCIA_128B = 34,
+ /* If set, MRSR.command=6 is supported. */
+ MLXSW_REG_MCAM_PCI_RESET = 48,
};
#define MLXSW_REG_BYTES_PER_DWORD 0x4
@@ -12934,6 +13009,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(spvmlr),
MLXSW_REG(spfsr),
MLXSW_REG(spvc),
+ MLXSW_REG(sffp),
MLXSW_REG(spevet),
MLXSW_REG(smpe),
MLXSW_REG(smid2),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 89dd277..9d7977e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -27,6 +27,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_FID,
MLXSW_RES_ID_MAX_LAG,
MLXSW_RES_ID_MAX_LAG_MEMBERS,
+ MLXSW_RES_ID_MAX_NVE_FLOOD_PRF,
MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER,
MLXSW_RES_ID_CELL_SIZE,
MLXSW_RES_ID_MAX_HEADROOM_SIZE,
@@ -88,6 +89,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_FID] = 0x2512,
[MLXSW_RES_ID_MAX_LAG] = 0x2520,
[MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
+ [MLXSW_RES_ID_MAX_NVE_FLOOD_PRF] = 0x2522,
[MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805, /* Bytes */
[MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */
[MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index c70333b..800c461d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -753,6 +753,8 @@ union mlxsw_sp_l3addr {
};
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif,
+ u16 *port, bool *is_lag);
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
struct netlink_ext_ack *extack);
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index e954b8c..aad4bb1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -424,18 +424,35 @@ static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid)
MLXSW_REG_SFMR_OP_DESTROY_FID;
}
-static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid)
+static void mlxsw_sp_fid_pack(char *sfmr_pl,
+ const struct mlxsw_sp_fid *fid,
+ enum mlxsw_reg_sfmr_op op)
{
- struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
- char sfmr_pl[MLXSW_REG_SFMR_LEN];
u16 smpe;
smpe = fid->fid_family->smpe_index_valid ? fid->fid_index : 0;
- mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index,
- fid->fid_offset, fid->fid_family->flood_rsp,
- fid->fid_family->bridge_type,
+ mlxsw_reg_sfmr_pack(sfmr_pl, op, fid->fid_index,
fid->fid_family->smpe_index_valid, smpe);
+}
+
+/* Pack an SFMR payload as mlxsw_sp_fid_pack() does and additionally set
+ * the FID offset, flood_rsp and flood bridge type fields.
+ */
+static void mlxsw_sp_fid_pack_ctl(char *sfmr_pl,
+				  const struct mlxsw_sp_fid *fid,
+				  enum mlxsw_reg_sfmr_op op)
+{
+	const struct mlxsw_sp_fid_family *family = fid->fid_family;
+
+	mlxsw_sp_fid_pack(sfmr_pl, fid, op);
+
+	mlxsw_reg_sfmr_flood_bridge_type_set(sfmr_pl, family->bridge_type);
+	mlxsw_reg_sfmr_flood_rsp_set(sfmr_pl, family->flood_rsp);
+	mlxsw_reg_sfmr_fid_offset_set(sfmr_pl, fid->fid_offset);
+}
+
+static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid)
+{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
+ char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+ mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, mlxsw_sp_sfmr_op(valid));
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
}
@@ -444,15 +461,8 @@ static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid,
{
struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
char sfmr_pl[MLXSW_REG_SFMR_LEN];
- u16 smpe;
- smpe = fid->fid_family->smpe_index_valid ? fid->fid_index : 0;
-
- mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID,
- fid->fid_index, fid->fid_offset,
- fid->fid_family->flood_rsp,
- fid->fid_family->bridge_type,
- fid->fid_family->smpe_index_valid, smpe);
+ mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, MLXSW_REG_SFMR_OP_CREATE_FID);
mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid);
mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni));
mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid);
@@ -1687,9 +1697,6 @@ mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family)
int err;
int i;
- if (!fid_family->nr_flood_tables)
- return 0;
-
pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family);
err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, &fid_family->pgt_base,
pgt_size);
@@ -1718,9 +1725,6 @@ mlxsw_sp_fid_flood_tables_fini(struct mlxsw_sp_fid_family *fid_family)
struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
u16 pgt_size;
- if (!fid_family->nr_flood_tables)
- return;
-
pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family);
mlxsw_sp_pgt_mid_free_range(mlxsw_sp, fid_family->pgt_base, pgt_size);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 82a9512..2c255ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -8419,6 +8419,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
rif->ops = ops;
rif->rif_entries = rif_entries;
+ if (ops->setup)
+ ops->setup(rif, params);
+
if (ops->fid_get) {
fid = ops->fid_get(rif, params, extack);
if (IS_ERR(fid)) {
@@ -8428,9 +8431,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
rif->fid = fid;
}
- if (ops->setup)
- ops->setup(rif, params);
-
err = ops->configure(rif, extack);
if (err)
goto err_configure;
@@ -8660,6 +8660,20 @@ mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
+/* Report which port a sub-port RIF sits on.
+ *
+ * On success *is_lag tells whether the RIF is on a LAG, and *port holds
+ * the LAG ID in that case or the system port otherwise. Returns -EINVAL
+ * (with a warning) if @rif is not a sub-port RIF, 0 on success.
+ */
+int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif,
+			      u16 *port, bool *is_lag)
+{
+	struct mlxsw_sp_rif_subport *subport;
+
+	if (WARN_ON(rif->ops->type != MLXSW_SP_RIF_TYPE_SUBPORT))
+		return -EINVAL;
+
+	subport = mlxsw_sp_rif_subport_rif(rif);
+
+	*is_lag = subport->lag;
+	if (*is_lag)
+		*port = subport->lag_id;
+	else
+		*port = subport->system_port;
+
+	return 0;
+}
+
static struct mlxsw_sp_rif *
mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_rif_params *params,
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index b648461..be79cb0 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -1075,7 +1075,7 @@ struct lan743x_adapter {
#define DMA_DESCRIPTOR_SPACING_32 (32)
#define DMA_DESCRIPTOR_SPACING_64 (64)
#define DMA_DESCRIPTOR_SPACING_128 (128)
-#define DEFAULT_DMA_DESCRIPTOR_SPACING (L1_CACHE_BYTES)
+#define DEFAULT_DMA_DESCRIPTOR_SPACING (DMA_DESCRIPTOR_SPACING_16)
#define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \
(((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0))
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 939cfce..bd0e265 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -621,6 +621,9 @@ struct nfp_net_dp {
* @mbox_amsg.lock: Protect message list
* @mbox_amsg.list: List of message to process
* @mbox_amsg.work: Work to process message asynchronously
+ * @fs: Flow steering
+ * @fs.count: Flow count
+ * @fs.list: List of flows
* @app_priv: APP private data for this vNIC
*/
struct nfp_net {
@@ -728,9 +731,39 @@ struct nfp_net {
struct work_struct work;
} mbox_amsg;
+ struct {
+ u16 count;
+ struct list_head list;
+ } fs;
+
void *app_priv;
};
+/* One flow steering rule kept by the driver. */
+struct nfp_fs_entry {
+ /* Links the rule into nfp_net.fs.list. */
+ struct list_head node;
+ /* ethtool flow type; may carry the FLOW_RSS flag. */
+ u32 flow_type;
+ /* Rule location, compared against ethtool's fs.location. */
+ u32 loc;
+ /* Match fields: 'key' holds the values, 'msk' the masks. */
+ struct {
+ union {
+ struct {
+ __be32 sip4;
+ __be32 dip4;
+ };
+ struct {
+ __be32 sip6[4];
+ __be32 dip6[4];
+ };
+ };
+ union {
+ __be16 l3_proto;
+ u8 l4_proto;
+ };
+ __be16 sport;
+ __be16 dport;
+ } key, msk;
+ /* Reported to ethtool as ring_cookie; RX_CLS_FLOW_DISC means drop. */
+ u64 action;
+};
+
struct nfp_mbox_amsg_entry {
struct list_head list;
int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry);
@@ -987,6 +1020,9 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn);
int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new,
struct netlink_ext_ack *extack);
+int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry);
+int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry);
+
#ifdef CONFIG_NFP_DEBUG
void nfp_net_debugfs_create(void);
void nfp_net_debugfs_destroy(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index de0a5d5..ac1f451 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1763,6 +1763,186 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}
+/* Write an IPv4 flow steering rule to the mailbox at *addr: first the
+ * sub-command @op, then the rule as seven 32-bit words. *addr is advanced
+ * past everything written.
+ */
+static void
+nfp_net_fs_fill_v4(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr)
+{
+	union {
+		struct {
+			__be16 loc;
+			u8 k_proto, m_proto;
+			__be32 k_sip, m_sip, k_dip, m_dip;
+			__be16 k_sport, m_sport, k_dport, m_dport;
+		};
+		__be32 val[7];
+	} rule;
+	unsigned int word;
+
+	nn_writel(nn, *addr, op);
+	*addr += sizeof(u32);
+
+	rule.loc = cpu_to_be16(entry->loc);
+
+	/* Match values. */
+	rule.k_proto = entry->key.l4_proto;
+	rule.k_sip = entry->key.sip4;
+	rule.k_dip = entry->key.dip4;
+	rule.k_sport = entry->key.sport;
+	rule.k_dport = entry->key.dport;
+
+	/* Match masks. */
+	rule.m_proto = entry->msk.l4_proto;
+	rule.m_sip = entry->msk.sip4;
+	rule.m_dip = entry->msk.dip4;
+	rule.m_sport = entry->msk.sport;
+	rule.m_dport = entry->msk.dport;
+
+	/* The rule is assembled big-endian; convert each word before
+	 * handing it to nn_writel().
+	 */
+	for (word = 0; word < ARRAY_SIZE(rule.val); word++) {
+		nn_writel(nn, *addr, be32_to_cpu(rule.val[word]));
+		*addr += sizeof(__be32);
+	}
+}
+
+/* Write an IPv6 flow steering rule to the mailbox at *addr: first the
+ * sub-command @op, then the rule as nineteen 32-bit words. *addr is
+ * advanced past everything written.
+ */
+static void
+nfp_net_fs_fill_v6(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr)
+{
+	union {
+		struct {
+			__be16 loc;
+			u8 k_proto, m_proto;
+			__be32 k_sip[4], m_sip[4], k_dip[4], m_dip[4];
+			__be16 k_sport, m_sport, k_dport, m_dport;
+		};
+		__be32 val[19];
+	} rule;
+	unsigned int n;
+
+	nn_writel(nn, *addr, op);
+	*addr += sizeof(u32);
+
+	rule.loc = cpu_to_be16(entry->loc);
+	rule.k_proto = entry->key.l4_proto;
+	rule.m_proto = entry->msk.l4_proto;
+
+	/* Copy the four words of each address and address mask. */
+	for (n = 0; n < ARRAY_SIZE(rule.k_sip); n++) {
+		rule.k_sip[n] = entry->key.sip6[n];
+		rule.k_dip[n] = entry->key.dip6[n];
+		rule.m_sip[n] = entry->msk.sip6[n];
+		rule.m_dip[n] = entry->msk.dip6[n];
+	}
+
+	rule.k_sport = entry->key.sport;
+	rule.k_dport = entry->key.dport;
+	rule.m_sport = entry->msk.sport;
+	rule.m_dport = entry->msk.dport;
+
+	/* The rule is assembled big-endian; convert each word before
+	 * handing it to nn_writel().
+	 */
+	for (n = 0; n < ARRAY_SIZE(rule.val); n++) {
+		nn_writel(nn, *addr, be32_to_cpu(rule.val[n]));
+		*addr += sizeof(__be32);
+	}
+}
+
+#define NFP_FS_QUEUE_ID GENMASK(22, 16)
+#define NFP_FS_ACT GENMASK(15, 0)
+#define NFP_FS_ACT_DROP BIT(0)
+#define NFP_FS_ACT_Q BIT(1)
+/* Write the action word of a flow steering rule at @addr: drop, steer
+ * to a specific queue, or 0 for the default passthrough (used for rules
+ * flagged FLOW_RSS).
+ */
+static void
+nfp_net_fs_fill_act(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 addr)
+{
+	u32 act;
+
+	if (entry->action == RX_CLS_FLOW_DISC)
+		act = NFP_FS_ACT_DROP;
+	else if (entry->flow_type & FLOW_RSS)
+		act = 0; /* default passthrough */
+	else
+		act = NFP_FS_ACT_Q | FIELD_PREP(NFP_FS_QUEUE_ID, entry->action);
+
+	nn_writel(nn, addr, act);
+}
+
+/* Send an "add rule" flow steering message for @entry through the
+ * mailbox.
+ *
+ * Returns 0 on success, the mailbox lock error if the lock cannot be
+ * taken, or -EIO if the reconfig fails.
+ */
+int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry)
+{
+	u32 off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
+	int ret;
+
+	ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ);
+	if (ret)
+		return ret;
+
+	/* FLOW_RSS is a flag on top of the flow type, not a type itself. */
+	switch (entry->flow_type & ~FLOW_RSS) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case IPV4_USER_FLOW:
+		nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, &off);
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+	case IPV6_USER_FLOW:
+		nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, &off);
+		break;
+	case ETHER_FLOW:
+		nn_writel(nn, off, NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE);
+		off += sizeof(u32);
+		/* The 16-bit EtherType occupies a full 32-bit slot. */
+		nn_writew(nn, off, be16_to_cpu(entry->key.l3_proto));
+		off += sizeof(u32);
+		break;
+	}
+
+	/* The action word follows directly after the rule. */
+	nfp_net_fs_fill_act(nn, entry, off);
+
+	ret = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER);
+	if (!ret)
+		return 0;
+
+	nn_err(nn, "Add new fs rule failed with %d\n", ret);
+	return -EIO;
+}
+
+/* Send a "delete rule" flow steering message for @entry through the
+ * mailbox.
+ *
+ * Returns 0 on success, the mailbox lock error if the lock cannot be
+ * taken, or -EIO if the reconfig fails.
+ */
+int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry)
+{
+	u32 off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
+	int ret;
+
+	ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ);
+	if (ret)
+		return ret;
+
+	/* FLOW_RSS is a flag on top of the flow type, not a type itself. */
+	switch (entry->flow_type & ~FLOW_RSS) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case IPV4_USER_FLOW:
+		nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, &off);
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+	case IPV6_USER_FLOW:
+		nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, &off);
+		break;
+	case ETHER_FLOW:
+		nn_writel(nn, off, NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE);
+		off += sizeof(u32);
+		/* The 16-bit EtherType occupies a full 32-bit slot. */
+		nn_writew(nn, off, be16_to_cpu(entry->key.l3_proto));
+		off += sizeof(u32);
+		break;
+	}
+
+	/* The action word follows directly after the rule. */
+	nfp_net_fs_fill_act(nn, entry, off);
+
+	ret = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER);
+	if (!ret)
+		return 0;
+
+	nn_err(nn, "Delete fs rule failed with %d\n", ret);
+	return -EIO;
+}
+
+/* Drop every flow steering rule: ask the hardware to delete it, unlink
+ * it from the list and free it. The result of the hardware delete is not
+ * checked.
+ */
+static void nfp_net_fs_clean(struct nfp_net *nn)
+{
+	struct nfp_fs_entry *rule;
+
+	while (!list_empty(&nn->fs.list)) {
+		rule = list_first_entry(&nn->fs.list, struct nfp_fs_entry, node);
+
+		nfp_net_fs_del_hw(nn, rule);
+		list_del(&rule->node);
+		kfree(rule);
+	}
+}
+
static void nfp_net_stat64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -2740,6 +2920,8 @@ int nfp_net_init(struct nfp_net *nn)
INIT_LIST_HEAD(&nn->mbox_amsg.list);
INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work);
+ INIT_LIST_HEAD(&nn->fs.list);
+
return register_netdev(nn->dp.netdev);
err_clean_mbox:
@@ -2759,6 +2941,7 @@ void nfp_net_clean(struct nfp_net *nn)
unregister_netdev(nn->dp.netdev);
nfp_net_ipsec_clean(nn);
nfp_ccm_mbox_clean(nn);
+ nfp_net_fs_clean(nn);
flush_work(&nn->mbox_amsg.work);
nfp_net_reconfig_wait_posted(nn);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 3e63f6d..eaf4d3c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -269,6 +269,7 @@
#define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */
#define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */
#define NFP_NET_CFG_CTRL_FREELIST_EN (0x1 << 6) /* Freelist enable flag bit */
+#define NFP_NET_CFG_CTRL_FLOW_STEER (0x1 << 8) /* Flow steering */
#define NFP_NET_CFG_CAP_WORD1 0x00a4
@@ -418,6 +419,8 @@
#define NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD 8
#define NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL 9
+#define NFP_NET_CFG_MBOX_CMD_FLOW_STEER 10
+
/* VLAN filtering using general use mailbox
* %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
@@ -440,6 +443,18 @@
#define NFP_NET_CFG_MULTICAST_MAC_LO (NFP_NET_CFG_MULTICAST + 6)
#define NFP_NET_CFG_MULTICAST_SZ 0x0006
+/* Max size of FS rules in bytes */
+#define NFP_NET_CFG_FS_SZ 0x0054
+/* Sub-commands for flow steering (FS). The sub-command is written as the
+ * first 32-bit word of the mailbox message, ahead of the rule itself.
+ */
+enum {
+ NFP_NET_CFG_MBOX_CMD_FS_ADD_V4,
+ NFP_NET_CFG_MBOX_CMD_FS_DEL_V4,
+ NFP_NET_CFG_MBOX_CMD_FS_ADD_V6,
+ NFP_NET_CFG_MBOX_CMD_FS_DEL_V6,
+ NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE,
+ NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE,
+};
+
/* TLV capabilities
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index e75cbb28..d789639 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1317,6 +1317,116 @@ static int nfp_net_get_rss_hash_opts(struct nfp_net *nn,
return 0;
}
+#define NFP_FS_MAX_ENTRY 1024
+
+/* Translate a stored flow steering rule into the ethtool flow spec in
+ * @cmd->fs. Values come from entry->key and masks from entry->msk.
+ *
+ * Returns 0 on success or -EINVAL for a flow type that is not handled.
+ */
+static int nfp_net_fs_to_ethtool(struct nfp_fs_entry *entry, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fs = &cmd->fs;
+ unsigned int i;
+
+ /* FLOW_RSS is a flag on top of the flow type; ignore it here. */
+ switch (entry->flow_type & ~FLOW_RSS) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ fs->h_u.tcp_ip4_spec.ip4src = entry->key.sip4;
+ fs->h_u.tcp_ip4_spec.ip4dst = entry->key.dip4;
+ fs->h_u.tcp_ip4_spec.psrc = entry->key.sport;
+ fs->h_u.tcp_ip4_spec.pdst = entry->key.dport;
+ fs->m_u.tcp_ip4_spec.ip4src = entry->msk.sip4;
+ fs->m_u.tcp_ip4_spec.ip4dst = entry->msk.dip4;
+ fs->m_u.tcp_ip4_spec.psrc = entry->msk.sport;
+ fs->m_u.tcp_ip4_spec.pdst = entry->msk.dport;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ for (i = 0; i < 4; i++) {
+ fs->h_u.tcp_ip6_spec.ip6src[i] = entry->key.sip6[i];
+ fs->h_u.tcp_ip6_spec.ip6dst[i] = entry->key.dip6[i];
+ fs->m_u.tcp_ip6_spec.ip6src[i] = entry->msk.sip6[i];
+ fs->m_u.tcp_ip6_spec.ip6dst[i] = entry->msk.dip6[i];
+ }
+ fs->h_u.tcp_ip6_spec.psrc = entry->key.sport;
+ fs->h_u.tcp_ip6_spec.pdst = entry->key.dport;
+ fs->m_u.tcp_ip6_spec.psrc = entry->msk.sport;
+ fs->m_u.tcp_ip6_spec.pdst = entry->msk.dport;
+ break;
+ case IPV4_USER_FLOW:
+ fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+ fs->h_u.usr_ip4_spec.ip4src = entry->key.sip4;
+ fs->h_u.usr_ip4_spec.ip4dst = entry->key.dip4;
+ fs->h_u.usr_ip4_spec.proto = entry->key.l4_proto;
+ fs->m_u.usr_ip4_spec.ip4src = entry->msk.sip4;
+ fs->m_u.usr_ip4_spec.ip4dst = entry->msk.dip4;
+ fs->m_u.usr_ip4_spec.proto = entry->msk.l4_proto;
+ break;
+ case IPV6_USER_FLOW:
+ for (i = 0; i < 4; i++) {
+ fs->h_u.usr_ip6_spec.ip6src[i] = entry->key.sip6[i];
+ fs->h_u.usr_ip6_spec.ip6dst[i] = entry->key.dip6[i];
+ fs->m_u.usr_ip6_spec.ip6src[i] = entry->msk.sip6[i];
+ fs->m_u.usr_ip6_spec.ip6dst[i] = entry->msk.dip6[i];
+ }
+ fs->h_u.usr_ip6_spec.l4_proto = entry->key.l4_proto;
+ fs->m_u.usr_ip6_spec.l4_proto = entry->msk.l4_proto;
+ break;
+ case ETHER_FLOW:
+ fs->h_u.ether_spec.h_proto = entry->key.l3_proto;
+ fs->m_u.ether_spec.h_proto = entry->msk.l3_proto;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Reported flow type keeps the FLOW_RSS flag, if any. */
+ fs->flow_type = entry->flow_type;
+ fs->ring_cookie = entry->action;
+
+ if (fs->flow_type & FLOW_RSS) {
+ /* Only rss_context of 0 is supported. */
+ cmd->rss_context = 0;
+ /* RSS is used, mask the ring. */
+ fs->ring_cookie |= ETHTOOL_RX_FLOW_SPEC_RING;
+ }
+
+ return 0;
+}
+
+/* Look up the rule stored at location @cmd->fs.location and report it
+ * through @cmd.
+ *
+ * Returns -EOPNOTSUPP without the flow steering capability, -EINVAL for
+ * a location beyond NFP_FS_MAX_ENTRY, -ENOENT if no rule is stored at
+ * that location, otherwise the result of the conversion to ethtool
+ * format.
+ */
+static int nfp_net_get_fs_rule(struct nfp_net *nn, struct ethtool_rxnfc *cmd)
+{
+	struct nfp_fs_entry *entry;
+
+	if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER))
+		return -EOPNOTSUPP;
+
+	if (cmd->fs.location >= NFP_FS_MAX_ENTRY)
+		return -EINVAL;
+
+	list_for_each_entry(entry, &nn->fs.list, node) {
+		if (entry->loc < cmd->fs.location)
+			continue;
+
+		/* Past the requested location: no need to continue. */
+		if (entry->loc > cmd->fs.location)
+			break;
+
+		return nfp_net_fs_to_ethtool(entry, cmd);
+	}
+
+	return -ENOENT;
+}
+
+/* Copy the location of every stored rule into @rule_locs, in list order.
+ *
+ * Returns -EOPNOTSUPP without the flow steering capability, 0 otherwise.
+ *
+ * NOTE(review): nothing here bounds the number of entries written, so
+ * @rule_locs must have room for every rule on the list - confirm the
+ * caller guarantees that.
+ */
+static int nfp_net_get_fs_loc(struct nfp_net *nn, u32 *rule_locs)
+{
+	struct nfp_fs_entry *entry;
+	u32 *next = rule_locs;
+
+	if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER))
+		return -EOPNOTSUPP;
+
+	list_for_each_entry(entry, &nn->fs.list, node) {
+		*next = entry->loc;
+		next++;
+	}
+
+	return 0;
+}
+
static int nfp_net_get_rxnfc(struct net_device *netdev,
struct ethtool_rxnfc *cmd, u32 *rule_locs)
{
@@ -1326,6 +1436,14 @@ static int nfp_net_get_rxnfc(struct net_device *netdev,
case ETHTOOL_GRXRINGS:
cmd->data = nn->dp.num_rx_rings;
return 0;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = nn->fs.count;
+ return 0;
+ case ETHTOOL_GRXCLSRULE:
+ return nfp_net_get_fs_rule(nn, cmd);
+ case ETHTOOL_GRXCLSRLALL:
+ cmd->data = NFP_FS_MAX_ENTRY;
+ return nfp_net_get_fs_loc(nn, rule_locs);
case ETHTOOL_GRXFH:
return nfp_net_get_rss_hash_opts(nn, cmd);
default:
@@ -1385,6 +1503,253 @@ static int nfp_net_set_rss_hash_opt(struct nfp_net *nn,
return 0;
}
+/* Translate an ethtool flow spec into the driver's flow-steering entry.
+ *
+ * Masks are copied as given; every key field is stored already ANDed with
+ * its mask. flow_type, ring_cookie and location are copied unchanged.
+ *
+ * Returns 0 on success or -EINVAL when the flow type (FLOW_RSS bit ignored)
+ * is not one of the handled cases.
+ */
+static int nfp_net_fs_from_ethtool(struct nfp_fs_entry *entry, struct ethtool_rx_flow_spec *fs)
+{
+ unsigned int i;
+
+ /* FLOW_EXT/FLOW_MAC_EXT is not supported. */
+ switch (fs->flow_type & ~FLOW_RSS) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ entry->msk.sip4 = fs->m_u.tcp_ip4_spec.ip4src;
+ entry->msk.dip4 = fs->m_u.tcp_ip4_spec.ip4dst;
+ entry->msk.sport = fs->m_u.tcp_ip4_spec.psrc;
+ entry->msk.dport = fs->m_u.tcp_ip4_spec.pdst;
+ entry->key.sip4 = fs->h_u.tcp_ip4_spec.ip4src & entry->msk.sip4;
+ entry->key.dip4 = fs->h_u.tcp_ip4_spec.ip4dst & entry->msk.dip4;
+ entry->key.sport = fs->h_u.tcp_ip4_spec.psrc & entry->msk.sport;
+ entry->key.dport = fs->h_u.tcp_ip4_spec.pdst & entry->msk.dport;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ /* IPv6 addresses are handled as four 32-bit words. */
+ for (i = 0; i < 4; i++) {
+ entry->msk.sip6[i] = fs->m_u.tcp_ip6_spec.ip6src[i];
+ entry->msk.dip6[i] = fs->m_u.tcp_ip6_spec.ip6dst[i];
+ entry->key.sip6[i] = fs->h_u.tcp_ip6_spec.ip6src[i] & entry->msk.sip6[i];
+ entry->key.dip6[i] = fs->h_u.tcp_ip6_spec.ip6dst[i] & entry->msk.dip6[i];
+ }
+ entry->msk.sport = fs->m_u.tcp_ip6_spec.psrc;
+ entry->msk.dport = fs->m_u.tcp_ip6_spec.pdst;
+ entry->key.sport = fs->h_u.tcp_ip6_spec.psrc & entry->msk.sport;
+ entry->key.dport = fs->h_u.tcp_ip6_spec.pdst & entry->msk.dport;
+ break;
+ case IPV4_USER_FLOW:
+ entry->msk.sip4 = fs->m_u.usr_ip4_spec.ip4src;
+ entry->msk.dip4 = fs->m_u.usr_ip4_spec.ip4dst;
+ entry->msk.l4_proto = fs->m_u.usr_ip4_spec.proto;
+ entry->key.sip4 = fs->h_u.usr_ip4_spec.ip4src & entry->msk.sip4;
+ entry->key.dip4 = fs->h_u.usr_ip4_spec.ip4dst & entry->msk.dip4;
+ entry->key.l4_proto = fs->h_u.usr_ip4_spec.proto & entry->msk.l4_proto;
+ break;
+ case IPV6_USER_FLOW:
+ for (i = 0; i < 4; i++) {
+ entry->msk.sip6[i] = fs->m_u.usr_ip6_spec.ip6src[i];
+ entry->msk.dip6[i] = fs->m_u.usr_ip6_spec.ip6dst[i];
+ entry->key.sip6[i] = fs->h_u.usr_ip6_spec.ip6src[i] & entry->msk.sip6[i];
+ entry->key.dip6[i] = fs->h_u.usr_ip6_spec.ip6dst[i] & entry->msk.dip6[i];
+ }
+ entry->msk.l4_proto = fs->m_u.usr_ip6_spec.l4_proto;
+ entry->key.l4_proto = fs->h_u.usr_ip6_spec.l4_proto & entry->msk.l4_proto;
+ break;
+ case ETHER_FLOW:
+ /* Only the EtherType is taken; MAC addresses are not stored. */
+ entry->msk.l3_proto = fs->m_u.ether_spec.h_proto;
+ entry->key.l3_proto = fs->h_u.ether_spec.h_proto & entry->msk.l3_proto;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* For the TCP/UDP/SCTP flow types, force an exact match (mask 0xff) on
+ * the corresponding IP protocol number; other flow types keep what the
+ * switch above stored.
+ */
+ switch (fs->flow_type & ~FLOW_RSS) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ entry->key.l4_proto = IPPROTO_TCP;
+ entry->msk.l4_proto = 0xff;
+ break;
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ entry->key.l4_proto = IPPROTO_UDP;
+ entry->msk.l4_proto = 0xff;
+ break;
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ entry->key.l4_proto = IPPROTO_SCTP;
+ entry->msk.l4_proto = 0xff;
+ break;
+ }
+
+ /* flow_type is stored with the FLOW_RSS bit intact. */
+ entry->flow_type = fs->flow_type;
+ entry->action = fs->ring_cookie;
+ entry->loc = fs->location;
+
+ return 0;
+}
+
+/* Search the installed rules for one that matches @new (same flow type
+ * apart from the FLOW_RSS bit, same key and same mask) but lives at a
+ * different location.
+ *
+ * Returns the location of the first such rule, or -1 when there is none.
+ */
+static int nfp_net_fs_check_existing(struct nfp_net *nn, struct nfp_fs_entry *new)
+{
+	struct nfp_fs_entry *cur;
+
+	list_for_each_entry(cur, &nn->fs.list, node) {
+		/* The rule at the same location is not a duplicate. */
+		if (cur->loc == new->loc)
+			continue;
+
+		if ((new->flow_type ^ cur->flow_type) & ~FLOW_RSS)
+			continue;
+
+		if (memcmp(&new->key, &cur->key, sizeof(new->key)))
+			continue;
+
+		if (memcmp(&new->msk, &cur->msk, sizeof(new->msk)))
+			continue;
+
+		return cur->loc;
+	}
+
+	/* -1 means no duplicates */
+	return -1;
+}
+
+/* Install the flow-steering rule described by cmd->fs, replacing any rule
+ * already stored at the same location.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when flow steering, the RSS context,
+ * the flow type or one of the masks is not supported, -EINVAL for a bad
+ * location/ring or a duplicate of an existing rule, -ENOMEM on allocation
+ * failure, -ENOSPC when the table is full, or the error reported by
+ * nfp_net_fs_del_hw()/nfp_net_fs_add_hw().
+ */
+static int nfp_net_fs_add(struct nfp_net *nn, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fs = &cmd->fs;
+	struct nfp_fs_entry *new, *entry;
+	bool unsupp_mask;
+	int err, id;
+
+	if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER))
+		return -EOPNOTSUPP;
+
+	/* Only default RSS context(0) is supported. */
+	if ((fs->flow_type & FLOW_RSS) && cmd->rss_context)
+		return -EOPNOTSUPP;
+
+	if (fs->location >= NFP_FS_MAX_ENTRY)
+		return -EINVAL;
+
+	if (fs->ring_cookie != RX_CLS_FLOW_DISC &&
+	    fs->ring_cookie >= nn->dp.num_rx_rings)
+		return -EINVAL;
+
+	/* FLOW_EXT/FLOW_MAC_EXT is not supported. */
+	switch (fs->flow_type & ~FLOW_RSS) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		unsupp_mask = !!fs->m_u.tcp_ip4_spec.tos;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		unsupp_mask = !!fs->m_u.tcp_ip6_spec.tclass;
+		break;
+	case IPV4_USER_FLOW:
+		unsupp_mask = !!fs->m_u.usr_ip4_spec.l4_4_bytes ||
+			      !!fs->m_u.usr_ip4_spec.tos ||
+			      !!fs->m_u.usr_ip4_spec.ip_ver;
+		/* ip_ver must be ETH_RX_NFC_IP4. */
+		unsupp_mask |= fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4;
+		break;
+	case IPV6_USER_FLOW:
+		unsupp_mask = !!fs->m_u.usr_ip6_spec.l4_4_bytes ||
+			      !!fs->m_u.usr_ip6_spec.tclass;
+		break;
+	case ETHER_FLOW:
+		if (fs->h_u.ether_spec.h_proto == htons(ETH_P_IP) ||
+		    fs->h_u.ether_spec.h_proto == htons(ETH_P_IPV6)) {
+			nn_err(nn, "Please use ip4/ip6 flow type instead.\n");
+			return -EOPNOTSUPP;
+		}
+		/* Only unmasked ethtype is supported. */
+		unsupp_mask = !is_zero_ether_addr(fs->m_u.ether_spec.h_dest) ||
+			      !is_zero_ether_addr(fs->m_u.ether_spec.h_source) ||
+			      (fs->m_u.ether_spec.h_proto != htons(0xffff));
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (unsupp_mask)
+		return -EOPNOTSUPP;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* The flow type was validated above, so this is not expected to
+	 * fail; the result is still checked rather than silently dropped.
+	 */
+	err = nfp_net_fs_from_ethtool(new, fs);
+	if (err)
+		goto err;
+
+	id = nfp_net_fs_check_existing(nn, new);
+	if (id >= 0) {
+		nn_err(nn, "Identical rule is existing in %d.\n", id);
+		err = -EINVAL;
+		goto err;
+	}
+
+	/* Insert to list in ascending order of location. */
+	list_for_each_entry(entry, &nn->fs.list, node) {
+		if (entry->loc == fs->location) {
+			err = nfp_net_fs_del_hw(nn, entry);
+			if (err)
+				goto err;
+
+			nn->fs.count--;
+			err = nfp_net_fs_add_hw(nn, new);
+			if (err) {
+				/* The old rule has already been removed from
+				 * the device and from the count. Drop its
+				 * list entry too, so the list, the count and
+				 * the hardware stay in agreement.
+				 */
+				list_del(&entry->node);
+				kfree(entry);
+				goto err;
+			}
+
+			nn->fs.count++;
+			list_replace(&entry->node, &new->node);
+			kfree(entry);
+
+			return 0;
+		}
+
+		if (entry->loc > fs->location)
+			break;
+	}
+
+	if (nn->fs.count == NFP_FS_MAX_ENTRY) {
+		err = -ENOSPC;
+		goto err;
+	}
+
+	err = nfp_net_fs_add_hw(nn, new);
+	if (err)
+		goto err;
+
+	/* entry is either the first rule with a higher location or, when the
+	 * loop ran to completion, the cursor whose node is the list head;
+	 * inserting before it keeps the list sorted in both cases.
+	 */
+	list_add_tail(&new->node, &entry->node);
+	nn->fs.count++;
+
+	return 0;
+
+err:
+	kfree(new);
+	return err;
+}
+
+/* Remove the flow-steering rule stored at cmd->fs.location.
+ *
+ * Returns -EOPNOTSUPP without the FLOW_STEER capability, -EINVAL when no
+ * rules are installed or the location is outside the table, -ENOENT when
+ * the location is empty, the error from nfp_net_fs_del_hw() if the device
+ * refuses the removal, and 0 on success.
+ */
+static int nfp_net_fs_del(struct nfp_net *nn, struct ethtool_rxnfc *cmd)
+{
+	struct nfp_fs_entry *it, *victim = NULL;
+	u32 loc = cmd->fs.location;
+	int err;
+
+	if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER))
+		return -EOPNOTSUPP;
+
+	if (!nn->fs.count)
+		return -EINVAL;
+
+	if (loc >= NFP_FS_MAX_ENTRY)
+		return -EINVAL;
+
+	/* Sorted list: stop at the first entry that is not below the target. */
+	list_for_each_entry(it, &nn->fs.list, node) {
+		if (it->loc < loc)
+			continue;
+
+		if (it->loc == loc)
+			victim = it;
+
+		break;
+	}
+
+	if (!victim)
+		return -ENOENT;
+
+	/* Only drop the software state once the device accepted the removal. */
+	err = nfp_net_fs_del_hw(nn, victim);
+	if (err)
+		return err;
+
+	list_del(&victim->node);
+	kfree(victim);
+	nn->fs.count--;
+
+	return 0;
+}
+
static int nfp_net_set_rxnfc(struct net_device *netdev,
struct ethtool_rxnfc *cmd)
{
@@ -1393,6 +1758,10 @@ static int nfp_net_set_rxnfc(struct net_device *netdev,
switch (cmd->cmd) {
case ETHTOOL_SRXFH:
return nfp_net_set_rss_hash_opt(nn, cmd);
+ case ETHTOOL_SRXCLSRLINS:
+ return nfp_net_fs_add(nn, cmd);
+ case ETHTOOL_SRXCLSRLDEL:
+ return nfp_net_fs_del(nn, cmd);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 295366a..dbc5c9d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3100,6 +3100,33 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168g_2);
}
+/* Write the fix_data table word by word to MAC OCP addresses 0xf800,
+ * 0xf802, ... (two bytes per entry), holding tp->mac_ocp_lock with
+ * interrupts saved/disabled for the whole burst.
+ */
+static void rtl8411b_fix_phy_down(struct rtl8169_private *tp)
+{
+ /* Each row holds eight 16-bit words; the label in front of a row is
+ * the OCP address its first word is written to.
+ */
+ static const u16 fix_data[] = {
+/* 0xf800 */ 0xe008, 0xe00a, 0xe00c, 0xe00e, 0xe027, 0xe04f, 0xe05e, 0xe065,
+/* 0xf810 */ 0xc602, 0xbe00, 0x0000, 0xc502, 0xbd00, 0x074c, 0xc302, 0xbb00,
+/* 0xf820 */ 0x080a, 0x6420, 0x48c2, 0x8c20, 0xc516, 0x64a4, 0x49c0, 0xf009,
+/* 0xf830 */ 0x74a2, 0x8ca5, 0x74a0, 0xc50e, 0x9ca2, 0x1c11, 0x9ca0, 0xe006,
+/* 0xf840 */ 0x74f8, 0x48c4, 0x8cf8, 0xc404, 0xbc00, 0xc403, 0xbc00, 0x0bf2,
+/* 0xf850 */ 0x0c0a, 0xe434, 0xd3c0, 0x49d9, 0xf01f, 0xc526, 0x64a5, 0x1400,
+/* 0xf860 */ 0xf007, 0x0c01, 0x8ca5, 0x1c15, 0xc51b, 0x9ca0, 0xe013, 0xc519,
+/* 0xf870 */ 0x74a0, 0x48c4, 0x8ca0, 0xc516, 0x74a4, 0x48c8, 0x48ca, 0x9ca4,
+/* 0xf880 */ 0xc512, 0x1b00, 0x9ba0, 0x1b1c, 0x483f, 0x9ba2, 0x1b04, 0xc508,
+/* 0xf890 */ 0x9ba0, 0xc505, 0xbd00, 0xc502, 0xbd00, 0x0300, 0x051e, 0xe434,
+/* 0xf8a0 */ 0xe018, 0xe092, 0xde20, 0xd3c0, 0xc50f, 0x76a4, 0x49e3, 0xf007,
+/* 0xf8b0 */ 0x49c0, 0xf103, 0xc607, 0xbe00, 0xc606, 0xbe00, 0xc602, 0xbe00,
+/* 0xf8c0 */ 0x0c4c, 0x0c28, 0x0c2c, 0xdc00, 0xc707, 0x1d00, 0x8de2, 0x48c1,
+/* 0xf8d0 */ 0xc502, 0xbd00, 0x00aa, 0xe0c0, 0xc502, 0xbd00, 0x0132
+ };
+ unsigned long flags;
+ int i;
+
+ /* The lock is taken once around the loop and the double-underscore
+ * write helper is used inside it - presumably the variant that does
+ * not take mac_ocp_lock itself; confirm against its definition.
+ */
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ for (i = 0; i < ARRAY_SIZE(fix_data); i++)
+ __r8168_mac_ocp_write(tp, 0xf800 + 2 * i, fix_data[i]);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+}
+
static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8411_2[] = {
@@ -3133,117 +3160,7 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
mdelay(3);
r8168_mac_ocp_write(tp, 0xFC26, 0x0000);
- r8168_mac_ocp_write(tp, 0xF800, 0xE008);
- r8168_mac_ocp_write(tp, 0xF802, 0xE00A);
- r8168_mac_ocp_write(tp, 0xF804, 0xE00C);
- r8168_mac_ocp_write(tp, 0xF806, 0xE00E);
- r8168_mac_ocp_write(tp, 0xF808, 0xE027);
- r8168_mac_ocp_write(tp, 0xF80A, 0xE04F);
- r8168_mac_ocp_write(tp, 0xF80C, 0xE05E);
- r8168_mac_ocp_write(tp, 0xF80E, 0xE065);
- r8168_mac_ocp_write(tp, 0xF810, 0xC602);
- r8168_mac_ocp_write(tp, 0xF812, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF814, 0x0000);
- r8168_mac_ocp_write(tp, 0xF816, 0xC502);
- r8168_mac_ocp_write(tp, 0xF818, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF81A, 0x074C);
- r8168_mac_ocp_write(tp, 0xF81C, 0xC302);
- r8168_mac_ocp_write(tp, 0xF81E, 0xBB00);
- r8168_mac_ocp_write(tp, 0xF820, 0x080A);
- r8168_mac_ocp_write(tp, 0xF822, 0x6420);
- r8168_mac_ocp_write(tp, 0xF824, 0x48C2);
- r8168_mac_ocp_write(tp, 0xF826, 0x8C20);
- r8168_mac_ocp_write(tp, 0xF828, 0xC516);
- r8168_mac_ocp_write(tp, 0xF82A, 0x64A4);
- r8168_mac_ocp_write(tp, 0xF82C, 0x49C0);
- r8168_mac_ocp_write(tp, 0xF82E, 0xF009);
- r8168_mac_ocp_write(tp, 0xF830, 0x74A2);
- r8168_mac_ocp_write(tp, 0xF832, 0x8CA5);
- r8168_mac_ocp_write(tp, 0xF834, 0x74A0);
- r8168_mac_ocp_write(tp, 0xF836, 0xC50E);
- r8168_mac_ocp_write(tp, 0xF838, 0x9CA2);
- r8168_mac_ocp_write(tp, 0xF83A, 0x1C11);
- r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0);
- r8168_mac_ocp_write(tp, 0xF83E, 0xE006);
- r8168_mac_ocp_write(tp, 0xF840, 0x74F8);
- r8168_mac_ocp_write(tp, 0xF842, 0x48C4);
- r8168_mac_ocp_write(tp, 0xF844, 0x8CF8);
- r8168_mac_ocp_write(tp, 0xF846, 0xC404);
- r8168_mac_ocp_write(tp, 0xF848, 0xBC00);
- r8168_mac_ocp_write(tp, 0xF84A, 0xC403);
- r8168_mac_ocp_write(tp, 0xF84C, 0xBC00);
- r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2);
- r8168_mac_ocp_write(tp, 0xF850, 0x0C0A);
- r8168_mac_ocp_write(tp, 0xF852, 0xE434);
- r8168_mac_ocp_write(tp, 0xF854, 0xD3C0);
- r8168_mac_ocp_write(tp, 0xF856, 0x49D9);
- r8168_mac_ocp_write(tp, 0xF858, 0xF01F);
- r8168_mac_ocp_write(tp, 0xF85A, 0xC526);
- r8168_mac_ocp_write(tp, 0xF85C, 0x64A5);
- r8168_mac_ocp_write(tp, 0xF85E, 0x1400);
- r8168_mac_ocp_write(tp, 0xF860, 0xF007);
- r8168_mac_ocp_write(tp, 0xF862, 0x0C01);
- r8168_mac_ocp_write(tp, 0xF864, 0x8CA5);
- r8168_mac_ocp_write(tp, 0xF866, 0x1C15);
- r8168_mac_ocp_write(tp, 0xF868, 0xC51B);
- r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0);
- r8168_mac_ocp_write(tp, 0xF86C, 0xE013);
- r8168_mac_ocp_write(tp, 0xF86E, 0xC519);
- r8168_mac_ocp_write(tp, 0xF870, 0x74A0);
- r8168_mac_ocp_write(tp, 0xF872, 0x48C4);
- r8168_mac_ocp_write(tp, 0xF874, 0x8CA0);
- r8168_mac_ocp_write(tp, 0xF876, 0xC516);
- r8168_mac_ocp_write(tp, 0xF878, 0x74A4);
- r8168_mac_ocp_write(tp, 0xF87A, 0x48C8);
- r8168_mac_ocp_write(tp, 0xF87C, 0x48CA);
- r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4);
- r8168_mac_ocp_write(tp, 0xF880, 0xC512);
- r8168_mac_ocp_write(tp, 0xF882, 0x1B00);
- r8168_mac_ocp_write(tp, 0xF884, 0x9BA0);
- r8168_mac_ocp_write(tp, 0xF886, 0x1B1C);
- r8168_mac_ocp_write(tp, 0xF888, 0x483F);
- r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2);
- r8168_mac_ocp_write(tp, 0xF88C, 0x1B04);
- r8168_mac_ocp_write(tp, 0xF88E, 0xC508);
- r8168_mac_ocp_write(tp, 0xF890, 0x9BA0);
- r8168_mac_ocp_write(tp, 0xF892, 0xC505);
- r8168_mac_ocp_write(tp, 0xF894, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF896, 0xC502);
- r8168_mac_ocp_write(tp, 0xF898, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF89A, 0x0300);
- r8168_mac_ocp_write(tp, 0xF89C, 0x051E);
- r8168_mac_ocp_write(tp, 0xF89E, 0xE434);
- r8168_mac_ocp_write(tp, 0xF8A0, 0xE018);
- r8168_mac_ocp_write(tp, 0xF8A2, 0xE092);
- r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20);
- r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0);
- r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F);
- r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4);
- r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3);
- r8168_mac_ocp_write(tp, 0xF8AE, 0xF007);
- r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0);
- r8168_mac_ocp_write(tp, 0xF8B2, 0xF103);
- r8168_mac_ocp_write(tp, 0xF8B4, 0xC607);
- r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8B8, 0xC606);
- r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8BC, 0xC602);
- r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C);
- r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28);
- r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C);
- r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00);
- r8168_mac_ocp_write(tp, 0xF8C8, 0xC707);
- r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00);
- r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2);
- r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1);
- r8168_mac_ocp_write(tp, 0xF8D0, 0xC502);
- r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA);
- r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0);
- r8168_mac_ocp_write(tp, 0xF8D8, 0xC502);
- r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF8DC, 0x0132);
+ rtl8411b_fix_phy_down(tp);
r8168_mac_ocp_write(tp, 0xFC26, 0x8000);
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 8ef5b02..733cbb6 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -44,7 +44,16 @@
select CRC32
select MII
select PHYLINK
+ select RENESAS_GEN4_PTP
help
Renesas Ethernet Switch device driver.
+# Helper symbol for the shared R-Car Gen4 gPTP code: the prompt is only
+# offered under COMPILE_TEST; otherwise it is enabled via "select" from
+# the Ethernet Switch entry above.
+config RENESAS_GEN4_PTP
+ tristate "Renesas R-Car Gen4 gPTP support" if COMPILE_TEST
+ select CRC32
+ select MII
+ select PHYLIB
+ help
+ Renesas R-Car Gen4 gPTP device driver.
+
endif # NET_VENDOR_RENESAS
diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile
index e8fd85b..9070acf 100644
--- a/drivers/net/ethernet/renesas/Makefile
+++ b/drivers/net/ethernet/renesas/Makefile
@@ -8,5 +8,6 @@
ravb-objs := ravb_main.o ravb_ptp.o
obj-$(CONFIG_RAVB) += ravb.o
-rswitch_drv-objs := rswitch.o rcar_gen4_ptp.o
-obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch_drv.o
+obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o
+
+obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o
diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c
index c007e33..72e7fcc 100644
--- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c
+++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c
@@ -14,7 +14,7 @@
#include "rcar_gen4_ptp.h"
#define ptp_to_priv(ptp) container_of(ptp, struct rcar_gen4_ptp_private, info)
-static const struct rcar_gen4_ptp_reg_offset s4_offs = {
+static const struct rcar_gen4_ptp_reg_offset gen4_offs = {
.enable = PTPTMEC,
.disable = PTPTMDC,
.increment = PTPTIVC0,
@@ -130,25 +130,42 @@ static struct ptp_clock_info rcar_gen4_ptp_info = {
.enable = rcar_gen4_ptp_enable,
};
-static void rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv,
- enum rcar_gen4_ptp_reg_layout layout)
+static int rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv,
+ enum rcar_gen4_ptp_reg_layout layout)
{
- WARN_ON(layout != RCAR_GEN4_PTP_REG_LAYOUT_S4);
+ if (layout != RCAR_GEN4_PTP_REG_LAYOUT)
+ return -EINVAL;
- ptp_priv->offs = &s4_offs;
+ ptp_priv->offs = &gen4_offs;
+
+ return 0;
+}
+
+/* Convert a clock rate in Hz into the timer increment value, i.e. the clock
+ * period in nanoseconds as a fixed-point number with 27 fractional bits.
+ *
+ * NOTE(review): rate is used as the divisor unchecked; this assumes callers
+ * never pass 0 - TODO confirm.
+ */
+static s64 rcar_gen4_ptp_rate_to_increment(u32 rate)
+{
+ /* Timer increment in ns.
+ * bit[31:27] - integer
+ * bit[26:0] - decimal
+ * increment = period[ns] * 2^27 = (10^9 * 2^27) / rate[Hz]
+ * e.g. 320 MHz -> 3.125 ns -> 0x19000000
+ */
+ return div_s64(1000000000LL << 27, rate);
}
int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv,
- enum rcar_gen4_ptp_reg_layout layout, u32 clock)
+ enum rcar_gen4_ptp_reg_layout layout, u32 rate)
{
+ int ret;
+
if (ptp_priv->initialized)
return 0;
spin_lock_init(&ptp_priv->lock);
- rcar_gen4_ptp_set_offs(ptp_priv, layout);
+ ret = rcar_gen4_ptp_set_offs(ptp_priv, layout);
+ if (ret)
+ return ret;
- ptp_priv->default_addend = clock;
+ ptp_priv->default_addend = rcar_gen4_ptp_rate_to_increment(rate);
iowrite32(ptp_priv->default_addend, ptp_priv->addr + ptp_priv->offs->increment);
ptp_priv->clock = ptp_clock_register(&ptp_priv->info, NULL);
if (IS_ERR(ptp_priv->clock))
@@ -159,6 +176,7 @@ int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv,
return 0;
}
+EXPORT_SYMBOL_GPL(rcar_gen4_ptp_register);
int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv)
{
@@ -166,6 +184,7 @@ int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv)
return ptp_clock_unregister(ptp_priv->clock);
}
+EXPORT_SYMBOL_GPL(rcar_gen4_ptp_unregister);
struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev)
{
@@ -179,3 +198,8 @@ struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev)
return ptp;
}
+EXPORT_SYMBOL_GPL(rcar_gen4_ptp_alloc);
+
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("Renesas R-Car Gen4 gPTP driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h
index b1bbea8..e22da5a 100644
--- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h
+++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h
@@ -9,13 +9,10 @@
#include <linux/ptp_clock_kernel.h>
-#define PTPTIVC_INIT 0x19000000 /* 320MHz */
-#define RCAR_GEN4_PTP_CLOCK_S4 PTPTIVC_INIT
#define RCAR_GEN4_GPTP_OFFSET_S4 0x00018000
-/* for rcar_gen4_ptp_init */
enum rcar_gen4_ptp_reg_layout {
- RCAR_GEN4_PTP_REG_LAYOUT_S4
+ RCAR_GEN4_PTP_REG_LAYOUT
};
/* driver's definitions */
@@ -28,7 +25,7 @@ enum rcar_gen4_ptp_reg_layout {
#define PTPRO 0
-enum rcar_gen4_ptp_reg_s4 {
+enum rcar_gen4_ptp_reg {
PTPTMEC = PTPRO + 0x0010,
PTPTMDC = PTPRO + 0x0014,
PTPTIVC0 = PTPRO + 0x0020,
@@ -65,7 +62,7 @@ struct rcar_gen4_ptp_private {
};
int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv,
- enum rcar_gen4_ptp_reg_layout layout, u32 clock);
+ enum rcar_gen4_ptp_reg_layout layout, u32 rate);
int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv);
struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 43a7795..d608942 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1828,8 +1828,8 @@ static int rswitch_init(struct rswitch_private *priv)
rswitch_fwd_init(priv);
- err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT_S4,
- RCAR_GEN4_PTP_CLOCK_S4);
+ err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT,
+ clk_get_rate(priv->clk));
if (err < 0)
goto err_ptp_register;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e3f650e..6b93592 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -580,6 +580,7 @@ struct mac_device_info {
u32 vlan_filter[32];
bool vlan_fail_q_en;
u8 vlan_fail_q;
+ bool hw_vlan_en;
};
struct stmmac_rx_routing {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index c6ff1fa..5f35faf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -1134,6 +1134,35 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
return 0;
}
+/* If the RX descriptor reports a valid VLAN tag, attach it to the skb as a
+ * hardware-accelerated 802.1Q tag; otherwise leave the skb untouched.
+ */
+static void dwmac4_rx_hw_vlan(struct mac_device_info *hw,
+			      struct dma_desc *rx_desc, struct sk_buff *skb)
+{
+	u16 tci;
+
+	if (!hw->desc->get_rx_vlan_valid(rx_desc))
+		return;
+
+	tci = hw->desc->get_rx_vlan_tci(rx_desc);
+	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tci);
+}
+
+/* Program the VLAN tag register according to hw->hw_vlan_en: strip the tag
+ * on every received frame when enabled, never strip when disabled. In both
+ * cases the outer VLAN tag is reported in the Rx DMA descriptor.
+ */
+static void dwmac4_set_hw_vlan_mode(struct mac_device_info *hw)
+{
+	void __iomem *reg = hw->pcsr + GMAC_VLAN_TAG;
+	u32 strip_mode;
+	u32 val;
+
+	/* Always strip VLAN on Receive, or do not strip at all */
+	strip_mode = hw->hw_vlan_en ? GMAC_VLAN_TAG_STRIP_ALL :
+				      GMAC_VLAN_TAG_STRIP_NONE;
+
+	val = readl(reg);
+	val &= ~GMAC_VLAN_TAG_CTRL_EVLS_MASK;
+	val |= strip_mode;
+	/* Enable outer VLAN Tag in Rx DMA descriptor */
+	val |= GMAC_VLAN_TAG_CTRL_EVLRXS;
+	writel(val, reg);
+}
+
const struct stmmac_ops dwmac4_ops = {
.core_init = dwmac4_core_init,
.phylink_get_caps = dwmac4_phylink_get_caps,
@@ -1175,6 +1204,8 @@ const struct stmmac_ops dwmac4_ops = {
.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
+ .rx_hw_vlan = dwmac4_rx_hw_vlan,
+ .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode,
};
const struct stmmac_ops dwmac410_ops = {
@@ -1224,6 +1255,8 @@ const struct stmmac_ops dwmac410_ops = {
.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
+ .rx_hw_vlan = dwmac4_rx_hw_vlan,
+ .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode,
};
const struct stmmac_ops dwmac510_ops = {
@@ -1277,6 +1310,8 @@ const struct stmmac_ops dwmac510_ops = {
.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
+ .rx_hw_vlan = dwmac4_rx_hw_vlan,
+ .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode,
};
static u32 dwmac4_get_num_vlan(void __iomem *ioaddr)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 89a1408..1c5802e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -198,6 +198,17 @@ static int dwmac4_get_tx_ls(struct dma_desc *p)
>> TDES3_LAST_DESCRIPTOR_SHIFT;
}
+/* Extract the VLAN tag bits from word 0 of a written-back RX descriptor. */
+static u16 dwmac4_wrback_get_rx_vlan_tci(struct dma_desc *p)
+{
+	u32 rdes0 = le32_to_cpu(p->des0);
+
+	return rdes0 & RDES0_VLAN_TAG_MASK;
+}
+
+/* Report whether a written-back RX descriptor is the last descriptor of a
+ * frame and also flags its RDES0 contents as valid.
+ */
+static bool dwmac4_wrback_get_rx_vlan_valid(struct dma_desc *p)
+{
+	u32 rdes3 = le32_to_cpu(p->des3);
+
+	if (!(rdes3 & RDES3_LAST_DESCRIPTOR))
+		return false;
+
+	return !!(rdes3 & RDES3_RDES0_VALID);
+}
+
static int dwmac4_wrback_get_rx_frame_len(struct dma_desc *p, int rx_coe)
{
return (le32_to_cpu(p->des3) & RDES3_PACKET_SIZE_MASK);
@@ -551,6 +562,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = {
.set_tx_owner = dwmac4_set_tx_owner,
.set_rx_owner = dwmac4_set_rx_owner,
.get_tx_ls = dwmac4_get_tx_ls,
+ .get_rx_vlan_tci = dwmac4_wrback_get_rx_vlan_tci,
+ .get_rx_vlan_valid = dwmac4_wrback_get_rx_vlan_valid,
.get_rx_frame_len = dwmac4_wrback_get_rx_frame_len,
.enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp,
.get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index b95d3e1..1d424c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -56,6 +56,10 @@ struct stmmac_desc_ops {
void (*set_tx_ic)(struct dma_desc *p);
/* Last tx segment reports the transmit status */
int (*get_tx_ls)(struct dma_desc *p);
+ /* Get the VLAN tag (TCI) reported in the RX descriptor */
+ u16 (*get_rx_vlan_tci)(struct dma_desc *p);
+ /* Check whether the RX descriptor carries a valid VLAN tag */
+ bool (*get_rx_vlan_valid)(struct dma_desc *p);
/* Return the transmit status looking at the TDES1 */
int (*tx_status)(struct stmmac_extra_stats *x,
struct dma_desc *p, void __iomem *ioaddr);
@@ -117,6 +121,10 @@ struct stmmac_desc_ops {
stmmac_do_void_callback(__priv, desc, set_tx_ic, __args)
#define stmmac_get_tx_ls(__priv, __args...) \
stmmac_do_callback(__priv, desc, get_tx_ls, __args)
+#define stmmac_get_rx_vlan_tci(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_rx_vlan_tci, __args)
+#define stmmac_get_rx_vlan_valid(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_rx_vlan_valid, __args)
#define stmmac_tx_status(__priv, __args...) \
stmmac_do_callback(__priv, desc, tx_status, __args)
#define stmmac_get_tx_len(__priv, __args...) \
@@ -388,6 +396,9 @@ struct stmmac_ops {
void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash,
__le16 perfect_match, bool is_double);
void (*enable_vlan)(struct mac_device_info *hw, u32 type);
+ void (*rx_hw_vlan)(struct mac_device_info *hw, struct dma_desc *rx_desc,
+ struct sk_buff *skb);
+ void (*set_hw_vlan_mode)(struct mac_device_info *hw);
int (*add_hw_vlan_rx_fltr)(struct net_device *dev,
struct mac_device_info *hw,
__be16 proto, u16 vid);
@@ -497,6 +508,10 @@ struct stmmac_ops {
stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args)
#define stmmac_enable_vlan(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, enable_vlan, __args)
+#define stmmac_rx_hw_vlan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, rx_hw_vlan, __args)
+#define stmmac_set_hw_vlan_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_hw_vlan_mode, __args)
#define stmmac_add_hw_vlan_rx_fltr(__priv, __args...) \
stmmac_do_callback(__priv, mac, add_hw_vlan_rx_fltr, __args)
#define stmmac_del_hw_vlan_rx_fltr(__priv, __args...) \
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 2afb2bd..8964fc8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3469,6 +3469,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
/* Start the ball rolling... */
stmmac_start_all_dma(priv);
+ stmmac_set_hw_vlan_mode(priv, priv->hw);
+
if (priv->dma_cap.fpesel) {
stmmac_fpe_start_wq(priv);
@@ -4993,7 +4995,12 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
}
stmmac_get_rx_hwtstamp(priv, p, np, skb);
- stmmac_rx_vlan(priv->dev, skb);
+ if (priv->hw->hw_vlan_en)
+ /* MAC level stripping. */
+ stmmac_rx_hw_vlan(priv, priv->hw, p, skb);
+ else
+ /* Driver level stripping. */
+ stmmac_rx_vlan(priv->dev, skb);
skb->protocol = eth_type_trans(skb, priv->dev);
if (unlikely(!coe))
@@ -5509,7 +5516,14 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
/* Got entire packet into SKB. Finish it. */
stmmac_get_rx_hwtstamp(priv, p, np, skb);
- stmmac_rx_vlan(priv->dev, skb);
+
+ if (priv->hw->hw_vlan_en)
+ /* MAC level stripping. */
+ stmmac_rx_hw_vlan(priv, priv->hw, p, skb);
+ else
+ /* Driver level stripping. */
+ stmmac_rx_vlan(priv->dev, skb);
+
skb->protocol = eth_type_trans(skb, priv->dev);
if (unlikely(!coe))
@@ -5818,6 +5832,13 @@ static int stmmac_set_features(struct net_device *netdev,
stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
}
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ priv->hw->hw_vlan_en = true;
+ else
+ priv->hw->hw_vlan_en = false;
+
+ stmmac_set_hw_vlan_mode(priv, priv->hw);
+
return 0;
}
@@ -6180,30 +6201,23 @@ static struct dentry *stmmac_fs_dir;
static void sysfs_display_ring(void *head, int size, int extend_desc,
struct seq_file *seq, dma_addr_t dma_phy_addr)
{
- int i;
struct dma_extended_desc *ep = (struct dma_extended_desc *)head;
struct dma_desc *p = (struct dma_desc *)head;
+ unsigned int desc_size;
dma_addr_t dma_addr;
+ int i;
+ desc_size = extend_desc ? sizeof(*ep) : sizeof(*p);
for (i = 0; i < size; i++) {
- if (extend_desc) {
- dma_addr = dma_phy_addr + i * sizeof(*ep);
- seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
- i, &dma_addr,
- le32_to_cpu(ep->basic.des0),
- le32_to_cpu(ep->basic.des1),
- le32_to_cpu(ep->basic.des2),
- le32_to_cpu(ep->basic.des3));
- ep++;
- } else {
- dma_addr = dma_phy_addr + i * sizeof(*p);
- seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
- i, &dma_addr,
- le32_to_cpu(p->des0), le32_to_cpu(p->des1),
- le32_to_cpu(p->des2), le32_to_cpu(p->des3));
+ dma_addr = dma_phy_addr + i * desc_size;
+ seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+ i, &dma_addr,
+ le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+ le32_to_cpu(p->des2), le32_to_cpu(p->des3));
+ if (extend_desc)
+ p = &(++ep)->basic;
+ else
p++;
- }
- seq_printf(seq, "\n");
}
}
@@ -7516,6 +7530,9 @@ int stmmac_dvr_probe(struct device *device,
#ifdef STMMAC_VLAN_TAG_USED
/* Both mac100 and gmac support receive VLAN tag detection */
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX;
+ ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ priv->hw->hw_vlan_en = true;
+
if (priv->dma_cap.vlhash) {
ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
index c51e2af..b9e1d56 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
@@ -662,6 +662,31 @@ static void am65_cpsw_get_ethtool_stats(struct net_device *ndev,
hw_stats[i].offset);
}
+/* ethtool .get_eth_mac_stats callback: fill the standard MAC counters in @s
+ * from this port's hardware statistics registers (port->stat_base).
+ * Fields of @s that have no counterpart here are left untouched.
+ */
+static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev,
+					struct ethtool_eth_mac_stats *s)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpsw_stats_regs __iomem *stats;
+
+	stats = port->stat_base;
+
+	s->FramesTransmittedOK = readl_relaxed(&stats->tx_good_frames);
+	s->SingleCollisionFrames = readl_relaxed(&stats->tx_single_coll_frames);
+	s->MultipleCollisionFrames = readl_relaxed(&stats->tx_mult_coll_frames);
+	s->FramesReceivedOK = readl_relaxed(&stats->rx_good_frames);
+	s->FrameCheckSequenceErrors = readl_relaxed(&stats->rx_crc_errors);
+	s->AlignmentErrors = readl_relaxed(&stats->rx_align_code_errors);
+	s->OctetsTransmittedOK = readl_relaxed(&stats->tx_octets);
+	s->FramesWithDeferredXmissions = readl_relaxed(&stats->tx_deferred_frames);
+	s->LateCollisions = readl_relaxed(&stats->tx_late_collisions);
+	s->CarrierSenseErrors = readl_relaxed(&stats->tx_carrier_sense_errors);
+	s->OctetsReceivedOK = readl_relaxed(&stats->rx_octets);
+	s->MulticastFramesXmittedOK = readl_relaxed(&stats->tx_multicast_frames);
+	s->BroadcastFramesXmittedOK = readl_relaxed(&stats->tx_broadcast_frames);
+	s->MulticastFramesReceivedOK = readl_relaxed(&stats->rx_multicast_frames);
+	s->BroadcastFramesReceivedOK = readl_relaxed(&stats->rx_broadcast_frames);
+}
+
static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev,
struct ethtool_ts_info *info)
{
@@ -729,6 +754,7 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = {
.get_sset_count = am65_cpsw_get_sset_count,
.get_strings = am65_cpsw_get_strings,
.get_ethtool_stats = am65_cpsw_get_ethtool_stats,
+ .get_eth_mac_stats = am65_cpsw_get_eth_mac_stats,
.get_ts_info = am65_cpsw_get_ethtool_ts_info,
.get_priv_flags = am65_cpsw_get_ethtool_priv_flags,
.set_priv_flags = am65_cpsw_set_ethtool_priv_flags,
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index ece9f8d..7992a76 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -136,6 +136,8 @@
NETIF_MSG_IFUP | NETIF_MSG_PROBE | NETIF_MSG_IFDOWN | \
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
+#define AM65_CPSW_DEFAULT_TX_CHNS 8
+
static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave,
const u8 *dev_addr)
{
@@ -367,10 +369,81 @@ static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common);
static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port);
static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port);
+static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
+{
+ struct am65_cpsw_rx_chn *rx_chn = data; /* callback cookie is the RX channel context */
+ struct cppi5_host_desc_t *desc_rx;
+ struct sk_buff *skb;
+ dma_addr_t buf_dma;
+ u32 buf_dma_len;
+ void **swdata;
+
+ desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); /* look up the descriptor for this DMA address */
+ swdata = cppi5_hdesc_get_swdata(desc_rx);
+ skb = *swdata; /* skb pointer is read back from the descriptor's software data */
+ cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
+ k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
+
+ dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE);
+ k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* descriptor goes back to the pool */
+
+ dev_kfree_skb_any(skb); /* finally release the skb that was attached to the descriptor */
+}
+
+static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
+ struct cppi5_host_desc_t *desc)
+{
+ struct cppi5_host_desc_t *first_desc, *next_desc;
+ dma_addr_t buf_dma, next_desc_dma;
+ u32 buf_dma_len;
+
+ first_desc = desc;
+ next_desc = first_desc; /* placeholder; reassigned in the loop before any use */
+
+ cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
+ k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
+
+ dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); /* head buffer uses the _single unmap */
+
+ next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc);
+ k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma);
+ while (next_desc_dma) { /* walk the chained descriptors until the link is zero */
+ next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool,
+ next_desc_dma);
+ cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len);
+ k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
+
+ dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len,
+ DMA_TO_DEVICE); /* chained buffers use the _page unmap */
+
+ next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); /* fetch the link before the descriptor is freed */
+ k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma);
+
+ k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
+ }
+
+ k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); /* head descriptor is released last */
+}
+
+static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma)
+{
+ struct am65_cpsw_tx_chn *tx_chn = data; /* callback cookie is the TX channel context */
+ struct cppi5_host_desc_t *desc_tx;
+ struct sk_buff *skb;
+ void **swdata;
+
+ desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
+ swdata = cppi5_hdesc_get_swdata(desc_tx);
+ skb = *(swdata); /* read the skb pointer before the descriptor chain is freed */
+ am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); /* unmaps buffers and frees every descriptor in the chain */
+
+ dev_kfree_skb_any(skb);
+}
+
static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
{
struct am65_cpsw_host *host_p = am65_common_get_host(common);
- int port_idx, i, ret;
+ int port_idx, i, ret, tx;
struct sk_buff *skb;
u32 val, port_mask;
@@ -437,8 +510,12 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
AM65_CPSW_MAX_PACKET_SIZE,
GFP_KERNEL);
if (!skb) {
+ ret = -ENOMEM;
dev_err(common->dev, "cannot allocate skb\n");
- return -ENOMEM;
+ if (i)
+ goto fail_rx;
+
+ return ret;
}
ret = am65_cpsw_nuss_rx_push(common, skb);
@@ -447,17 +524,28 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
"cannot submit skb to channel rx, error %d\n",
ret);
kfree_skb(skb);
+ if (i)
+ goto fail_rx;
+
return ret;
}
- kmemleak_not_leak(skb);
}
- k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn);
- for (i = 0; i < common->tx_ch_num; i++) {
- ret = k3_udma_glue_enable_tx_chn(common->tx_chns[i].tx_chn);
- if (ret)
- return ret;
- napi_enable(&common->tx_chns[i].napi_tx);
+ ret = k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn);
+ if (ret) {
+ dev_err(common->dev, "couldn't enable rx chn: %d\n", ret);
+ goto fail_rx;
+ }
+
+ for (tx = 0; tx < common->tx_ch_num; tx++) {
+ ret = k3_udma_glue_enable_tx_chn(common->tx_chns[tx].tx_chn);
+ if (ret) {
+ dev_err(common->dev, "couldn't enable tx chn %d: %d\n",
+ tx, ret);
+ tx--;
+ goto fail_tx;
+ }
+ napi_enable(&common->tx_chns[tx].napi_tx);
}
napi_enable(&common->napi_rx);
@@ -468,10 +556,22 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
dev_dbg(common->dev, "cpsw_nuss started\n");
return 0;
-}
-static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma);
-static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma);
+fail_tx:
+ while (tx >= 0) {
+ napi_disable(&common->tx_chns[tx].napi_tx);
+ k3_udma_glue_disable_tx_chn(common->tx_chns[tx].tx_chn);
+ tx--;
+ }
+
+ k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn);
+
+fail_rx:
+ k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, 0,
+ &common->rx_chns,
+ am65_cpsw_nuss_rx_cleanup, 0);
+ return ret;
+}
static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
{
@@ -646,27 +746,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
return ret;
}
-static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
-{
- struct am65_cpsw_rx_chn *rx_chn = data;
- struct cppi5_host_desc_t *desc_rx;
- struct sk_buff *skb;
- dma_addr_t buf_dma;
- u32 buf_dma_len;
- void **swdata;
-
- desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma);
- swdata = cppi5_hdesc_get_swdata(desc_rx);
- skb = *swdata;
- cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
- k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
-
- dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE);
- k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
-
- dev_kfree_skb_any(skb);
-}
-
static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata)
{
struct skb_shared_hwtstamps *ssh;
@@ -840,56 +919,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
return num_rx;
}
-static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
- struct cppi5_host_desc_t *desc)
-{
- struct cppi5_host_desc_t *first_desc, *next_desc;
- dma_addr_t buf_dma, next_desc_dma;
- u32 buf_dma_len;
-
- first_desc = desc;
- next_desc = first_desc;
-
- cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
- k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
-
- dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE);
-
- next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc);
- k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma);
- while (next_desc_dma) {
- next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool,
- next_desc_dma);
- cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len);
- k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
-
- dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len,
- DMA_TO_DEVICE);
-
- next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc);
- k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma);
-
- k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
- }
-
- k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
-}
-
-static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma)
-{
- struct am65_cpsw_tx_chn *tx_chn = data;
- struct cppi5_host_desc_t *desc_tx;
- struct sk_buff *skb;
- void **swdata;
-
- desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
- swdata = cppi5_hdesc_get_swdata(desc_tx);
- skb = *(swdata);
- am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
-
- dev_kfree_skb_any(skb);
-}
-
static struct sk_buff *
am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn,
dma_addr_t desc_dma)
@@ -2897,7 +2926,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
common->rx_flow_id_base = -1;
init_completion(&common->tdown_complete);
- common->tx_ch_num = 1;
+ common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS;
common->pf_p0_rx_ptype_rrobin = false;
common->default_vlan = 1;
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index 0014729..35d96c6 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -26,6 +26,7 @@
config XILINX_AXI_EMAC
tristate "Xilinx 10/100/1000 AXI Ethernet support"
depends on HAS_IOMEM
+ depends on XILINX_DMA
select PHYLINK
help
This driver supports the 10/100/1000 Ethernet from Xilinx for the
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 575ff9d..807ead6 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/if_vlan.h>
#include <linux/phylink.h>
+#include <linux/skbuff.h>
/* Packet size info */
#define XAE_HDR_SIZE 14 /* Size of Ethernet header */
@@ -379,6 +380,22 @@ struct axidma_bd {
#define XAE_NUM_MISC_CLOCKS 3
/**
+ * struct skbuf_dma_descriptor - skb for each dma descriptor
+ * @sgl: Pointer for sglist.
+ * @desc: Pointer to dma descriptor.
+ * @dma_address: dma address of sglist.
+ * @skb: Pointer to SKB transferred using DMA
+ * @sg_len: number of entries in the sglist.
+ */
+struct skbuf_dma_descriptor {
+ struct scatterlist sgl[MAX_SKB_FRAGS + 1]; /* room for the linear part plus every fragment */
+ struct dma_async_tx_descriptor *desc; /* stored by the RX submit path; RX callback reads metadata through it */
+ dma_addr_t dma_address; /* RX buffer mapping, needed again for dma_unmap_single() */
+ struct sk_buff *skb; /* skb owned by this slot while the transfer is in flight */
+ int sg_len; /* set by the TX path; reused for dma_unmap_sg() on completion */
+};
+
+/**
* struct axienet_local - axienet private per device data
* @ndev: Pointer for net_device to which it will be attached.
* @dev: Pointer to device structure
@@ -435,6 +452,15 @@ struct axidma_bd {
* @coalesce_usec_rx: IRQ coalesce delay for RX
* @coalesce_count_tx: Store the irq coalesce on TX side.
* @coalesce_usec_tx: IRQ coalesce delay for TX
+ * @use_dmaengine: flag to check dmaengine framework usage.
+ * @tx_chan: TX DMA channel.
+ * @rx_chan: RX DMA channel.
+ * @tx_skb_ring: Pointer to TX skb ring buffer array.
+ * @rx_skb_ring: Pointer to RX skb ring buffer array.
+ * @tx_ring_head: TX skb ring buffer head index.
+ * @tx_ring_tail: TX skb ring buffer tail index.
+ * @rx_ring_head: RX skb ring buffer head index.
+ * @rx_ring_tail: RX skb ring buffer tail index.
*/
struct axienet_local {
struct net_device *ndev;
@@ -499,6 +525,15 @@ struct axienet_local {
u32 coalesce_usec_rx;
u32 coalesce_count_tx;
u32 coalesce_usec_tx;
+ u8 use_dmaengine;
+ struct dma_chan *tx_chan;
+ struct dma_chan *rx_chan;
+ struct skbuf_dma_descriptor **tx_skb_ring;
+ struct skbuf_dma_descriptor **rx_skb_ring;
+ int tx_ring_head;
+ int tx_ring_tail;
+ int rx_ring_head;
+ int rx_ring_tail;
};
/**
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index bf6e339..aaf780f 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -38,6 +38,11 @@
#include <linux/phy.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/xilinx_dma.h>
+#include <linux/circ_buf.h>
+#include <net/netdev_queues.h>
#include "xilinx_axienet.h"
@@ -47,6 +52,9 @@
#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1)
#define TX_BD_NUM_MAX 4096
#define RX_BD_NUM_MAX 4096
+#define DMA_NUM_APP_WORDS 5
+#define LEN_APP 4
+#define RX_BUF_NUM_DEFAULT 128
/* Must be shorter than length of ethtool_drvinfo.driver field to fit */
#define DRIVER_NAME "xaxienet"
@@ -55,6 +63,8 @@
#define AXIENET_REGS_N 40
+static void axienet_rx_submit_desc(struct net_device *ndev);
+
/* Match table for of_platform binding */
static const struct of_device_id axienet_of_match[] = {
{ .compatible = "xlnx,axi-ethernet-1.00.a", },
@@ -120,6 +130,16 @@ static struct axienet_option axienet_options[] = {
{}
};
+static struct skbuf_dma_descriptor *axienet_get_rx_desc(struct axienet_local *lp, int i)
+{
+ return lp->rx_skb_ring[i & (RX_BUF_NUM_DEFAULT - 1)]; /* free-running index wrapped by mask; needs a power-of-two ring size (128) */
+}
+
+static struct skbuf_dma_descriptor *axienet_get_tx_desc(struct axienet_local *lp, int i)
+{
+ return lp->tx_skb_ring[i & (TX_BD_NUM_MAX - 1)]; /* free-running index wrapped by mask; needs a power-of-two ring size (4096) */
+}
+
/**
* axienet_dma_in32 - Memory mapped Axi DMA register read
* @lp: Pointer to axienet local structure
@@ -589,10 +609,6 @@ static int axienet_device_reset(struct net_device *ndev)
struct axienet_local *lp = netdev_priv(ndev);
int ret;
- ret = __axienet_device_reset(lp);
- if (ret)
- return ret;
-
lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE;
lp->options |= XAE_OPTION_VLAN;
lp->options &= (~XAE_OPTION_JUMBO);
@@ -606,11 +622,17 @@ static int axienet_device_reset(struct net_device *ndev)
lp->options |= XAE_OPTION_JUMBO;
}
- ret = axienet_dma_bd_init(ndev);
- if (ret) {
- netdev_err(ndev, "%s: descriptor allocation failed\n",
- __func__);
- return ret;
+ if (!lp->use_dmaengine) {
+ ret = __axienet_device_reset(lp);
+ if (ret)
+ return ret;
+
+ ret = axienet_dma_bd_init(ndev);
+ if (ret) {
+ netdev_err(ndev, "%s: descriptor allocation failed\n",
+ __func__);
+ return ret;
+ }
}
axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET);
@@ -726,6 +748,128 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
}
/**
+ * axienet_dma_tx_cb - DMA engine callback for TX channel.
+ * @data: Pointer to the axienet_local structure.
+ * @result: error reporting through dmaengine_result.
+ * This function is called by dmaengine driver for TX channel to notify
+ * that the transmit is done.
+ */
+static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result)
+{
+ struct skbuf_dma_descriptor *skbuf_dma;
+ struct axienet_local *lp = data; /* callback_param set by the xmit path; @result is not inspected */
+ struct netdev_queue *txq;
+ int len;
+
+ skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++); /* consume the tail slot; assumes completions arrive in submission order */
+ len = skbuf_dma->skb->len; /* cached because the skb is released below */
+ txq = skb_get_tx_queue(lp->ndev, skbuf_dma->skb);
+ u64_stats_update_begin(&lp->tx_stat_sync);
+ u64_stats_add(&lp->tx_bytes, len);
+ u64_stats_add(&lp->tx_packets, 1);
+ u64_stats_update_end(&lp->tx_stat_sync);
+ dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE);
+ dev_consume_skb_any(skbuf_dma->skb);
+ netif_txq_completed_wake(txq, 1, len, /* one packet, len bytes completed */
+ CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX),
+ 2 * MAX_SKB_FRAGS); /* same wake threshold the xmit path passes to netif_txq_maybe_stop() */
+}
+
+/**
+ * axienet_start_xmit_dmaengine - Starts the transmission.
+ * @skb: sk_buff pointer that contains data to be Txed.
+ * @ndev: Pointer to net_device structure.
+ *
+ * Return: NETDEV_TX_OK on success or on any error not caused by lack of space.
+ * NETDEV_TX_BUSY when free element in TX skb ring buffer
+ * is not available.
+ *
+ * This function is invoked to initiate transmission. It maps the
+ * skb into a scatterlist, registers the DMA completion callback and
+ * submits the DMA transaction.
+ * Additionally if checksum offloading is supported,
+ * it populates AXI Stream Control fields with appropriate values.
+ */
+static netdev_tx_t
+axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct dma_async_tx_descriptor *dma_tx_desc = NULL;
+ struct axienet_local *lp = netdev_priv(ndev);
+ u32 app_metadata[DMA_NUM_APP_WORDS] = {0};
+ struct skbuf_dma_descriptor *skbuf_dma;
+ struct dma_device *dma_dev;
+ struct netdev_queue *txq;
+ u32 csum_start_off;
+ u32 csum_index_off;
+ int sg_len;
+ int ret;
+
+ dma_dev = lp->tx_chan->device;
+ sg_len = skb_shinfo(skb)->nr_frags + 1; /* linear part plus one entry per fragment */
+ if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) {
+ netif_stop_queue(ndev);
+ if (net_ratelimit())
+ netdev_warn(ndev, "TX ring unexpectedly full\n");
+ return NETDEV_TX_BUSY; /* skb is not freed here; the stack keeps ownership */
+ }
+
+ skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head); /* peek only; the slot is claimed further down */
+ if (!skbuf_dma)
+ goto xmit_error_drop_skb;
+
+ sg_init_table(skbuf_dma->sgl, sg_len);
+ ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len);
+ if (ret < 0)
+ goto xmit_error_drop_skb;
+
+ ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE);
+ if (!ret)
+ goto xmit_error_drop_skb;
+
+ /* Fill up app fields for checksum */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (lp->features & XAE_FEATURE_FULL_TX_CSUM) {
+ /* Tx Full Checksum Offload Enabled */
+ app_metadata[0] |= 2;
+ } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) {
+ csum_start_off = skb_transport_offset(skb);
+ csum_index_off = csum_start_off + skb->csum_offset;
+ /* Tx Partial Checksum Offload Enabled */
+ app_metadata[0] |= 1;
+ app_metadata[1] = (csum_start_off << 16) | csum_index_off;
+ }
+ } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */
+ }
+
+ dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl,
+ sg_len, DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT, (void *)app_metadata);
+ if (!dma_tx_desc)
+ goto xmit_error_unmap_sg;
+
+ skbuf_dma->skb = skb;
+ skbuf_dma->sg_len = sg_len;
+ dma_tx_desc->callback_param = lp;
+ dma_tx_desc->callback_result = axienet_dma_tx_cb;
+ lp->tx_ring_head++; /* claim the slot only now: every claimed slot must get exactly one completion */
+ dmaengine_submit(dma_tx_desc);
+ dma_async_issue_pending(lp->tx_chan);
+ txq = skb_get_tx_queue(lp->ndev, skb);
+ netdev_tx_sent_queue(txq, skb->len);
+ netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX),
+ MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS);
+
+ return NETDEV_TX_OK;
+
+xmit_error_unmap_sg:
+ dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE);
+xmit_error_drop_skb:
+ dev_kfree_skb_any(skb); /* packet is dropped; tx_ring_head is untouched so head/tail stay in step */
+ return NETDEV_TX_OK;
+}
+
+/**
* axienet_tx_poll - Invoked once a transmit is completed by the
* Axi DMA Tx channel.
* @napi: Pointer to NAPI structure.
@@ -892,6 +1036,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
/**
+ * axienet_dma_rx_cb - DMA engine callback for RX channel.
+ * @data: Pointer to the axienet_local structure.
+ * @result: error reporting through dmaengine_result.
+ * This function is called by dmaengine driver for RX channel to notify
+ * that the packet is received.
+ */
+static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result)
+{
+ struct skbuf_dma_descriptor *skbuf_dma;
+ size_t meta_len, meta_max_len, rx_len;
+ struct axienet_local *lp = data; /* callback_param set by axienet_rx_submit_desc(); @result is not inspected */
+ struct sk_buff *skb;
+ u32 *app_metadata;
+
+ skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); /* consume the tail slot; assumes completions arrive in submission order */
+ skb = skbuf_dma->skb;
+ app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len,
+ &meta_max_len); /* NOTE(review): result is used unchecked - confirm it cannot be an error pointer here */
+ dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size,
+ DMA_FROM_DEVICE);
+ /* TODO: Derive app word index programmatically */
+ rx_len = (app_metadata[LEN_APP] & 0xFFFF); /* frame length is the low 16 bits of app word LEN_APP */
+ skb_put(skb, rx_len); /* NOTE(review): rx_len is not compared with max_frm_size - confirm hardware bounds it */
+ skb->protocol = eth_type_trans(skb, lp->ndev);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ __netif_rx(skb);
+ u64_stats_update_begin(&lp->rx_stat_sync);
+ u64_stats_add(&lp->rx_packets, 1);
+ u64_stats_add(&lp->rx_bytes, rx_len);
+ u64_stats_update_end(&lp->rx_stat_sync);
+ axienet_rx_submit_desc(lp->ndev); /* queue one replacement buffer for the one just consumed */
+ dma_async_issue_pending(lp->rx_chan);
+}
+
+/**
* axienet_rx_poll - Triggered by RX ISR to complete the BD processing.
* @napi: Pointer to NAPI structure.
* @budget: Max number of RX packets to process.
@@ -1125,41 +1305,159 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev)
static void axienet_dma_err_handler(struct work_struct *work);
/**
- * axienet_open - Driver open routine.
- * @ndev: Pointer to net_device structure
+ * axienet_rx_submit_desc - Submit one RX descriptor to dmaengine.
+ * Allocate an skb, map its data buffer, obtain a DMA descriptor,
+ * then attach the callback information and submit the descriptor.
+ *
+ * @ndev: net_device pointer
+ *
+ */
+static void axienet_rx_submit_desc(struct net_device *ndev)
+{
+ struct dma_async_tx_descriptor *dma_rx_desc = NULL;
+ struct axienet_local *lp = netdev_priv(ndev);
+ struct skbuf_dma_descriptor *skbuf_dma;
+ struct sk_buff *skb;
+ dma_addr_t addr;
+
+ skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head); /* peek only; the slot is claimed further down */
+ if (!skbuf_dma)
+ return;
+
+ skb = netdev_alloc_skb(ndev, lp->max_frm_size);
+ if (!skb)
+ return; /* nothing queued; rx_ring_head is untouched so the slot is retried next time */
+
+ sg_init_table(skbuf_dma->sgl, 1);
+ addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(lp->dev, addr))) {
+ if (net_ratelimit())
+ netdev_err(ndev, "DMA mapping error\n");
+ goto rx_submit_err_free_skb;
+ }
+ sg_dma_address(skbuf_dma->sgl) = addr;
+ sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size;
+ dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl,
+ 1, DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT);
+ if (!dma_rx_desc)
+ goto rx_submit_err_unmap_skb;
+
+ skbuf_dma->skb = skb;
+ skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl); /* kept for dma_unmap_single() in the RX callback */
+ skbuf_dma->desc = dma_rx_desc; /* kept so the RX callback can fetch the metadata */
+ dma_rx_desc->callback_param = lp;
+ dma_rx_desc->callback_result = axienet_dma_rx_cb;
+ lp->rx_ring_head++; /* claim the slot only now: every claimed slot must get exactly one completion */
+ dmaengine_submit(dma_rx_desc); /* caller is expected to follow up with dma_async_issue_pending() */
+
+ return;
+
+rx_submit_err_unmap_skb:
+ dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE);
+rx_submit_err_free_skb:
+ dev_kfree_skb(skb);
+}
+
+/**
+ * axienet_init_dmaengine - init the dmaengine code.
+ * @ndev: Pointer to net_device structure
*
* Return: 0, on success.
- * non-zero error value on failure
+ * non-zero error value on failure
*
- * This is the driver open routine. It calls phylink_start to start the
- * PHY device.
- * It also allocates interrupt service routines, enables the interrupt lines
- * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer
- * descriptors are initialized.
+ * This is the dmaengine initialization code.
*/
-static int axienet_open(struct net_device *ndev)
+static int axienet_init_dmaengine(struct net_device *ndev)
+{
+ struct axienet_local *lp = netdev_priv(ndev);
+ struct skbuf_dma_descriptor *skbuf_dma;
+ int i, ret;
+
+ lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
+ if (IS_ERR(lp->tx_chan)) {
+ dev_err(lp->dev, "No Ethernet DMA (TX) channel found\n");
+ return PTR_ERR(lp->tx_chan); /* nothing acquired yet, so no unwinding needed */
+ }
+
+ lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0");
+ if (IS_ERR(lp->rx_chan)) {
+ ret = PTR_ERR(lp->rx_chan);
+ dev_err(lp->dev, "No Ethernet DMA (RX) channel found\n");
+ goto err_dma_release_tx;
+ }
+
+ lp->tx_ring_tail = 0; /* both rings start empty: head == tail */
+ lp->tx_ring_head = 0;
+ lp->rx_ring_tail = 0;
+ lp->rx_ring_head = 0;
+ lp->tx_skb_ring = kcalloc(TX_BD_NUM_MAX, sizeof(*lp->tx_skb_ring),
+ GFP_KERNEL); /* zeroed pointer array, so the unwind loop may kfree() unfilled slots */
+ if (!lp->tx_skb_ring) {
+ ret = -ENOMEM;
+ goto err_dma_release_rx;
+ }
+ for (i = 0; i < TX_BD_NUM_MAX; i++) {
+ skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
+ if (!skbuf_dma) {
+ ret = -ENOMEM;
+ goto err_free_tx_skb_ring;
+ }
+ lp->tx_skb_ring[i] = skbuf_dma;
+ }
+
+ lp->rx_skb_ring = kcalloc(RX_BUF_NUM_DEFAULT, sizeof(*lp->rx_skb_ring),
+ GFP_KERNEL); /* zeroed pointer array, same reasoning as the TX ring */
+ if (!lp->rx_skb_ring) {
+ ret = -ENOMEM;
+ goto err_free_tx_skb_ring;
+ }
+ for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) {
+ skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
+ if (!skbuf_dma) {
+ ret = -ENOMEM;
+ goto err_free_rx_skb_ring;
+ }
+ lp->rx_skb_ring[i] = skbuf_dma;
+ }
+ /* TODO: Instead of RX_BUF_NUM_DEFAULT use runtime support */
+ for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+ axienet_rx_submit_desc(ndev); /* returns void, so a failed submission is not detected here */
+ dma_async_issue_pending(lp->rx_chan); /* start the RX transfers queued above */
+
+ return 0;
+
+err_free_rx_skb_ring: /* labels unwind in reverse order of acquisition */
+ for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+ kfree(lp->rx_skb_ring[i]);
+ kfree(lp->rx_skb_ring);
+err_free_tx_skb_ring:
+ for (i = 0; i < TX_BD_NUM_MAX; i++)
+ kfree(lp->tx_skb_ring[i]);
+ kfree(lp->tx_skb_ring);
+err_dma_release_rx:
+ dma_release_channel(lp->rx_chan);
+err_dma_release_tx:
+ dma_release_channel(lp->tx_chan);
+ return ret;
+}
+
+/**
+ * axienet_init_legacy_dma - init the dma legacy code.
+ * @ndev: Pointer to net_device structure
+ *
+ * Return: 0, on success.
+ * non-zero error value on failure
+ *
+ * This is the dma initialization code. It also allocates interrupt
+ * service routines, enables the interrupt lines and ISR handling.
+ *
+ */
+static int axienet_init_legacy_dma(struct net_device *ndev)
{
int ret;
struct axienet_local *lp = netdev_priv(ndev);
- dev_dbg(&ndev->dev, "axienet_open()\n");
-
- /* When we do an Axi Ethernet reset, it resets the complete core
- * including the MDIO. MDIO must be disabled before resetting.
- * Hold MDIO bus lock to avoid MDIO accesses during the reset.
- */
- axienet_lock_mii(lp);
- ret = axienet_device_reset(ndev);
- axienet_unlock_mii(lp);
-
- ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0);
- if (ret) {
- dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret);
- return ret;
- }
-
- phylink_start(lp->phylink);
-
/* Enable worker thread for Axi DMA error handling */
INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler);
@@ -1193,14 +1491,77 @@ static int axienet_open(struct net_device *ndev)
err_tx_irq:
napi_disable(&lp->napi_tx);
napi_disable(&lp->napi_rx);
- phylink_stop(lp->phylink);
- phylink_disconnect_phy(lp->phylink);
cancel_work_sync(&lp->dma_err_task);
dev_err(lp->dev, "request_irq() failed\n");
return ret;
}
/**
+ * axienet_open - Driver open routine.
+ * @ndev: Pointer to net_device structure
+ *
+ * Return: 0, on success.
+ * non-zero error value on failure
+ *
+ * This is the driver open routine. It calls phylink_start to start the
+ * PHY device.
+ * It also allocates interrupt service routines, enables the interrupt lines
+ * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer
+ * descriptors are initialized.
+ */
+static int axienet_open(struct net_device *ndev)
+{
+ int ret;
+ struct axienet_local *lp = netdev_priv(ndev);
+
+ dev_dbg(&ndev->dev, "%s\n", __func__);
+
+ /* When we do an Axi Ethernet reset, it resets the complete core
+ * including the MDIO. MDIO must be disabled before resetting.
+ * Hold MDIO bus lock to avoid MDIO accesses during the reset.
+ */
+ axienet_lock_mii(lp);
+ ret = axienet_device_reset(ndev); /* NOTE(review): ret is overwritten below without being checked - confirm intended */
+ axienet_unlock_mii(lp);
+
+ ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0);
+ if (ret) {
+ dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret);
+ return ret;
+ }
+
+ phylink_start(lp->phylink);
+
+ if (lp->use_dmaengine) { /* dmaengine path: only the optional core IRQ is requested here */
+ /* Enable interrupts for Axi Ethernet core (if defined) */
+ if (lp->eth_irq > 0) {
+ ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED,
+ ndev->name, ndev);
+ if (ret)
+ goto err_phy;
+ }
+
+ ret = axienet_init_dmaengine(ndev);
+ if (ret < 0)
+ goto err_free_eth_irq;
+ } else { /* legacy path: the helper does its own IRQ and NAPI setup */
+ ret = axienet_init_legacy_dma(ndev);
+ if (ret)
+ goto err_phy;
+ }
+
+ return 0;
+
+err_free_eth_irq: /* reached only from the dmaengine branch */
+ if (lp->eth_irq > 0)
+ free_irq(lp->eth_irq, ndev);
+err_phy: /* undo phylink_start() and phylink_of_phy_connect() */
+ phylink_stop(lp->phylink);
+ phylink_disconnect_phy(lp->phylink);
+ return ret;
+}
+
+/**
* axienet_stop - Driver stop routine.
* @ndev: Pointer to net_device structure
*
@@ -1213,11 +1574,14 @@ static int axienet_open(struct net_device *ndev)
static int axienet_stop(struct net_device *ndev)
{
struct axienet_local *lp = netdev_priv(ndev);
+ int i;
dev_dbg(&ndev->dev, "axienet_close()\n");
- napi_disable(&lp->napi_tx);
- napi_disable(&lp->napi_rx);
+ if (!lp->use_dmaengine) {
+ napi_disable(&lp->napi_tx);
+ napi_disable(&lp->napi_rx);
+ }
phylink_stop(lp->phylink);
phylink_disconnect_phy(lp->phylink);
@@ -1225,18 +1589,33 @@ static int axienet_stop(struct net_device *ndev)
axienet_setoptions(ndev, lp->options &
~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
- axienet_dma_stop(lp);
+ if (!lp->use_dmaengine) {
+ axienet_dma_stop(lp);
+ cancel_work_sync(&lp->dma_err_task);
+ free_irq(lp->tx_irq, ndev);
+ free_irq(lp->rx_irq, ndev);
+ axienet_dma_bd_release(ndev);
+ } else {
+ dmaengine_terminate_sync(lp->tx_chan);
+ dmaengine_synchronize(lp->tx_chan);
+ dmaengine_terminate_sync(lp->rx_chan);
+ dmaengine_synchronize(lp->rx_chan);
+
+ for (i = 0; i < TX_BD_NUM_MAX; i++)
+ kfree(lp->tx_skb_ring[i]);
+ kfree(lp->tx_skb_ring);
+ for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+ kfree(lp->rx_skb_ring[i]);
+ kfree(lp->rx_skb_ring);
+
+ dma_release_channel(lp->rx_chan);
+ dma_release_channel(lp->tx_chan);
+ }
axienet_iow(lp, XAE_IE_OFFSET, 0);
- cancel_work_sync(&lp->dma_err_task);
-
if (lp->eth_irq > 0)
free_irq(lp->eth_irq, ndev);
- free_irq(lp->tx_irq, ndev);
- free_irq(lp->rx_irq, ndev);
-
- axienet_dma_bd_release(ndev);
return 0;
}
@@ -1333,6 +1712,18 @@ static const struct net_device_ops axienet_netdev_ops = {
#endif
};
+static const struct net_device_ops axienet_netdev_dmaengine_ops = { /* netdev callbacks for the dmaengine-based data path */
+ .ndo_open = axienet_open,
+ .ndo_stop = axienet_stop,
+ .ndo_start_xmit = axienet_start_xmit_dmaengine, /* dmaengine transmit routine instead of axienet_start_xmit */
+ .ndo_get_stats64 = axienet_get_stats64,
+ .ndo_change_mtu = axienet_change_mtu,
+ .ndo_set_mac_address = netdev_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_eth_ioctl = axienet_ioctl,
+ .ndo_set_rx_mode = axienet_set_multicast_list,
+};
+
/**
* axienet_ethtools_get_drvinfo - Get various Axi Ethernet driver information.
* @ndev: Pointer to net_device structure
@@ -1412,14 +1803,16 @@ static void axienet_ethtools_get_regs(struct net_device *ndev,
data[29] = axienet_ior(lp, XAE_FMI_OFFSET);
data[30] = axienet_ior(lp, XAE_AF0_OFFSET);
data[31] = axienet_ior(lp, XAE_AF1_OFFSET);
- data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
- data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
- data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET);
- data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET);
- data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
- data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
- data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET);
- data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET);
+ if (!lp->use_dmaengine) {
+ data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
+ data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+ data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET);
+ data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET);
+ data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
+ data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+ data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET);
+ data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET);
+ }
}
static void
@@ -1863,7 +2256,6 @@ static int axienet_probe(struct platform_device *pdev)
SET_NETDEV_DEV(ndev, &pdev->dev);
ndev->flags &= ~IFF_MULTICAST; /* clear multicast */
ndev->features = NETIF_F_SG;
- ndev->netdev_ops = &axienet_netdev_ops;
ndev->ethtool_ops = &axienet_ethtool_ops;
/* MTU range: 64 - 9000 */
@@ -1880,9 +2272,6 @@ static int axienet_probe(struct platform_device *pdev)
u64_stats_init(&lp->rx_stat_sync);
u64_stats_init(&lp->tx_stat_sync);
- netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll);
- netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll);
-
lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
if (!lp->axi_clk) {
/* For backward compatibility, if named AXI clock is not present,
@@ -2008,82 +2397,118 @@ static int axienet_probe(struct platform_device *pdev)
goto cleanup_clk;
}
- /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
- np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
- if (np) {
- struct resource dmares;
+ if (!of_find_property(pdev->dev.of_node, "dmas", NULL)) {
+ /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
+ np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
- ret = of_address_to_resource(np, 0, &dmares);
- if (ret) {
- dev_err(&pdev->dev,
- "unable to get DMA resource\n");
+ if (np) {
+ struct resource dmares;
+
+ ret = of_address_to_resource(np, 0, &dmares);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "unable to get DMA resource\n");
+ of_node_put(np);
+ goto cleanup_clk;
+ }
+ lp->dma_regs = devm_ioremap_resource(&pdev->dev,
+ &dmares);
+ lp->rx_irq = irq_of_parse_and_map(np, 1);
+ lp->tx_irq = irq_of_parse_and_map(np, 0);
of_node_put(np);
+ lp->eth_irq = platform_get_irq_optional(pdev, 0);
+ } else {
+ /* Check for these resources directly on the Ethernet node. */
+ lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
+ lp->rx_irq = platform_get_irq(pdev, 1);
+ lp->tx_irq = platform_get_irq(pdev, 0);
+ lp->eth_irq = platform_get_irq_optional(pdev, 2);
+ }
+ if (IS_ERR(lp->dma_regs)) {
+ dev_err(&pdev->dev, "could not map DMA regs\n");
+ ret = PTR_ERR(lp->dma_regs);
goto cleanup_clk;
}
- lp->dma_regs = devm_ioremap_resource(&pdev->dev,
- &dmares);
- lp->rx_irq = irq_of_parse_and_map(np, 1);
- lp->tx_irq = irq_of_parse_and_map(np, 0);
- of_node_put(np);
- lp->eth_irq = platform_get_irq_optional(pdev, 0);
- } else {
- /* Check for these resources directly on the Ethernet node. */
- lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
- lp->rx_irq = platform_get_irq(pdev, 1);
- lp->tx_irq = platform_get_irq(pdev, 0);
- lp->eth_irq = platform_get_irq_optional(pdev, 2);
- }
- if (IS_ERR(lp->dma_regs)) {
- dev_err(&pdev->dev, "could not map DMA regs\n");
- ret = PTR_ERR(lp->dma_regs);
- goto cleanup_clk;
- }
- if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
- dev_err(&pdev->dev, "could not determine irqs\n");
- ret = -ENOMEM;
- goto cleanup_clk;
- }
-
- /* Reset core now that clocks are enabled, prior to accessing MDIO */
- ret = __axienet_device_reset(lp);
- if (ret)
- goto cleanup_clk;
-
- /* Autodetect the need for 64-bit DMA pointers.
- * When the IP is configured for a bus width bigger than 32 bits,
- * writing the MSB registers is mandatory, even if they are all 0.
- * We can detect this case by writing all 1's to one such register
- * and see if that sticks: when the IP is configured for 32 bits
- * only, those registers are RES0.
- * Those MSB registers were introduced in IP v7.1, which we check first.
- */
- if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) {
- void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4;
-
- iowrite32(0x0, desc);
- if (ioread32(desc) == 0) { /* sanity check */
- iowrite32(0xffffffff, desc);
- if (ioread32(desc) > 0) {
- lp->features |= XAE_FEATURE_DMA_64BIT;
- addr_width = 64;
- dev_info(&pdev->dev,
- "autodetected 64-bit DMA range\n");
- }
- iowrite32(0x0, desc);
+ if (lp->rx_irq <= 0 || lp->tx_irq <= 0) {
+ dev_err(&pdev->dev, "could not determine irqs\n");
+ ret = -ENOMEM;
+ goto cleanup_clk;
}
- }
- if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {
- dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");
- ret = -EINVAL;
- goto cleanup_clk;
+
+ /* Reset core now that clocks are enabled, prior to accessing MDIO */
+ ret = __axienet_device_reset(lp);
+ if (ret)
+ goto cleanup_clk;
+
+ /* Autodetect the need for 64-bit DMA pointers.
+ * When the IP is configured for a bus width bigger than 32 bits,
+ * writing the MSB registers is mandatory, even if they are all 0.
+ * We can detect this case by writing all 1's to one such register
+ * and see if that sticks: when the IP is configured for 32 bits
+ * only, those registers are RES0.
+ * Those MSB registers were introduced in IP v7.1, which we check first.
+ */
+ if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) {
+ void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4;
+
+ iowrite32(0x0, desc);
+ if (ioread32(desc) == 0) { /* sanity check */
+ iowrite32(0xffffffff, desc);
+ if (ioread32(desc) > 0) {
+ lp->features |= XAE_FEATURE_DMA_64BIT;
+ addr_width = 64;
+ dev_info(&pdev->dev,
+ "autodetected 64-bit DMA range\n");
+ }
+ iowrite32(0x0, desc);
+ }
+ }
+ if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {
+ dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");
+ ret = -EINVAL;
+ goto cleanup_clk;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
+ if (ret) {
+ dev_err(&pdev->dev, "No suitable DMA available\n");
+ goto cleanup_clk;
+ }
+ netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll);
+ netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll);
+ } else {
+ struct xilinx_vdma_config cfg;
+ struct dma_chan *tx_chan;
+
+ lp->eth_irq = platform_get_irq_optional(pdev, 0);
+ if (lp->eth_irq < 0 && lp->eth_irq != -ENXIO) {
+ ret = lp->eth_irq;
+ goto cleanup_clk;
+ }
+ tx_chan = dma_request_chan(lp->dev, "tx_chan0");
+ if (IS_ERR(tx_chan)) {
+ ret = PTR_ERR(tx_chan);
+ dev_err_probe(lp->dev, ret, "No Ethernet DMA (TX) channel found\n");
+ goto cleanup_clk;
+ }
+
+ cfg.reset = 1;
+ /* As name says VDMA but it has support for DMA channel reset */
+ ret = xilinx_vdma_channel_set_config(tx_chan, &cfg);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Reset channel failed\n");
+ dma_release_channel(tx_chan);
+ goto cleanup_clk;
+ }
+
+ dma_release_channel(tx_chan);
+ lp->use_dmaengine = 1;
}
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
- if (ret) {
- dev_err(&pdev->dev, "No suitable DMA available\n");
- goto cleanup_clk;
- }
-
+ if (lp->use_dmaengine)
+ ndev->netdev_ops = &axienet_netdev_dmaengine_ops;
+ else
+ ndev->netdev_ops = &axienet_netdev_ops;
/* Check for Ethernet core IRQ (optional) */
if (lp->eth_irq <= 0)
dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
@@ -2099,8 +2524,8 @@ static int axienet_probe(struct platform_device *pdev)
}
lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
- lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC;
lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+ lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC;
lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC;
ret = axienet_mdio_setup(lp);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c8da94a..88dd8a2 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1086,20 +1086,8 @@ static int macvlan_ethtool_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
struct net_device *real_dev = macvlan_dev_real_dev(dev);
- const struct ethtool_ops *ops = real_dev->ethtool_ops;
- struct phy_device *phydev = real_dev->phydev;
- if (phy_has_tsinfo(phydev)) {
- return phy_ts_info(phydev, info);
- } else if (ops->get_ts_info) {
- return ops->get_ts_info(real_dev, info);
- } else {
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- }
-
- return 0;
+ return ethtool_get_ts_info_by_layer(real_dev, info);
}
static netdev_features_t macvlan_fix_features(struct net_device *dev,
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 421d2b6..25cfc5d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -96,10 +96,7 @@
Currently supports the:
- ADIN1100 - Robust,Industrial, Low Power 10BASE-T1L Ethernet PHY
-config AQUANTIA_PHY
- tristate "Aquantia PHYs"
- help
- Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+source "drivers/net/phy/aquantia/Kconfig"
config AX88796B_PHY
tristate "Asix PHYs"
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index c945ed9..f65e85c 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -35,11 +35,7 @@
obj-$(CONFIG_ADIN_PHY) += adin.o
obj-$(CONFIG_ADIN1100_PHY) += adin1100.o
obj-$(CONFIG_AMD_PHY) += amd.o
-aquantia-objs += aquantia_main.o
-ifdef CONFIG_HWMON
-aquantia-objs += aquantia_hwmon.o
-endif
-obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o
+obj-$(CONFIG_AQUANTIA_PHY) += aquantia/
obj-$(CONFIG_AT803X_PHY) += at803x.o
obj-$(CONFIG_AX88796B_PHY) += ax88796b.o
obj-$(CONFIG_BCM54140_PHY) += bcm54140.o
diff --git a/drivers/net/phy/aquantia.h b/drivers/net/phy/aquantia.h
deleted file mode 100644
index c684b65..0000000
--- a/drivers/net/phy/aquantia.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* HWMON driver for Aquantia PHY
- *
- * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
- * Author: Andrew Lunn <andrew@lunn.ch>
- * Author: Heiner Kallweit <hkallweit1@gmail.com>
- */
-
-#include <linux/device.h>
-#include <linux/phy.h>
-
-#if IS_REACHABLE(CONFIG_HWMON)
-int aqr_hwmon_probe(struct phy_device *phydev);
-#else
-static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; }
-#endif
diff --git a/drivers/net/phy/aquantia/Kconfig b/drivers/net/phy/aquantia/Kconfig
new file mode 100644
index 0000000..a35de4b
--- /dev/null
+++ b/drivers/net/phy/aquantia/Kconfig
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config AQUANTIA_PHY
+ tristate "Aquantia PHYs"
+ select CRC_CCITT
+ help
+ Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
diff --git a/drivers/net/phy/aquantia/Makefile b/drivers/net/phy/aquantia/Makefile
new file mode 100644
index 0000000..aa77fb6
--- /dev/null
+++ b/drivers/net/phy/aquantia/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+aquantia-objs += aquantia_main.o aquantia_firmware.o
+ifdef CONFIG_HWMON
+aquantia-objs += aquantia_hwmon.o
+endif
+obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o
diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h
new file mode 100644
index 0000000..9ed3897
--- /dev/null
+++ b/drivers/net/phy/aquantia/aquantia.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* HWMON driver for Aquantia PHY
+ *
+ * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
+ * Author: Andrew Lunn <andrew@lunn.ch>
+ * Author: Heiner Kallweit <hkallweit1@gmail.com>
+ */
+
+#include <linux/device.h>
+#include <linux/phy.h>
+
+/* Vendor specific 1, MDIO_MMD_VEND1 */
+#define VEND1_GLOBAL_SC 0x0
+#define VEND1_GLOBAL_SC_SOFT_RESET BIT(15)
+#define VEND1_GLOBAL_SC_LOW_POWER BIT(11)
+
+#define VEND1_GLOBAL_FW_ID 0x0020
+#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8)
+#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0)
+
+#define VEND1_GLOBAL_MAILBOX_INTERFACE1 0x0200
+#define VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE BIT(15)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE BIT(14)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET BIT(12)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE1_BUSY BIT(8)
+
+#define VEND1_GLOBAL_MAILBOX_INTERFACE2 0x0201
+#define VEND1_GLOBAL_MAILBOX_INTERFACE3 0x0202
+#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK GENMASK(15, 0)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK, (u16)((x) >> 16))
+#define VEND1_GLOBAL_MAILBOX_INTERFACE4 0x0203
+#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK GENMASK(15, 2)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK, (u16)(x))
+
+#define VEND1_GLOBAL_MAILBOX_INTERFACE5 0x0204
+#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK GENMASK(15, 0)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK, (u16)((x) >> 16))
+#define VEND1_GLOBAL_MAILBOX_INTERFACE6 0x0205
+#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK GENMASK(15, 0)
+#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK, (u16)(x))
+
+/* The following registers all have similar layouts; first the registers... */
+#define VEND1_GLOBAL_CFG_10M 0x0310
+#define VEND1_GLOBAL_CFG_100M 0x031b
+#define VEND1_GLOBAL_CFG_1G 0x031c
+#define VEND1_GLOBAL_CFG_2_5G 0x031d
+#define VEND1_GLOBAL_CFG_5G 0x031e
+#define VEND1_GLOBAL_CFG_10G 0x031f
+/* ...and now the fields */
+#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7)
+#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0
+#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1
+#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2
+
+/* Vendor specific 1, MDIO_MMD_VEND1 */
+#define VEND1_GLOBAL_CONTROL2 0xc001
+#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST BIT(15)
+#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD BIT(6)
+#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL BIT(0)
+
+#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421
+#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422
+#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423
+#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424
+#define VEND1_THERMAL_STAT1 0xc820
+#define VEND1_THERMAL_STAT2 0xc821
+#define VEND1_THERMAL_STAT2_VALID BIT(0)
+#define VEND1_GENERAL_STAT1 0xc830
+#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14)
+#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13)
+#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12)
+#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11)
+
+#define VEND1_GLOBAL_GEN_STAT2 0xc831
+#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15)
+
+#define VEND1_GLOBAL_RSVD_STAT1 0xc885
+#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4)
+#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0)
+
+#define VEND1_GLOBAL_RSVD_STAT9 0xc88d
+#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0)
+#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23
+
+#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00
+#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01
+
+#define VEND1_GLOBAL_INT_STD_MASK 0xff00
+#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15)
+#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14)
+#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13)
+#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12)
+#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11)
+#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10)
+#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9)
+#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8)
+#define VEND1_GLOBAL_INT_STD_MASK_AN2 BIT(7)
+#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6)
+#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0)
+
+#define VEND1_GLOBAL_INT_VEND_MASK 0xff01
+#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15)
+#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14)
+#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13)
+#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12)
+#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11)
+#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2)
+#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1)
+#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0)
+
+#if IS_REACHABLE(CONFIG_HWMON)
+int aqr_hwmon_probe(struct phy_device *phydev);
+#else
+static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; }
+#endif
+
+int aqr_firmware_load(struct phy_device *phydev);
diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c
new file mode 100644
index 0000000..c5f292b
--- /dev/null
+++ b/drivers/net/phy/aquantia/aquantia_firmware.c
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitfield.h>
+#include <linux/of.h>
+#include <linux/firmware.h>
+#include <linux/crc-ccitt.h>
+#include <linux/nvmem-consumer.h>
+
+#include <asm/unaligned.h>
+
+#include "aquantia.h"
+
+#define UP_RESET_SLEEP 100
+
+/* addresses of memory segments in the phy */
+#define DRAM_BASE_ADDR 0x3FFE0000
+#define IRAM_BASE_ADDR 0x40000000
+
+/* firmware image format constants */
+#define VERSION_STRING_SIZE 0x40
+#define VERSION_STRING_OFFSET 0x0200
+/* primary offset is written at an offset from the start of the fw blob */
+#define PRIMARY_OFFSET_OFFSET 0x8
+/* primary offset needs to be then added to a base offset */
+#define PRIMARY_OFFSET_SHIFT 12
+#define PRIMARY_OFFSET(x) ((x) << PRIMARY_OFFSET_SHIFT)
+#define HEADER_OFFSET 0x300
+
+/* Section table found at primary offset + HEADER_OFFSET in the firmware blob.
+ * The four 3-byte fields are read by aqr_fw_boot() as 24-bit little-endian
+ * values via offsetof(), so the layout must stay packed.
+ */
+struct aqr_fw_header {
+ u32 padding;
+ u8 iram_offset[3];
+ u8 iram_size[3];
+ u8 dram_offset[3];
+ u8 dram_size[3];
+} __packed;
+
+/* Where the firmware blob handed to aqr_fw_boot() came from.
+ * Values are used as indexes into aqr_fw_src_string[].
+ */
+enum aqr_fw_src {
+ AQR_FW_SRC_NVMEM = 0,
+ AQR_FW_SRC_FS,
+};
+
+/* Human readable name of each enum aqr_fw_src value, printed by aqr_fw_boot()
+ * in its "loading firmware version" message.
+ */
+static const char * const aqr_fw_src_string[] = {
+ [AQR_FW_SRC_NVMEM] = "NVMEM",
+ [AQR_FW_SRC_FS] = "FS",
+};
+
+/* AQR firmware doesn't have fixed offsets for the iram and dram sections
+ * but instead provides a header with the offsets to use when reading
+ * and parsing the firmware.
+ *
+ * AQR firmware can't be trusted, so every access of @get_size bytes at
+ * @offset is validated to lie fully inside the @size bytes of the blob.
+ * The comparison is written so that it cannot wrap around: a huge @offset
+ * (for example the result of "size - 2" on a blob shorter than 2 bytes)
+ * is rejected instead of overflowing back into range.
+ *
+ * Returns true when the access is within bounds, false otherwise.
+ */
+static bool aqr_fw_validate_get(size_t size, size_t offset, size_t get_size)
+{
+	return get_size <= size && offset <= size - get_size;
+}
+
+/* Fetch the big-endian u16 stored @offset bytes into the @size byte blob
+ * @data. Returns 0 with the result in @value, or -EINVAL when
+ * aqr_fw_validate_get() rejects the access.
+ */
+static int aqr_fw_get_be16(const u8 *data, size_t offset, size_t size, u16 *value)
+{
+	if (aqr_fw_validate_get(size, offset, sizeof(u16))) {
+		*value = get_unaligned_be16(data + offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Fetch the little-endian u16 stored @offset bytes into the @size byte blob
+ * @data. Returns 0 with the result in @value, or -EINVAL when
+ * aqr_fw_validate_get() rejects the access.
+ */
+static int aqr_fw_get_le16(const u8 *data, size_t offset, size_t size, u16 *value)
+{
+	if (aqr_fw_validate_get(size, offset, sizeof(u16))) {
+		*value = get_unaligned_le16(data + offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Fetch the 3-byte little-endian value stored @offset bytes into the @size
+ * byte blob @data. Returns 0 with the result in @value, or -EINVAL when
+ * aqr_fw_validate_get() rejects the access.
+ */
+static int aqr_fw_get_le24(const u8 *data, size_t offset, size_t size, u32 *value)
+{
+	if (aqr_fw_validate_get(size, offset, 3 * sizeof(u8))) {
+		*value = get_unaligned_le24(data + offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * aqr_fw_load_memory - load a firmware section into the PHY's memory
+ * @phydev: PHY being loaded
+ * @addr: destination address inside the PHY, as a CPU-endian value
+ * @data: start of the section inside the firmware blob (little-endian words)
+ * @len: section length in bytes, must be a multiple of sizeof(u32)
+ *
+ * Resets the mailbox CRC, programs the destination address and then pushes
+ * the section through the mailbox one 32-bit word at a time. A CRC is
+ * computed locally over the same words and compared with the one read back
+ * from the mailbox.
+ *
+ * Return: 0 on success, -EINVAL on an unaligned length or a CRC mismatch, or
+ * the negative error returned while reading the mailbox CRC.
+ */
+static int aqr_fw_load_memory(struct phy_device *phydev, u32 addr,
+			      const u8 *data, size_t len)
+{
+	u16 crc = 0, up_crc;
+	size_t pos;
+	int ret;
+
+	/* We assume and enforce the size to be word aligned.
+	 * If a firmware that is not word aligned is found, please report upstream.
+	 * Without this check the loop below would read past the section.
+	 */
+	if (len % sizeof(u32))
+		return -EINVAL;
+
+	/* addr is split arithmetically by the MSW/LSW macros, so it has to
+	 * stay a plain CPU-endian value: byte swapping it would program a
+	 * wrong address on big-endian hosts.
+	 */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE1,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE3,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(addr));
+	phy_write_mmd(phydev, MDIO_MMD_VEND1,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE4,
+		      VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(addr));
+
+	for (pos = 0; pos < len; pos += sizeof(u32)) {
+		u8 crc_data[sizeof(u32)];
+		u32 word;
+
+		/* FW data is always stored in little-endian, decode it
+		 * explicitly so the value is correct on any host.
+		 */
+		word = get_unaligned_le32(data + pos);
+
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE5,
+			      VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(word));
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE6,
+			      VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(word));
+
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE1,
+			      VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE |
+			      VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE);
+
+		/* calculate CRC as we load data to the mailbox.
+		 * The CRC is fed the word most significant byte first, as
+		 * the mailbox will return a BE CRC. Spelling the bytes out
+		 * keeps the result independent of host endianness.
+		 */
+		crc_data[0] = word >> 24;
+		crc_data[1] = word >> 16;
+		crc_data[2] = word >> 8;
+		crc_data[3] = word;
+		crc = crc_ccitt_false(crc, crc_data, sizeof(crc_data));
+	}
+
+	/* Do not let a failed read masquerade as a CRC value */
+	ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE2);
+	if (ret < 0)
+		return ret;
+
+	up_crc = ret;
+	if (crc != up_crc) {
+		phydev_err(phydev, "CRC mismatch: calculated 0x%04x PHY 0x%04x\n",
+			   crc, up_crc);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * aqr_fw_boot - validate a firmware blob and boot the PHY from it
+ * @phydev: PHY to boot
+ * @data: firmware blob
+ * @size: size of @data in bytes
+ * @fw_src: where the blob came from, only used for the log message
+ *
+ * Checks the CRC stored in the last two bytes of the blob, locates the
+ * IRAM and DRAM sections through the header found at the primary offset,
+ * validates every offset and size against @size, then stalls the PHY
+ * microprocessor, loads both sections and releases the microprocessor.
+ *
+ * Return: 0 on success, -EINVAL when the blob fails validation, or the
+ * error returned by aqr_fw_load_memory().
+ */
+static int aqr_fw_boot(struct phy_device *phydev, const u8 *data, size_t size,
+		       enum aqr_fw_src fw_src)
+{
+	u16 calculated_crc, read_crc, read_primary_offset;
+	u32 iram_offset = 0, iram_size = 0;
+	u32 dram_offset = 0, dram_size = 0;
+	char version[VERSION_STRING_SIZE];
+	u32 primary_offset = 0;
+	int ret;
+
+	/* The blob must at least hold the trailing CRC, otherwise
+	 * size - sizeof(u16) below would wrap around.
+	 */
+	if (size < sizeof(u16)) {
+		phydev_err(phydev, "bad firmware CRC in firmware\n");
+		return -EINVAL;
+	}
+
+	/* extract saved CRC at the end of the fw
+	 * CRC is saved in big-endian as PHY is BE
+	 */
+	ret = aqr_fw_get_be16(data, size - sizeof(u16), size, &read_crc);
+	if (ret) {
+		phydev_err(phydev, "bad firmware CRC in firmware\n");
+		return ret;
+	}
+	calculated_crc = crc_ccitt_false(0, data, size - sizeof(u16));
+	if (read_crc != calculated_crc) {
+		phydev_err(phydev, "bad firmware CRC: file 0x%04x calculated 0x%04x\n",
+			   read_crc, calculated_crc);
+		return -EINVAL;
+	}
+
+	/* Get the primary offset to extract DRAM and IRAM sections. */
+	ret = aqr_fw_get_le16(data, PRIMARY_OFFSET_OFFSET, size, &read_primary_offset);
+	if (ret) {
+		phydev_err(phydev, "bad primary offset in firmware\n");
+		return ret;
+	}
+	primary_offset = PRIMARY_OFFSET(read_primary_offset);
+
+	/* Find the DRAM and IRAM sections within the firmware file.
+	 * Make sure the fw_header is correctly in the firmware.
+	 */
+	if (!aqr_fw_validate_get(size, primary_offset + HEADER_OFFSET,
+				 sizeof(struct aqr_fw_header))) {
+		phydev_err(phydev, "bad fw_header in firmware\n");
+		return -EINVAL;
+	}
+
+	/* offset are in LE and values needs to be converted to cpu endian */
+	ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET +
+			      offsetof(struct aqr_fw_header, iram_offset),
+			      size, &iram_offset);
+	if (ret) {
+		phydev_err(phydev, "bad iram offset in firmware\n");
+		return ret;
+	}
+	ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET +
+			      offsetof(struct aqr_fw_header, iram_size),
+			      size, &iram_size);
+	if (ret) {
+		phydev_err(phydev, "invalid iram size in firmware\n");
+		return ret;
+	}
+	ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET +
+			      offsetof(struct aqr_fw_header, dram_offset),
+			      size, &dram_offset);
+	if (ret) {
+		phydev_err(phydev, "bad dram offset in firmware\n");
+		return ret;
+	}
+	ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET +
+			      offsetof(struct aqr_fw_header, dram_size),
+			      size, &dram_size);
+	if (ret) {
+		phydev_err(phydev, "invalid dram size in firmware\n");
+		return ret;
+	}
+
+	/* Increment the offset with the primary offset.
+	 * Validate iram/dram offset and size.
+	 */
+	iram_offset += primary_offset;
+	if (iram_size % sizeof(u32)) {
+		phydev_err(phydev, "iram size if not aligned to word size. Please report this upstream!\n");
+		return -EINVAL;
+	}
+	if (!aqr_fw_validate_get(size, iram_offset, iram_size)) {
+		phydev_err(phydev, "invalid iram offset for iram size\n");
+		return -EINVAL;
+	}
+
+	dram_offset += primary_offset;
+	if (dram_size % sizeof(u32)) {
+		phydev_err(phydev, "dram size if not aligned to word size. Please report this upstream!\n");
+		return -EINVAL;
+	}
+	if (!aqr_fw_validate_get(size, dram_offset, dram_size)) {
+		/* report the section that actually failed validation */
+		phydev_err(phydev, "invalid dram offset for dram size\n");
+		return -EINVAL;
+	}
+
+	phydev_dbg(phydev, "primary %d IRAM offset=%d size=%d DRAM offset=%d size=%d\n",
+		   primary_offset, iram_offset, iram_size, dram_offset, dram_size);
+
+	/* The version string sits at a fixed offset inside the DRAM section */
+	if (!aqr_fw_validate_get(size, dram_offset + VERSION_STRING_OFFSET,
+				 VERSION_STRING_SIZE)) {
+		phydev_err(phydev, "invalid version in firmware\n");
+		return -EINVAL;
+	}
+	strscpy(version, (char *)data + dram_offset + VERSION_STRING_OFFSET,
+		VERSION_STRING_SIZE);
+	if (version[0] == '\0') {
+		phydev_err(phydev, "invalid version in firmware\n");
+		return -EINVAL;
+	}
+	phydev_info(phydev, "loading firmware version '%s' from '%s'\n", version,
+		    aqr_fw_src_string[fw_src]);
+
+	/* stall the microprocessor */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2,
+		      VEND1_GLOBAL_CONTROL2_UP_RUN_STALL | VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD);
+
+	phydev_dbg(phydev, "loading DRAM 0x%08x from offset=%d size=%d\n",
+		   DRAM_BASE_ADDR, dram_offset, dram_size);
+	ret = aqr_fw_load_memory(phydev, DRAM_BASE_ADDR, data + dram_offset,
+				 dram_size);
+	if (ret)
+		return ret;
+
+	phydev_dbg(phydev, "loading IRAM 0x%08x from offset=%d size=%d\n",
+		   IRAM_BASE_ADDR, iram_offset, iram_size);
+	ret = aqr_fw_load_memory(phydev, IRAM_BASE_ADDR, data + iram_offset,
+				 iram_size);
+	if (ret)
+		return ret;
+
+	/* make sure soft reset and low power mode are clear */
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_SC,
+			   VEND1_GLOBAL_SC_SOFT_RESET | VEND1_GLOBAL_SC_LOW_POWER);
+
+	/* Release the microprocessor. UP_RESET must be held for 100 usec. */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2,
+		      VEND1_GLOBAL_CONTROL2_UP_RUN_STALL |
+		      VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD |
+		      VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST);
+	usleep_range(UP_RESET_SLEEP, UP_RESET_SLEEP * 2);
+
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2,
+		      VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD);
+
+	return 0;
+}
+
+/* Try to boot the PHY from the blob stored in the "firmware" nvmem cell.
+ * Returns 0 on success, or the error from looking up / reading the cell or
+ * from aqr_fw_boot().
+ */
+static int aqr_firmware_load_nvmem(struct phy_device *phydev)
+{
+	struct nvmem_cell *fw_cell;
+	size_t fw_len;
+	u8 *fw_data;
+	int err;
+
+	fw_cell = nvmem_cell_get(&phydev->mdio.dev, "firmware");
+	if (IS_ERR(fw_cell))
+		return PTR_ERR(fw_cell);
+
+	fw_data = nvmem_cell_read(fw_cell, &fw_len);
+	if (!IS_ERR(fw_data)) {
+		err = aqr_fw_boot(phydev, fw_data, fw_len, AQR_FW_SRC_NVMEM);
+		if (err)
+			phydev_err(phydev, "firmware loading failed: %d\n", err);
+
+		/* the buffer returned by the cell read is ours to free */
+		kfree(fw_data);
+	} else {
+		err = PTR_ERR(fw_data);
+	}
+
+	nvmem_cell_put(fw_cell);
+
+	return err;
+}
+
+/* Try to boot the PHY from the file named by the "firmware-name" property
+ * of the PHY's device tree node. Returns 0 on success, or the error from
+ * reading the property, requesting the file or aqr_fw_boot().
+ */
+static int aqr_firmware_load_fs(struct phy_device *phydev)
+{
+	struct device *mdio_dev = &phydev->mdio.dev;
+	const struct firmware *blob;
+	const char *path;
+	int err;
+
+	err = of_property_read_string(mdio_dev->of_node, "firmware-name", &path);
+	if (err)
+		return err;
+
+	err = request_firmware(&blob, path, mdio_dev);
+	if (err) {
+		phydev_err(phydev, "failed to find FW file %s (%d)\n", path, err);
+		return err;
+	}
+
+	err = aqr_fw_boot(phydev, blob->data, blob->size, AQR_FW_SRC_FS);
+	if (err)
+		phydev_err(phydev, "firmware loading failed: %d\n", err);
+
+	release_firmware(blob);
+
+	return err;
+}
+
+/* Load firmware into the PHY unless it already runs one.
+ * The nvmem cell is tried first and the filesystem second; the error of the
+ * filesystem attempt is the one reported when both fail.
+ */
+int aqr_firmware_load(struct phy_device *phydev)
+{
+	/* Check whether firmware is already loaded by polling the current
+	 * version returned by the PHY. Anything greater than 0 means a
+	 * firmware is running and there is nothing to do.
+	 */
+	if (phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID) > 0)
+		return 0;
+
+	if (!aqr_firmware_load_nvmem(phydev))
+		return 0;
+
+	return aqr_firmware_load_fs(phydev);
+}
diff --git a/drivers/net/phy/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c
similarity index 89%
rename from drivers/net/phy/aquantia_hwmon.c
rename to drivers/net/phy/aquantia/aquantia_hwmon.c
index 0da451e..7b3c49c 100644
--- a/drivers/net/phy/aquantia_hwmon.c
+++ b/drivers/net/phy/aquantia/aquantia_hwmon.c
@@ -13,20 +13,6 @@
#include "aquantia.h"
-/* Vendor specific 1, MDIO_MMD_VEND2 */
-#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421
-#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422
-#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423
-#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424
-#define VEND1_THERMAL_STAT1 0xc820
-#define VEND1_THERMAL_STAT2 0xc821
-#define VEND1_THERMAL_STAT2_VALID BIT(0)
-#define VEND1_GENERAL_STAT1 0xc830
-#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14)
-#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13)
-#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12)
-#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11)
-
#if IS_REACHABLE(CONFIG_HWMON)
static umode_t aqr_hwmon_is_visible(const void *data,
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c
similarity index 91%
rename from drivers/net/phy/aquantia_main.c
rename to drivers/net/phy/aquantia/aquantia_main.c
index 334a690..cc4a977 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia/aquantia_main.c
@@ -91,61 +91,6 @@
#define MDIO_C22EXT_STAT_SGMII_TX_FRAME_ALIGN_ERR 0xd31a
#define MDIO_C22EXT_STAT_SGMII_TX_RUNT_FRAMES 0xd31b
-/* Vendor specific 1, MDIO_MMD_VEND1 */
-#define VEND1_GLOBAL_FW_ID 0x0020
-#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8)
-#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0)
-
-#define VEND1_GLOBAL_GEN_STAT2 0xc831
-#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15)
-
-/* The following registers all have similar layouts; first the registers... */
-#define VEND1_GLOBAL_CFG_10M 0x0310
-#define VEND1_GLOBAL_CFG_100M 0x031b
-#define VEND1_GLOBAL_CFG_1G 0x031c
-#define VEND1_GLOBAL_CFG_2_5G 0x031d
-#define VEND1_GLOBAL_CFG_5G 0x031e
-#define VEND1_GLOBAL_CFG_10G 0x031f
-/* ...and now the fields */
-#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7)
-#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0
-#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1
-#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2
-
-#define VEND1_GLOBAL_RSVD_STAT1 0xc885
-#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4)
-#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0)
-
-#define VEND1_GLOBAL_RSVD_STAT9 0xc88d
-#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0)
-#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23
-
-#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00
-#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01
-
-#define VEND1_GLOBAL_INT_STD_MASK 0xff00
-#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15)
-#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14)
-#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13)
-#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12)
-#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11)
-#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10)
-#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9)
-#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8)
-#define VEND1_GLOBAL_INT_STD_MASK_AN2 BIT(7)
-#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6)
-#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0)
-
-#define VEND1_GLOBAL_INT_VEND_MASK 0xff01
-#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15)
-#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14)
-#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13)
-#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12)
-#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11)
-#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2)
-#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1)
-#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0)
-
/* Sleep and timeout for checking if the Processor-Intensive
* MDIO operation is finished
*/
@@ -713,11 +658,17 @@ static int aqr107_resume(struct phy_device *phydev)
static int aqr107_probe(struct phy_device *phydev)
{
+ int ret;
+
phydev->priv = devm_kzalloc(&phydev->mdio.dev,
sizeof(struct aqr107_priv), GFP_KERNEL);
if (!phydev->priv)
return -ENOMEM;
+ ret = aqr_firmware_load(phydev);
+ if (ret)
+ return ret;
+
return aqr_hwmon_probe(phydev);
}
diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c
index cb4b91a..617d384 100644
--- a/drivers/net/phy/bcm-phy-ptp.c
+++ b/drivers/net/phy/bcm-phy-ptp.c
@@ -782,16 +782,13 @@ static void bcm_ptp_txtstamp(struct mii_timestamper *mii_ts,
}
static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts,
- struct ifreq *ifr)
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct bcm_ptp_private *priv = mii2priv(mii_ts);
- struct hwtstamp_config cfg;
u16 mode, ctrl;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- switch (cfg.rx_filter) {
+ switch (cfg->rx_filter) {
case HWTSTAMP_FILTER_NONE:
priv->hwts_rx = false;
break;
@@ -804,14 +801,14 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts,
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
priv->hwts_rx = true;
break;
default:
return -ERANGE;
}
- priv->tx_type = cfg.tx_type;
+ priv->tx_type = cfg->tx_type;
ctrl = priv->hwts_rx ? SLICE_RX_EN : 0;
ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? SLICE_TX_EN : 0;
@@ -840,7 +837,7 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts,
/* purge existing data */
skb_queue_purge(&priv->tx_queue);
- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+ return 0;
}
static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts,
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 3a62710..312a8bb 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -1135,6 +1135,8 @@ static struct phy_driver broadcom_drivers[] = {
.handle_interrupt = bcm_phy_handle_interrupt,
.link_change_notify = bcm54xx_link_change_notify,
.led_brightness_set = bcm_phy_led_brightness_set,
+ .suspend = bcm54xx_suspend,
+ .resume = bcm54xx_resume,
}, {
.phy_id = PHY_ID_BCM54616S,
.phy_id_mask = 0xfffffff0,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 2657be7..5c42c47 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1207,22 +1207,20 @@ static irqreturn_t dp83640_handle_interrupt(struct phy_device *phydev)
return IRQ_HANDLED;
}
-static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+static int dp83640_hwtstamp(struct mii_timestamper *mii_ts,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct dp83640_private *dp83640 =
container_of(mii_ts, struct dp83640_private, mii_ts);
- struct hwtstamp_config cfg;
u16 txcfg0, rxcfg0;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ONESTEP_SYNC)
+ if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ONESTEP_SYNC)
return -ERANGE;
- dp83640->hwts_tx_en = cfg.tx_type;
+ dp83640->hwts_tx_en = cfg->tx_type;
- switch (cfg.rx_filter) {
+ switch (cfg->rx_filter) {
case HWTSTAMP_FILTER_NONE:
dp83640->hwts_rx_en = 0;
dp83640->layer = 0;
@@ -1234,7 +1232,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
dp83640->hwts_rx_en = 1;
dp83640->layer = PTP_CLASS_L4;
dp83640->version = PTP_CLASS_V1;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
break;
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
@@ -1242,7 +1240,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
dp83640->hwts_rx_en = 1;
dp83640->layer = PTP_CLASS_L4;
dp83640->version = PTP_CLASS_V2;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
break;
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
@@ -1250,7 +1248,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
dp83640->hwts_rx_en = 1;
dp83640->layer = PTP_CLASS_L2;
dp83640->version = PTP_CLASS_V2;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
break;
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
@@ -1258,7 +1256,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
dp83640->hwts_rx_en = 1;
dp83640->layer = PTP_CLASS_L4 | PTP_CLASS_L2;
dp83640->version = PTP_CLASS_V2;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
return -ERANGE;
@@ -1292,7 +1290,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
mutex_unlock(&dp83640->clock->extreg_lock);
- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+ return 0;
}
static void rx_timestamp_work(struct work_struct *work)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 08e3915..bd4cd08 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2395,24 +2395,22 @@ static void lan8814_flush_fifo(struct phy_device *phydev, bool egress)
lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS);
}
-static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
struct kszphy_ptp_priv *ptp_priv =
container_of(mii_ts, struct kszphy_ptp_priv, mii_ts);
struct phy_device *phydev = ptp_priv->phydev;
struct lan8814_shared_priv *shared = phydev->shared->priv;
struct lan8814_ptp_rx_ts *rx_ts, *tmp;
- struct hwtstamp_config config;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
+ ptp_priv->hwts_tx_type = config->tx_type;
+ ptp_priv->rx_filter = config->rx_filter;
- ptp_priv->hwts_tx_type = config.tx_type;
- ptp_priv->rx_filter = config.rx_filter;
-
- switch (config.rx_filter) {
+ switch (config->rx_filter) {
case HWTSTAMP_FILTER_NONE:
ptp_priv->layer = 0;
ptp_priv->version = 0;
@@ -2458,13 +2456,13 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
- if (config.rx_filter != HWTSTAMP_FILTER_NONE)
+ if (config->rx_filter != HWTSTAMP_FILTER_NONE)
lan8814_config_ts_intr(ptp_priv->phydev, true);
else
lan8814_config_ts_intr(ptp_priv->phydev, false);
mutex_lock(&shared->shared_lock);
- if (config.rx_filter != HWTSTAMP_FILTER_NONE)
+ if (config->rx_filter != HWTSTAMP_FILTER_NONE)
shared->ref++;
else
shared->ref--;
@@ -2488,7 +2486,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
lan8814_flush_fifo(ptp_priv->phydev, false);
lan8814_flush_fifo(ptp_priv->phydev, true);
- return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0;
+ return 0;
}
static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
@@ -3631,12 +3629,8 @@ static int lan8841_ts_info(struct mii_timestamper *mii_ts,
info->phc_index = ptp_priv->ptp_clock ?
ptp_clock_index(ptp_priv->ptp_clock) : -1;
- if (info->phc_index == -1) {
- info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
+ if (info->phc_index == -1)
return 0;
- }
info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
@@ -3703,21 +3697,19 @@ static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv,
#define LAN8841_PTP_TX_TIMESTAMP_EN 443
#define LAN8841_PTP_TX_MOD 445
-static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+static int lan8841_hwtstamp(struct mii_timestamper *mii_ts,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts);
struct phy_device *phydev = ptp_priv->phydev;
- struct hwtstamp_config config;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
+ ptp_priv->hwts_tx_type = config->tx_type;
+ ptp_priv->rx_filter = config->rx_filter;
- ptp_priv->hwts_tx_type = config.tx_type;
- ptp_priv->rx_filter = config.rx_filter;
-
- switch (config.rx_filter) {
+ switch (config->rx_filter) {
case HWTSTAMP_FILTER_NONE:
ptp_priv->layer = 0;
ptp_priv->version = 0;
@@ -3771,13 +3763,13 @@ static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
/* Now enable/disable the timestamping */
lan8841_ptp_enable_processing(ptp_priv,
- config.rx_filter != HWTSTAMP_FILTER_NONE);
+ config->rx_filter != HWTSTAMP_FILTER_NONE);
skb_queue_purge(&ptp_priv->tx_queue);
lan8841_ptp_flush_fifo(ptp_priv);
- return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0;
+ return 0;
}
static bool lan8841_rxtstamp(struct mii_timestamper *mii_ts,
diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c
index cf728bf..eb0b032c 100644
--- a/drivers/net/phy/mscc/mscc_ptp.c
+++ b/drivers/net/phy/mscc/mscc_ptp.c
@@ -1045,19 +1045,17 @@ static void vsc85xx_ts_reset_fifo(struct phy_device *phydev)
val);
}
-static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct vsc8531_private *vsc8531 =
container_of(mii_ts, struct vsc8531_private, mii_ts);
struct phy_device *phydev = vsc8531->ptp->phydev;
- struct hwtstamp_config cfg;
bool one_step = false;
u32 val;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- switch (cfg.tx_type) {
+ switch (cfg->tx_type) {
case HWTSTAMP_TX_ONESTEP_SYNC:
one_step = true;
break;
@@ -1069,9 +1067,9 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
return -ERANGE;
}
- vsc8531->ptp->tx_type = cfg.tx_type;
+ vsc8531->ptp->tx_type = cfg->tx_type;
- switch (cfg.rx_filter) {
+ switch (cfg->rx_filter) {
case HWTSTAMP_FILTER_NONE:
break;
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
@@ -1084,7 +1082,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
return -ERANGE;
}
- vsc8531->ptp->rx_filter = cfg.rx_filter;
+ vsc8531->ptp->rx_filter = cfg->rx_filter;
mutex_lock(&vsc8531->ts_lock);
@@ -1132,7 +1130,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
vsc8531->ptp->configured = 1;
mutex_unlock(&vsc8531->ts_lock);
- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+ return 0;
}
static int vsc85xx_ts_info(struct mii_timestamper *mii_ts,
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 7ab080f..780ad35 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -1022,24 +1022,21 @@ static bool nxp_c45_rxtstamp(struct mii_timestamper *mii_ts,
}
static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts,
- struct ifreq *ifreq)
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy,
mii_ts);
struct phy_device *phydev = priv->phydev;
const struct nxp_c45_phy_data *data;
- struct hwtstamp_config cfg;
- if (copy_from_user(&cfg, ifreq->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ON)
+ if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ON)
return -ERANGE;
data = nxp_c45_get_data(phydev);
- priv->hwts_tx = cfg.tx_type;
+ priv->hwts_tx = cfg->tx_type;
- switch (cfg.rx_filter) {
+ switch (cfg->rx_filter) {
case HWTSTAMP_FILTER_NONE:
priv->hwts_rx = 0;
break;
@@ -1047,7 +1044,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts,
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
priv->hwts_rx = 1;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
break;
default:
return -ERANGE;
@@ -1074,7 +1071,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts,
nxp_c45_clear_reg_field(phydev, &data->regmap->irq_egr_ts_en);
nxp_c45_no_ptp_irq:
- return copy_to_user(ifreq->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+ return 0;
}
static int nxp_c45_ts_info(struct mii_timestamper *mii_ts,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index a5fa077..3376e58 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -325,9 +325,13 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_get);
int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
{
struct mii_ioctl_data *mii_data = if_mii(ifr);
+ struct kernel_hwtstamp_config kernel_cfg;
+ struct netlink_ext_ack extack = {};
u16 val = mii_data->val_in;
bool change_autoneg = false;
+ struct hwtstamp_config cfg;
int prtad, devad;
+ int ret;
switch (cmd) {
case SIOCGMIIPHY:
@@ -411,8 +415,21 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
return 0;
case SIOCSHWTSTAMP:
- if (phydev->mii_ts && phydev->mii_ts->hwtstamp)
- return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr);
+ if (phydev->mii_ts && phydev->mii_ts->hwtstamp) {
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ hwtstamp_config_to_kernel(&kernel_cfg, &cfg);
+ ret = phydev->mii_ts->hwtstamp(phydev->mii_ts, &kernel_cfg, &extack);
+ if (ret)
+ return ret;
+
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+
+ return 0;
+ }
fallthrough;
default:
@@ -469,7 +486,7 @@ int __phy_hwtstamp_get(struct phy_device *phydev,
if (!phydev)
return -ENODEV;
- return phy_mii_ioctl(phydev, config->ifr, SIOCGHWTSTAMP);
+ return -EOPNOTSUPP;
}
/**
@@ -486,7 +503,10 @@ int __phy_hwtstamp_set(struct phy_device *phydev,
if (!phydev)
return -ENODEV;
- return phy_mii_ioctl(phydev, config->ifr, SIOCSHWTSTAMP);
+ if (phydev->mii_ts && phydev->mii_ts->hwtstamp)
+ return phydev->mii_ts->hwtstamp(phydev->mii_ts, config, extack);
+
+ return -EOPNOTSUPP;
}
/**
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 25c1949..c276f94 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -697,18 +697,16 @@ static int phylink_validate_mask(struct phylink *pl, unsigned long *supported,
__ETHTOOL_DECLARE_LINK_MODE_MASK(all_s) = { 0, };
__ETHTOOL_DECLARE_LINK_MODE_MASK(s);
struct phylink_link_state t;
- int intf;
+ int interface;
- for (intf = 0; intf < PHY_INTERFACE_MODE_MAX; intf++) {
- if (test_bit(intf, interfaces)) {
- linkmode_copy(s, supported);
+ for_each_set_bit(interface, interfaces, PHY_INTERFACE_MODE_MAX) {
+ linkmode_copy(s, supported);
- t = *state;
- t.interface = intf;
- if (!phylink_validate_mac_and_pcs(pl, s, &t)) {
- linkmode_or(all_s, all_s, s);
- linkmode_or(all_adv, all_adv, t.advertising);
- }
+ t = *state;
+ t.interface = interface;
+ if (!phylink_validate_mac_and_pcs(pl, s, &t)) {
+ linkmode_or(all_s, all_s, s);
+ linkmode_or(all_adv, all_adv, t.advertising);
}
}
@@ -805,7 +803,7 @@ static int phylink_parse_fixedlink(struct phylink *pl,
phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n",
pl->link_config.speed);
- bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_fill(pl->supported);
linkmode_copy(pl->link_config.advertising, pl->supported);
phylink_validate(pl, pl->supported, &pl->link_config);
@@ -1640,7 +1638,7 @@ struct phylink *phylink_create(struct phylink_config *config,
__set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
timer_setup(&pl->link_poll, phylink_fixed_poll, 0);
- bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_fill(pl->supported);
linkmode_copy(pl->link_config.advertising, pl->supported);
phylink_validate(pl, pl->supported, &pl->link_config);
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 208a939..6fa679b 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -328,7 +328,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
* modules use 2500Mbaud rather than 3100 or 3200Mbaud for
* 2500BASE-X, so we allow some slack here.
*/
- if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS) && br_nom) {
+ if (linkmode_empty(modes) && br_nom) {
if (br_min <= 1300 && br_max >= 1200) {
phylink_set(modes, 1000baseX_Full);
__set_bit(PHY_INTERFACE_MODE_1000BASEX, interfaces);
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 4ea0e15..8d835fb 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1298,8 +1298,6 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
netif_set_tso_max_size(dev->net, 16384);
- ax88179_reset(dev);
-
return 0;
}
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 412c3c0..764ea02 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2379,7 +2379,17 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
else
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
#if IS_ENABLED(CONFIG_IPV6)
- key.label = vxlan->cfg.label;
+ switch (vxlan->cfg.label_policy) {
+ case VXLAN_LABEL_FIXED:
+ key.label = vxlan->cfg.label;
+ break;
+ case VXLAN_LABEL_INHERIT:
+ key.label = ip_tunnel_get_flowlabel(old_iph, skb);
+ break;
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ goto drop;
+ }
#endif
} else {
if (!info) {
@@ -3366,6 +3376,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
[IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -3740,6 +3751,12 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
return -EINVAL;
}
+ if (conf->label_policy && !use_ipv6) {
+ NL_SET_ERR_MSG(extack,
+ "Label policy only applies to IPv6 VXLAN devices");
+ return -EINVAL;
+ }
+
if (conf->remote_ifindex) {
struct net_device *lowerdev;
@@ -4082,6 +4099,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_VXLAN_LABEL])
conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
IPV6_FLOWLABEL_MASK;
+ if (data[IFLA_VXLAN_LABEL_POLICY])
+ conf->label_policy = nla_get_u32(data[IFLA_VXLAN_LABEL_POLICY]);
if (data[IFLA_VXLAN_LEARNING]) {
err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
@@ -4398,6 +4417,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
+ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LABEL_POLICY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
@@ -4471,6 +4491,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
+ nla_put_u32(skb, IFLA_VXLAN_LABEL_POLICY, vxlan->cfg.label_policy) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 55bc357..69d20d5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
if (delay > PCI_RESET_WAIT)
pci_info(dev, "ready %dms after %s\n", delay - 1,
reset_type);
+ else
+ pci_dbg(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
return 0;
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index ea47625..d208047 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3787,6 +3787,19 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);
/*
+ * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset
+ * (i.e., they advertise NoSoftRst-). However, this transition does not have
+ * any effect on the device: It continues to be operational and network ports
+ * remain up. Advertising this support makes it seem as if a PM reset is viable
+ * for these devices. Mark it as unavailable to skip it when testing reset
+ * methods.
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset);
+
+/*
* Thunderbolt controllers with broken MSI hotplug signaling:
* Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part
* of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge).
diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c
index ed215b4..1d2940a 100644
--- a/drivers/ptp/ptp_ines.c
+++ b/drivers/ptp/ptp_ines.c
@@ -328,17 +328,15 @@ static u64 ines_find_txts(struct ines_port *port, struct sk_buff *skb)
return ns;
}
-static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+static int ines_hwtstamp(struct mii_timestamper *mii_ts,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts);
u32 cm_one_step = 0, port_conf, ts_stat_rx, ts_stat_tx;
- struct hwtstamp_config cfg;
unsigned long flags;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- switch (cfg.tx_type) {
+ switch (cfg->tx_type) {
case HWTSTAMP_TX_OFF:
ts_stat_tx = 0;
break;
@@ -353,7 +351,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
return -ERANGE;
}
- switch (cfg.rx_filter) {
+ switch (cfg->rx_filter) {
case HWTSTAMP_FILTER_NONE:
ts_stat_rx = 0;
break;
@@ -372,7 +370,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
ts_stat_rx = TS_ENABLE;
- cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
return -ERANGE;
@@ -393,7 +391,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
spin_unlock_irqrestore(&port->lock, flags);
- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+ return 0;
}
static void ines_link_state(struct mii_timestamper *mii_ts,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6762dac..258ba23 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -186,8 +186,8 @@ enum btf_field_type {
BPF_LIST_NODE = (1 << 6),
BPF_RB_ROOT = (1 << 7),
BPF_RB_NODE = (1 << 8),
- BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
- BPF_RB_NODE | BPF_RB_ROOT,
+ BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
+ BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
BPF_REFCOUNT = (1 << 9),
};
@@ -1226,6 +1226,8 @@ enum bpf_dynptr_type {
int bpf_dynptr_check_size(u32 size);
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aa4d19d..d99a636 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -618,6 +618,7 @@ struct bpf_verifier_env {
int stack_size; /* number of states to be processed */
bool strict_alignment; /* perform strict pointer alignment checks */
bool test_state_freq; /* test verifier with different pruning frequency */
+ bool test_reg_invariants; /* fail verification on register invariants violations */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_verifier_state_list *free_list;
@@ -695,6 +696,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos);
int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual);
+__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
+ u32 insn_off,
+ const char *prefix_fmt, ...);
+
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
@@ -794,4 +799,76 @@ static inline bool bpf_type_has_unsafe_modifiers(u32 type)
return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS;
}
+static inline bool type_is_ptr_alloc_obj(u32 type)
+{
+ return (type_flag(type) & MEM_ALLOC) && base_type(type) == PTR_TO_BTF_ID;
+}
+
+static inline bool type_is_non_owning_ref(u32 type)
+{
+ return (type_flag(type) & NON_OWN_REF) && type_is_ptr_alloc_obj(type);
+}
+
+static inline bool type_is_pkt_pointer(enum bpf_reg_type type)
+{
+ /* Only the base_type() part of the register type is compared. */
+ const enum bpf_reg_type base = base_type(type);
+ return base == PTR_TO_PACKET_META || base == PTR_TO_PACKET;
+}
+
+static inline bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+ /* The raw register type is compared here, without base_type(). */
+ if (type == PTR_TO_SOCKET || type == PTR_TO_SOCK_COMMON)
+ return true;
+ return type == PTR_TO_TCP_SOCK || type == PTR_TO_XDP_SOCK;
+}
+
+static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
+{
+ env->scratched_regs = env->scratched_regs | (1U << regno);
+}
+
+static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
+{
+ env->scratched_stack_slots = env->scratched_stack_slots | (1ULL << spi);
+}
+
+static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
+{
+ return ((env->scratched_regs >> regno) & 1) != 0;
+}
+
+static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 spi)
+{
+ return (env->scratched_stack_slots >> spi) & 1; /* spi indexes stack slots, not registers */
+}
+
+static inline bool verifier_state_scratched(const struct bpf_verifier_env *env)
+{
+ return env->scratched_stack_slots != 0 || env->scratched_regs != 0;
+}
+
+static inline void mark_verifier_state_clean(struct bpf_verifier_env *env)
+{
+ env->scratched_stack_slots = 0;
+ env->scratched_regs = 0;
+}
+
+/* Flag every register and stack slot as scratched so the whole state is printed. */
+static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env)
+{
+ env->scratched_stack_slots = ~0ULL;
+ env->scratched_regs = ~0U;
+}
+
+const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
+const char *dynptr_type_str(enum bpf_dynptr_type type);
+const char *iter_type_str(const struct btf *btf, u32 btf_id);
+const char *iter_state_str(enum bpf_iter_state state);
+
+void print_verifier_state(struct bpf_verifier_env *env,
+ const struct bpf_func_state *state, bool print_all);
+void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state);
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 4a6b6b7..4caab0c 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -563,6 +563,7 @@ struct cgroup_root {
/* A list running through the active hierarchies */
struct list_head root_list;
+ struct rcu_head rcu;
/* Hierarchy-specific flags */
unsigned int flags;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0ef0af6..34aaf0e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -69,6 +69,7 @@ struct css_task_iter {
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
+extern spinlock_t css_set_lock;
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -386,7 +387,6 @@ static inline void cgroup_unlock(void)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
rcu_read_lock_sched_held() || \
@@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
#endif /* CONFIG_CGROUP_BPF */
+struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id);
+
#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 2ceba3f..aebb65b 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -136,7 +136,7 @@
#endif
#define __diag_ignore_all(option, comment) \
- __diag_GCC(8, ignore, option)
+ __diag(__diag_GCC_ignore option)
/*
* Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size"
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 6890282..c2bb741 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1044,6 +1044,14 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add,
}
/**
+ * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer.
+ * @dev: pointer to net_device structure
+ * @info: buffer to hold the result
+ * Returns zero on success, non-zero otherwise.
+ */
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info);
+
+/**
* ethtool_sprintf - Write formatted string to ethtool string data
* @data: Pointer to a pointer to the start of string to update
* @fmt: Format of string to write
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index c1c76a7..adb83a4 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -11,7 +11,7 @@
* @__VA_ARGS__: arguments for @f
*
* Avoid retpoline overhead for known builtin, checking @f vs each of them and
- * eventually invoking directly the builtin function. The functions are check
+ * eventually invoking directly the builtin function. The functions are checked
* in the given order. Fallback to the indirect call.
*/
#define INDIRECT_CALL_1(f, f1, ...) \
diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
index 7303b4b..287f590 100644
--- a/include/linux/linkmode.h
+++ b/include/linux/linkmode.h
@@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst)
bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
}
+static inline void linkmode_fill(unsigned long *dst)
+{
+ bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); /* bitmap_fill() over the full link mode mask width */
+}
+
static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
{
bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h
index fa940bb..26b04f7 100644
--- a/include/linux/mii_timestamper.h
+++ b/include/linux/mii_timestamper.h
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/net_tstamp.h>
struct phy_device;
@@ -51,7 +52,8 @@ struct mii_timestamper {
struct sk_buff *skb, int type);
int (*hwtstamp)(struct mii_timestamper *mii_ts,
- struct ifreq *ifreq);
+ struct kernel_hwtstamp_config *kernel_config,
+ struct netlink_ext_ack *extack);
void (*link_state)(struct mii_timestamper *mii_ts,
struct phy_device *phydev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6f36314..ce2e71c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10103,7 +10103,10 @@ enum {
struct mlx5_ifc_mtutc_reg_bits {
u8 reserved_at_0[0x5];
u8 freq_adj_units[0x3];
- u8 reserved_at_8[0x14];
+ u8 reserved_at_8[0x3];
+ u8 log_max_freq_adjustment[0x5];
+
+ u8 reserved_at_10[0xc];
u8 operation[0x4];
u8 freq_adjustment[0x20];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2564e20..e87caa8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3964,6 +3964,9 @@ int generic_hwtstamp_get_lower(struct net_device *dev,
int generic_hwtstamp_set_lower(struct net_device *dev,
struct kernel_hwtstamp_config *kernel_cfg,
struct netlink_ext_ack *extack);
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack);
int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 75d7de3..abe91ed 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -351,5 +351,6 @@ bool netlink_ns_capable(const struct sk_buff *skb,
struct user_namespace *ns, int cap);
bool netlink_capable(const struct sk_buff *skb, int cap);
bool netlink_net_capable(const struct sk_buff *skb, int cap);
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast);
#endif /* __LINUX_NETLINK_H */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3cc5282..e5f1f41 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1560,9 +1560,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev)
return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp;
}
-static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
+static inline int phy_hwtstamp(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
- return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr);
+ return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack);
}
static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb,
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 1c3948a..3c132400 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a);
struct tnum tnum_subreg(struct tnum a);
/* Returns the tnum with the lower 32-bit subreg cleared */
struct tnum tnum_clear_subreg(struct tnum a);
+/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower
+ * 32-bit subreg in *subreg*
+ */
+struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg);
/* Returns the tnum with the lower 32-bit subreg set to value */
struct tnum tnum_const_subreg(struct tnum a, u32 value);
/* Returns true if 32-bit subreg @a is a known constant*/
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f346b4e..2d746f4 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IPV6))
+ return ip6_flowlabel((const struct ipv6hdr *)iph);
+ else
+ return 0;
+}
+
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb)
{
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 83bdf78..167b913 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1011,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
}
/**
+ * nlmsg_new_large - Allocate a new netlink message with non-contiguous
+ * physical memory
+ * @payload: size of the message payload
+ *
+ * The allocated skb cannot carry page fragments in shinfo->frags*,
+ * because netlink_skb_destructor() sets skb->head to NULL, which
+ * bypasses most of the handling in skb_release_data()
+ */
+static inline struct sk_buff *nlmsg_new_large(size_t payload)
+{
+ return netlink_alloc_large_skb(nlmsg_total_size(payload), 0);
+}
+
+/**
* nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 6fc5134..e1bb92c 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -54,18 +54,22 @@ struct pp_alloc_cache {
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
*/
struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid;
- struct device *dev;
- struct napi_struct *napi;
- enum dma_data_direction dma_dir;
- unsigned int max_len;
- unsigned int offset;
+ struct_group_tagged(page_pool_params_fast, fast,
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+ );
+ struct_group_tagged(page_pool_params_slow, slow,
/* private: used by test code only */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
+ );
};
#ifdef CONFIG_PAGE_POOL_STATS
@@ -119,7 +123,9 @@ struct page_pool_stats {
#endif
struct page_pool {
- struct page_pool_params p;
+ struct page_pool_params_fast p;
+
+ bool has_init_callback;
long frag_users;
struct page *frag_page;
@@ -178,6 +184,9 @@ struct page_pool {
refcount_t user_cnt;
u64 destroy_cnt;
+
+ /* Slow/Control-path information follows */
+ struct page_pool_params_slow slow;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6a9f8a5..33ba6fc 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -210,22 +210,23 @@ struct vxlan_rdst {
};
struct vxlan_config {
- union vxlan_addr remote_ip;
- union vxlan_addr saddr;
- __be32 vni;
- int remote_ifindex;
- int mtu;
- __be16 dst_port;
- u16 port_min;
- u16 port_max;
- u8 tos;
- u8 ttl;
- __be32 label;
- u32 flags;
- unsigned long age_interval;
- unsigned int addrmax;
- bool no_share;
- enum ifla_vxlan_df df;
+ union vxlan_addr remote_ip;
+ union vxlan_addr saddr;
+ __be32 vni;
+ int remote_ifindex;
+ int mtu;
+ __be16 dst_port;
+ u16 port_min;
+ u16 port_max;
+ u8 tos;
+ u8 ttl;
+ __be32 label;
+ enum ifla_vxlan_label_policy label_policy;
+ u32 flags;
+ unsigned long age_interval;
+ unsigned int addrmax;
+ bool no_share;
+ enum ifla_vxlan_df df;
};
enum {
diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 9204e44..6e25753 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -116,6 +116,9 @@ enum batadv_icmp_packettype {
* only need routable IPv4 multicast packets we signed up for explicitly
* @BATADV_MCAST_WANT_NO_RTR6: we have no IPv6 multicast router and therefore
* only need routable IPv6 multicast packets we signed up for explicitly
+ * @BATADV_MCAST_HAVE_MC_PTYPE_CAPA: we can parse, receive and forward
+ * batman-adv multicast packets with a multicast tracker TVLV. And all our
+ * hard interfaces have an MTU of at least 1280 bytes.
*/
enum batadv_mcast_flags {
BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0,
@@ -123,6 +126,7 @@ enum batadv_mcast_flags {
BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2,
BATADV_MCAST_WANT_NO_RTR4 = 1UL << 3,
BATADV_MCAST_WANT_NO_RTR6 = 1UL << 4,
+ BATADV_MCAST_HAVE_MC_PTYPE_CAPA = 1UL << 5,
};
/* tt data subtypes */
@@ -174,14 +178,16 @@ enum batadv_bla_claimframe {
* @BATADV_TVLV_TT: translation table tvlv
* @BATADV_TVLV_ROAM: roaming advertisement tvlv
* @BATADV_TVLV_MCAST: multicast capability tvlv
+ * @BATADV_TVLV_MCAST_TRACKER: multicast tracker tvlv
*/
enum batadv_tvlv_type {
- BATADV_TVLV_GW = 0x01,
- BATADV_TVLV_DAT = 0x02,
- BATADV_TVLV_NC = 0x03,
- BATADV_TVLV_TT = 0x04,
- BATADV_TVLV_ROAM = 0x05,
- BATADV_TVLV_MCAST = 0x06,
+ BATADV_TVLV_GW = 0x01,
+ BATADV_TVLV_DAT = 0x02,
+ BATADV_TVLV_NC = 0x03,
+ BATADV_TVLV_TT = 0x04,
+ BATADV_TVLV_ROAM = 0x05,
+ BATADV_TVLV_MCAST = 0x06,
+ BATADV_TVLV_MCAST_TRACKER = 0x07,
};
#pragma pack(2)
@@ -488,6 +494,25 @@ struct batadv_bcast_packet {
};
/**
+ * struct batadv_mcast_packet - multicast packet for network payload
+ * @packet_type: batman-adv packet type, part of the general header
+ * @version: batman-adv protocol version, part of the general header
+ * @ttl: time to live for this packet, part of the general header
+ * @reserved: reserved byte for alignment
+ * @tvlv_len: length of the appended tvlv buffer (in bytes)
+ */
+struct batadv_mcast_packet {
+ __u8 packet_type;
+ __u8 version;
+ __u8 ttl;
+ __u8 reserved;
+ __be16 tvlv_len;
+ /* "4 bytes boundary + 2 bytes" long to make the payload after the
+ * following ethernet header again 4 bytes boundary aligned
+ */
+};
+
+/**
* struct batadv_coded_packet - network coded packet
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the general header
@@ -628,6 +653,14 @@ struct batadv_tvlv_mcast_data {
__u8 reserved[3];
};
+/**
+ * struct batadv_tvlv_mcast_tracker - payload of a multicast tracker tvlv
+ * @num_dests: number of subsequent destination originator MAC addresses
+ */
+struct batadv_tvlv_mcast_tracker {
+ __be16 num_dests;
+};
+
#pragma pack()
#endif /* _UAPI_LINUX_BATADV_PACKET_H_ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f6cdf5..7a54982 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1200,6 +1200,9 @@ enum bpf_perf_event_type {
*/
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_REG_INVARIANTS (1U << 7)
+
/* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/
@@ -4517,6 +4520,8 @@ union bpf_attr {
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
+ * Note: the user stack will only be populated if the *task* is
+ * the current task; all other tasks will return -EOPNOTSUPP.
* To achieve this, the helper needs *task*, which is a valid
* pointer to **struct task_struct**. To store the stacktrace, the
* bpf program provides *buf* with a nonnegative *size*.
@@ -4528,6 +4533,7 @@ union bpf_attr {
*
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
+ * The *task* must be the current task.
* **BPF_F_USER_BUILD_ID**
* Collect buildid+offset instead of ips for user stack,
* only valid if **BPF_F_USER_STACK** is also specified.
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock {
};
struct bpf_timer {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_dynptr {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_head {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_node {
- __u64 :64;
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[3];
} __attribute__((aligned(8)));
struct bpf_rb_root {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_rb_node {
- __u64 :64;
- __u64 :64;
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[4];
} __attribute__((aligned(8)));
struct bpf_refcount {
- __u32 :32;
+ __u32 __opaque[1];
} __attribute__((aligned(4)));
struct bpf_sysctl {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 29ff80d..8181ef2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -856,6 +856,7 @@ enum {
IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
+ IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -873,6 +874,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
+enum ifla_vxlan_label_policy {
+ VXLAN_LABEL_FIXED = 0,
+ VXLAN_LABEL_INHERIT = 1,
+ __VXLAN_LABEL_END,
+ VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 15d71d2..63cf412 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3840,9 +3840,6 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
return ERR_PTR(ret);
}
-#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
-#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
-
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
@@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
+ if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* to check ownership cycle for a type unless it's also a
* node type.
*/
- if (!(rec->field_mask & GRAPH_NODE_MASK))
+ if (!(rec->field_mask & BPF_GRAPH_NODE))
continue;
/* We need to ensure ownership acyclicity among all types. The
@@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* - A is both an root and node.
* - B is only an node.
*/
- if (meta->record->field_mask & GRAPH_ROOT_MASK)
+ if (meta->record->field_mask & BPF_GRAPH_ROOT)
return -ELOOP;
}
return 0;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 56b0c1f..b45a838 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
ma = &bpf_global_percpu_ma;
else
ma = &bpf_global_ma;
- if (rec && rec->refcount_off >= 0)
- bpf_mem_free_rcu(ma, p);
- else
- bpf_mem_free(ma, p);
+ bpf_mem_free_rcu(ma, p);
}
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -2231,6 +2228,25 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
rcu_read_unlock();
return ret;
}
+
+/**
+ * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
+ * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
+ * hierarchy ID.
+ * @task: The target task
+ * @hierarchy_id: The ID of a cgroup1 hierarchy
+ *
+ * On success, the cgroup is returned. On failure, NULL is returned.
+ */
+__bpf_kfunc struct cgroup *
+bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
+{
+ struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id);
+
+ if (IS_ERR(cgrp))
+ return NULL;
+ return cgrp;
+}
#endif /* CONFIG_CGROUPS */
/**
@@ -2520,7 +2536,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
@@ -2537,6 +2553,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
+BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
@@ -2618,3 +2635,22 @@ static int __init kfunc_init(void)
}
late_initcall(kfunc_init);
+
+/* Get a pointer to dynptr data up to len bytes for read only access. If
+ * the dynptr doesn't have contiguous data up to len bytes, return NULL.
+ */
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
+{
+ return bpf_dynptr_slice(ptr, 0, NULL, len);
+}
+
+/* Get a pointer to dynptr data up to len bytes for read write access. If
+ * the dynptr doesn't have contiguous data up to len bytes, or the dynptr
+ * is read only, return NULL.
+ */
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len)
+{
+ if (__bpf_dynptr_is_rdonly(ptr))
+ return NULL;
+ return (void *)__bpf_dynptr_data(ptr, len);
+}
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 8504944..3505f3e 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -10,6 +10,8 @@
#include <linux/bpf_verifier.h>
#include <linux/math64.h>
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
{
/* ubuf and len_total should both be specified (or not) together */
@@ -325,3 +327,481 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_log);
+
+static const struct bpf_line_info *
+find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
+{
+ const struct bpf_line_info *linfo;
+ const struct bpf_prog *prog;
+ u32 i, nr_linfo;
+
+ prog = env->prog;
+ nr_linfo = prog->aux->nr_linfo;
+
+ if (!nr_linfo || insn_off >= prog->len)
+ return NULL;
+
+ linfo = prog->aux->linfo;
+ for (i = 1; i < nr_linfo; i++)
+ if (insn_off < linfo[i].insn_off)
+ break;
+
+ return &linfo[i - 1];
+}
+
+static const char *ltrim(const char *s)
+{
+ while (isspace(*s))
+ s++;
+
+ return s;
+}
+
+__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
+ u32 insn_off,
+ const char *prefix_fmt, ...)
+{
+ const struct bpf_line_info *linfo;
+
+ if (!bpf_verifier_log_needed(&env->log))
+ return;
+
+ linfo = find_linfo(env, insn_off);
+ if (!linfo || linfo == env->prev_linfo)
+ return;
+
+ if (prefix_fmt) {
+ va_list args;
+
+ va_start(args, prefix_fmt);
+ bpf_verifier_vlog(&env->log, prefix_fmt, args);
+ va_end(args);
+ }
+
+ verbose(env, "%s\n",
+ ltrim(btf_name_by_offset(env->prog->aux->btf,
+ linfo->line_off)));
+
+ env->prev_linfo = linfo;
+}
+
+static const char *btf_type_name(const struct btf *btf, u32 id)
+{
+ return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
+}
+
+/* string representation of 'enum bpf_reg_type'
+ *
+ * Note that reg_type_str() can not appear more than once in a single verbose()
+ * statement.
+ */
+const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
+{
+ char postfix[16] = {0}, prefix[64] = {0};
+ static const char * const str[] = {
+ [NOT_INIT] = "?",
+ [SCALAR_VALUE] = "scalar",
+ [PTR_TO_CTX] = "ctx",
+ [CONST_PTR_TO_MAP] = "map_ptr",
+ [PTR_TO_MAP_VALUE] = "map_value",
+ [PTR_TO_STACK] = "fp",
+ [PTR_TO_PACKET] = "pkt",
+ [PTR_TO_PACKET_META] = "pkt_meta",
+ [PTR_TO_PACKET_END] = "pkt_end",
+ [PTR_TO_FLOW_KEYS] = "flow_keys",
+ [PTR_TO_SOCKET] = "sock",
+ [PTR_TO_SOCK_COMMON] = "sock_common",
+ [PTR_TO_TCP_SOCK] = "tcp_sock",
+ [PTR_TO_TP_BUFFER] = "tp_buffer",
+ [PTR_TO_XDP_SOCK] = "xdp_sock",
+ [PTR_TO_BTF_ID] = "ptr_",
+ [PTR_TO_MEM] = "mem",
+ [PTR_TO_BUF] = "buf",
+ [PTR_TO_FUNC] = "func",
+ [PTR_TO_MAP_KEY] = "map_key",
+ [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
+ };
+
+ if (type & PTR_MAYBE_NULL) {
+ if (base_type(type) == PTR_TO_BTF_ID)
+ strncpy(postfix, "or_null_", 16);
+ else
+ strncpy(postfix, "_or_null", 16);
+ }
+
+ snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
+ type & MEM_RDONLY ? "rdonly_" : "",
+ type & MEM_RINGBUF ? "ringbuf_" : "",
+ type & MEM_USER ? "user_" : "",
+ type & MEM_PERCPU ? "percpu_" : "",
+ type & MEM_RCU ? "rcu_" : "",
+ type & PTR_UNTRUSTED ? "untrusted_" : "",
+ type & PTR_TRUSTED ? "trusted_" : ""
+ );
+
+ snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
+ prefix, str[base_type(type)], postfix);
+ return env->tmp_str_buf;
+}
+
+const char *dynptr_type_str(enum bpf_dynptr_type type)
+{
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ return "local";
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return "ringbuf";
+ case BPF_DYNPTR_TYPE_SKB:
+ return "skb";
+ case BPF_DYNPTR_TYPE_XDP:
+ return "xdp";
+ case BPF_DYNPTR_TYPE_INVALID:
+ return "<invalid>";
+ default:
+ WARN_ONCE(1, "unknown dynptr type %d\n", type);
+ return "<unknown>";
+ }
+}
+
+const char *iter_type_str(const struct btf *btf, u32 btf_id)
+{
+ if (!btf || btf_id == 0)
+ return "<invalid>";
+
+ /* we already validated that type is valid and has conforming name */
+ return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
+}
+
+const char *iter_state_str(enum bpf_iter_state state)
+{
+ switch (state) {
+ case BPF_ITER_STATE_ACTIVE:
+ return "active";
+ case BPF_ITER_STATE_DRAINED:
+ return "drained";
+ case BPF_ITER_STATE_INVALID:
+ return "<invalid>";
+ default:
+ WARN_ONCE(1, "unknown iter state %d\n", state);
+ return "<unknown>";
+ }
+}
+
+static char slot_type_char[] = {
+ [STACK_INVALID] = '?',
+ [STACK_SPILL] = 'r',
+ [STACK_MISC] = 'm',
+ [STACK_ZERO] = '0',
+ [STACK_DYNPTR] = 'd',
+ [STACK_ITER] = 'i',
+};
+
+static void print_liveness(struct bpf_verifier_env *env,
+ enum bpf_reg_liveness live)
+{
+ if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
+ verbose(env, "_");
+ if (live & REG_LIVE_READ)
+ verbose(env, "r");
+ if (live & REG_LIVE_WRITTEN)
+ verbose(env, "w");
+ if (live & REG_LIVE_DONE)
+ verbose(env, "D");
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool is_unum_decimal(u64 num)
+{
+ return num <= UNUM_MAX_DECIMAL;
+}
+
+static bool is_snum_decimal(s64 num)
+{
+ return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL;
+}
+
+static void verbose_unum(struct bpf_verifier_env *env, u64 num)
+{
+ if (is_unum_decimal(num))
+ verbose(env, "%llu", num);
+ else
+ verbose(env, "%#llx", num);
+}
+
+static void verbose_snum(struct bpf_verifier_env *env, s64 num)
+{
+ if (is_snum_decimal(num))
+ verbose(env, "%lld", num);
+ else
+ verbose(env, "%#llx", num);
+}
+
+static void print_scalar_ranges(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ const char **sep)
+{
+ /* For signed ranges, we want to unify 64-bit and 32-bit values in the
+ * output as much as possible, but there is a bit of a complication.
+ * If we choose to print values as decimals, this is natural to do,
+ * because negative 64-bit and 32-bit values >= S32_MIN have the same
+ * representation due to sign extension. But if we choose to print
+ * them in hex format (see is_snum_decimal()), then sign extension is
+ * misleading.
+ * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in
+ * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two
+ * very different numbers.
+ * So we avoid sign extension if we choose to print values in hex.
+ */
+ struct {
+ const char *name;
+ u64 val;
+ bool omit;
+ } minmaxs[] = {
+ {"smin", reg->smin_value, reg->smin_value == S64_MIN},
+ {"smax", reg->smax_value, reg->smax_value == S64_MAX},
+ {"umin", reg->umin_value, reg->umin_value == 0},
+ {"umax", reg->umax_value, reg->umax_value == U64_MAX},
+ {"smin32",
+ is_snum_decimal((s64)reg->s32_min_value)
+ ? (s64)reg->s32_min_value
+ : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN},
+ {"smax32",
+ is_snum_decimal((s64)reg->s32_max_value)
+ ? (s64)reg->s32_max_value
+ : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX},
+ {"umin32", reg->u32_min_value, reg->u32_min_value == 0},
+ {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX},
+ }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
+ bool neg1, neg2;
+
+ for (m1 = &minmaxs[0]; m1 < mend; m1++) {
+ if (m1->omit)
+ continue;
+
+ neg1 = m1->name[0] == 's' && (s64)m1->val < 0;
+
+ verbose(env, "%s%s=", *sep, m1->name);
+ *sep = ",";
+
+ for (m2 = m1 + 2; m2 < mend; m2 += 2) {
+ if (m2->omit || m2->val != m1->val)
+ continue;
+ /* don't mix negatives with positives */
+ neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
+ if (neg2 != neg1)
+ continue;
+ m2->omit = true;
+ verbose(env, "%s=", m2->name);
+ }
+
+ if (m1->name[0] == 's')
+ verbose_snum(env, m1->val);
+ else
+ verbose_unum(env, m1->val);
+ }
+}
+
+static bool type_is_map_ptr(enum bpf_reg_type t) {
+ switch (base_type(t)) {
+ case CONST_PTR_TO_MAP:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void print_reg_state(struct bpf_verifier_env *env,
+ const struct bpf_func_state *state,
+ const struct bpf_reg_state *reg)
+{
+ enum bpf_reg_type t;
+ const char *sep = "";
+
+ t = reg->type;
+ if (t == SCALAR_VALUE && reg->precise)
+ verbose(env, "P");
+ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
+ /* reg->off should be 0 for SCALAR_VALUE */
+ verbose_snum(env, reg->var_off.value + reg->off);
+ return;
+ }
+/*
+ * _a stands for append, was shortened to avoid multiline statements below.
+ * This macro is used to output a comma separated list of attributes.
+ */
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; })
+
+ verbose(env, "%s", reg_type_str(env, t));
+ if (t == PTR_TO_STACK) {
+ if (state->frameno != reg->frameno)
+ verbose(env, "[%d]", reg->frameno);
+ if (tnum_is_const(reg->var_off)) {
+ verbose_snum(env, reg->var_off.value + reg->off);
+ return;
+ }
+ }
+ if (base_type(t) == PTR_TO_BTF_ID)
+ verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
+ verbose(env, "(");
+ if (reg->id)
+ verbose_a("id=%d", reg->id);
+ if (reg->ref_obj_id)
+ verbose_a("ref_obj_id=%d", reg->ref_obj_id);
+ if (type_is_non_owning_ref(reg->type))
+ verbose_a("%s", "non_own_ref");
+ if (type_is_map_ptr(t)) {
+ if (reg->map_ptr->name[0])
+ verbose_a("map=%s", reg->map_ptr->name);
+ verbose_a("ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
+ }
+ if (t != SCALAR_VALUE && reg->off) {
+ verbose_a("off=");
+ verbose_snum(env, reg->off);
+ }
+ if (type_is_pkt_pointer(t)) {
+ verbose_a("r=");
+ verbose_unum(env, reg->range);
+ }
+ if (tnum_is_const(reg->var_off)) {
+ /* a pointer register with fixed offset */
+ if (reg->var_off.value) {
+ verbose_a("imm=");
+ verbose_snum(env, reg->var_off.value);
+ }
+ } else {
+ print_scalar_ranges(env, reg, &sep);
+ if (!tnum_is_unknown(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose_a("var_off=%s", tn_buf);
+ }
+ }
+ verbose(env, ")");
+
+#undef verbose_a
+}
+
+void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state,
+ bool print_all)
+{
+ const struct bpf_reg_state *reg;
+ int i;
+
+ if (state->frameno)
+ verbose(env, " frame%d:", state->frameno);
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ reg = &state->regs[i];
+ if (reg->type == NOT_INIT)
+ continue;
+ if (!print_all && !reg_scratched(env, i))
+ continue;
+ verbose(env, " R%d", i);
+ print_liveness(env, reg->live);
+ verbose(env, "=");
+ print_reg_state(env, state, reg);
+ }
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ char types_buf[BPF_REG_SIZE + 1];
+ bool valid = false;
+ u8 slot_type;
+ int j;
+
+ if (!print_all && !stack_slot_scratched(env, i))
+ continue;
+
+ for (j = 0; j < BPF_REG_SIZE; j++) {
+ slot_type = state->stack[i].slot_type[j];
+ if (slot_type != STACK_INVALID)
+ valid = true;
+ types_buf[j] = slot_type_char[slot_type];
+ }
+ types_buf[BPF_REG_SIZE] = 0;
+ if (!valid)
+ continue;
+
+ reg = &state->stack[i].spilled_ptr;
+ switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
+ case STACK_SPILL:
+ /* print MISC/ZERO/INVALID slots above subreg spill */
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ if (state->stack[i].slot_type[j] == STACK_SPILL)
+ break;
+ types_buf[j] = '\0';
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=%s", types_buf);
+ print_reg_state(env, state, reg);
+ break;
+ case STACK_DYNPTR:
+ /* skip to main dynptr slot */
+ i += BPF_DYNPTR_NR_SLOTS - 1;
+ reg = &state->stack[i].spilled_ptr;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
+ if (reg->ref_obj_id)
+ verbose(env, "(ref_id=%d)", reg->ref_obj_id);
+ break;
+ case STACK_ITER:
+ /* only main slot has ref_obj_id set; skip others */
+ if (!reg->ref_obj_id)
+ continue;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
+ iter_type_str(reg->iter.btf, reg->iter.btf_id),
+ reg->ref_obj_id, iter_state_str(reg->iter.state),
+ reg->iter.depth);
+ break;
+ case STACK_MISC:
+ case STACK_ZERO:
+ default:
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=%s", types_buf);
+ break;
+ }
+ }
+ if (state->acquired_refs && state->refs[0].id) {
+ verbose(env, " refs=%d", state->refs[0].id);
+ for (i = 1; i < state->acquired_refs; i++)
+ if (state->refs[i].id)
+ verbose(env, ",%d", state->refs[i].id);
+ }
+ if (state->in_callback_fn)
+ verbose(env, " cb");
+ if (state->in_async_callback_fn)
+ verbose(env, " async_cb");
+ verbose(env, "\n");
+ if (!print_all)
+ mark_verifier_state_clean(env);
+}
+
+static inline u32 vlog_alignment(u32 pos)
+{
+ return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
+ BPF_LOG_MIN_ALIGNMENT) - pos - 1;
+}
+
+void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state)
+{
+ if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
+ /* remove new line character */
+ bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
+ verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
+ } else {
+ verbose(env, "%d:", env->insn_idx);
+ }
+ print_verifier_state(env, state, false);
+}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 17c7e77..b32be68 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -231,6 +231,9 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
struct lpm_trie_node *node, *found = NULL;
struct bpf_lpm_trie_key *key = _key;
+ if (key->prefixlen > trie->max_prefixlen)
+ return NULL;
+
/* Start walking the trie from the root node ... */
for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held());
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index d6b2774..dff7ba5 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
{
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+ bool crosstask = task && task != current;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
@@ -410,6 +411,14 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
if (task && user && !user_mode(regs))
goto err_fault;
+ /* get_perf_callchain does not support crosstask user stack walking
+ * but returns an empty stack instead of NULL.
+ */
+ if (crosstask && user) {
+ err = -EOPNOTSUPP;
+ goto clear;
+ }
+
num_elem = size / elem_size;
max_depth = num_elem + skip;
if (sysctl_perf_event_max_stack < max_depth)
@@ -421,7 +430,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
trace = get_callchain_entry_for_task(task, max_depth);
else
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
- false, false);
+ crosstask, false);
if (unlikely(!trace))
goto err_fault;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0ed286b..5e43ddd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2573,7 +2573,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32 |
BPF_F_XDP_HAS_FRAGS |
- BPF_F_XDP_DEV_BOUND_ONLY))
+ BPF_F_XDP_DEV_BOUND_ONLY |
+ BPF_F_TEST_REG_INVARIANTS))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 26082b97..e5c3500 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -70,15 +70,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm
return NULL;
retry:
- task = next_thread(task);
+ task = __next_thread(task);
+ if (!task)
+ return NULL;
next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);
- if (!next_tid || next_tid == common->pid) {
- /* Run out of tasks of a process. The tasks of a
- * thread_group are linked as circular linked list.
- */
- return NULL;
- }
+ if (!next_tid)
+ goto retry;
if (skip_if_dup_files && task->files == task->group_leader->files)
goto retry;
@@ -980,7 +978,6 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
__alignof__(struct bpf_iter_task));
- kit->task = kit->pos = NULL;
switch (flags) {
case BPF_TASK_ITER_ALL_THREADS:
case BPF_TASK_ITER_ALL_PROCS:
@@ -1017,20 +1014,16 @@ __bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it)
if (flags == BPF_TASK_ITER_ALL_PROCS)
goto get_next_task;
- kit->pos = next_thread(kit->pos);
- if (kit->pos == kit->task) {
- if (flags == BPF_TASK_ITER_PROC_THREADS) {
- kit->pos = NULL;
- return pos;
- }
- } else
+ kit->pos = __next_thread(kit->pos);
+ if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS)
return pos;
get_next_task:
- kit->pos = next_task(kit->pos);
- kit->task = kit->pos;
- if (kit->pos == &init_task)
+ kit->task = next_task(kit->task);
+ if (kit->task == &init_task)
kit->pos = NULL;
+ else
+ kit->pos = kit->task;
return pos;
}
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 3d7127f..f4c91c9 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -208,7 +208,12 @@ struct tnum tnum_clear_subreg(struct tnum a)
return tnum_lshift(tnum_rshift(a, 32), 32);
}
+struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg)
+{
+ return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg));
+}
+
struct tnum tnum_const_subreg(struct tnum a, u32 value)
{
- return tnum_or(tnum_clear_subreg(a), tnum_const(value));
+ return tnum_with_subreg(a, tnum_const(value));
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index af2819d5..405da1f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -342,27 +342,6 @@ struct btf *btf_vmlinux;
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
-static const struct bpf_line_info *
-find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
-{
- const struct bpf_line_info *linfo;
- const struct bpf_prog *prog;
- u32 i, nr_linfo;
-
- prog = env->prog;
- nr_linfo = prog->aux->nr_linfo;
-
- if (!nr_linfo || insn_off >= prog->len)
- return NULL;
-
- linfo = prog->aux->linfo;
- for (i = 1; i < nr_linfo; i++)
- if (insn_off < linfo[i].insn_off)
- break;
-
- return &linfo[i - 1];
-}
-
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
struct bpf_verifier_env *env = private_data;
@@ -376,42 +355,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
va_end(args);
}
-static const char *ltrim(const char *s)
-{
- while (isspace(*s))
- s++;
-
- return s;
-}
-
-__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
- u32 insn_off,
- const char *prefix_fmt, ...)
-{
- const struct bpf_line_info *linfo;
-
- if (!bpf_verifier_log_needed(&env->log))
- return;
-
- linfo = find_linfo(env, insn_off);
- if (!linfo || linfo == env->prev_linfo)
- return;
-
- if (prefix_fmt) {
- va_list args;
-
- va_start(args, prefix_fmt);
- bpf_verifier_vlog(&env->log, prefix_fmt, args);
- va_end(args);
- }
-
- verbose(env, "%s\n",
- ltrim(btf_name_by_offset(env->prog->aux->btf,
- linfo->line_off)));
-
- env->prev_linfo = linfo;
-}
-
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct tnum *range, const char *ctx,
@@ -430,21 +373,6 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env,
verbose(env, " should have been in %s\n", tn_buf);
}
-static bool type_is_pkt_pointer(enum bpf_reg_type type)
-{
- type = base_type(type);
- return type == PTR_TO_PACKET ||
- type == PTR_TO_PACKET_META;
-}
-
-static bool type_is_sk_pointer(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET ||
- type == PTR_TO_SOCK_COMMON ||
- type == PTR_TO_TCP_SOCK ||
- type == PTR_TO_XDP_SOCK;
-}
-
static bool type_may_be_null(u32 type)
{
return type & PTR_MAYBE_NULL;
@@ -468,16 +396,6 @@ static bool reg_not_null(const struct bpf_reg_state *reg)
type == PTR_TO_MEM;
}
-static bool type_is_ptr_alloc_obj(u32 type)
-{
- return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
-}
-
-static bool type_is_non_owning_ref(u32 type)
-{
- return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
-}
-
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
@@ -605,83 +523,6 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
insn->imm == BPF_CMPXCHG;
}
-/* string representation of 'enum bpf_reg_type'
- *
- * Note that reg_type_str() can not appear more than once in a single verbose()
- * statement.
- */
-static const char *reg_type_str(struct bpf_verifier_env *env,
- enum bpf_reg_type type)
-{
- char postfix[16] = {0}, prefix[64] = {0};
- static const char * const str[] = {
- [NOT_INIT] = "?",
- [SCALAR_VALUE] = "scalar",
- [PTR_TO_CTX] = "ctx",
- [CONST_PTR_TO_MAP] = "map_ptr",
- [PTR_TO_MAP_VALUE] = "map_value",
- [PTR_TO_STACK] = "fp",
- [PTR_TO_PACKET] = "pkt",
- [PTR_TO_PACKET_META] = "pkt_meta",
- [PTR_TO_PACKET_END] = "pkt_end",
- [PTR_TO_FLOW_KEYS] = "flow_keys",
- [PTR_TO_SOCKET] = "sock",
- [PTR_TO_SOCK_COMMON] = "sock_common",
- [PTR_TO_TCP_SOCK] = "tcp_sock",
- [PTR_TO_TP_BUFFER] = "tp_buffer",
- [PTR_TO_XDP_SOCK] = "xdp_sock",
- [PTR_TO_BTF_ID] = "ptr_",
- [PTR_TO_MEM] = "mem",
- [PTR_TO_BUF] = "buf",
- [PTR_TO_FUNC] = "func",
- [PTR_TO_MAP_KEY] = "map_key",
- [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
- };
-
- if (type & PTR_MAYBE_NULL) {
- if (base_type(type) == PTR_TO_BTF_ID)
- strncpy(postfix, "or_null_", 16);
- else
- strncpy(postfix, "_or_null", 16);
- }
-
- snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
- type & MEM_RDONLY ? "rdonly_" : "",
- type & MEM_RINGBUF ? "ringbuf_" : "",
- type & MEM_USER ? "user_" : "",
- type & MEM_PERCPU ? "percpu_" : "",
- type & MEM_RCU ? "rcu_" : "",
- type & PTR_UNTRUSTED ? "untrusted_" : "",
- type & PTR_TRUSTED ? "trusted_" : ""
- );
-
- snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
- prefix, str[base_type(type)], postfix);
- return env->tmp_str_buf;
-}
-
-static char slot_type_char[] = {
- [STACK_INVALID] = '?',
- [STACK_SPILL] = 'r',
- [STACK_MISC] = 'm',
- [STACK_ZERO] = '0',
- [STACK_DYNPTR] = 'd',
- [STACK_ITER] = 'i',
-};
-
-static void print_liveness(struct bpf_verifier_env *env,
- enum bpf_reg_liveness live)
-{
- if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
- verbose(env, "_");
- if (live & REG_LIVE_READ)
- verbose(env, "r");
- if (live & REG_LIVE_WRITTEN)
- verbose(env, "w");
- if (live & REG_LIVE_DONE)
- verbose(env, "D");
-}
-
static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
@@ -751,87 +592,6 @@ static const char *btf_type_name(const struct btf *btf, u32 id)
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
-static const char *dynptr_type_str(enum bpf_dynptr_type type)
-{
- switch (type) {
- case BPF_DYNPTR_TYPE_LOCAL:
- return "local";
- case BPF_DYNPTR_TYPE_RINGBUF:
- return "ringbuf";
- case BPF_DYNPTR_TYPE_SKB:
- return "skb";
- case BPF_DYNPTR_TYPE_XDP:
- return "xdp";
- case BPF_DYNPTR_TYPE_INVALID:
- return "<invalid>";
- default:
- WARN_ONCE(1, "unknown dynptr type %d\n", type);
- return "<unknown>";
- }
-}
-
-static const char *iter_type_str(const struct btf *btf, u32 btf_id)
-{
- if (!btf || btf_id == 0)
- return "<invalid>";
-
- /* we already validated that type is valid and has conforming name */
- return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
-}
-
-static const char *iter_state_str(enum bpf_iter_state state)
-{
- switch (state) {
- case BPF_ITER_STATE_ACTIVE:
- return "active";
- case BPF_ITER_STATE_DRAINED:
- return "drained";
- case BPF_ITER_STATE_INVALID:
- return "<invalid>";
- default:
- WARN_ONCE(1, "unknown iter state %d\n", state);
- return "<unknown>";
- }
-}
-
-static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
-{
- env->scratched_regs |= 1U << regno;
-}
-
-static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
-{
- env->scratched_stack_slots |= 1ULL << spi;
-}
-
-static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
-{
- return (env->scratched_regs >> regno) & 1;
-}
-
-static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
-{
- return (env->scratched_stack_slots >> regno) & 1;
-}
-
-static bool verifier_state_scratched(const struct bpf_verifier_env *env)
-{
- return env->scratched_regs || env->scratched_stack_slots;
-}
-
-static void mark_verifier_state_clean(struct bpf_verifier_env *env)
-{
- env->scratched_regs = 0U;
- env->scratched_stack_slots = 0ULL;
-}
-
-/* Used for printing the entire verifier state. */
-static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
-{
- env->scratched_regs = ~0U;
- env->scratched_stack_slots = ~0ULL;
-}
-
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
@@ -1371,226 +1131,6 @@ static void scrub_spilled_slot(u8 *stype)
*stype = STACK_MISC;
}
-static void print_scalar_ranges(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- const char **sep)
-{
- struct {
- const char *name;
- u64 val;
- bool omit;
- } minmaxs[] = {
- {"smin", reg->smin_value, reg->smin_value == S64_MIN},
- {"smax", reg->smax_value, reg->smax_value == S64_MAX},
- {"umin", reg->umin_value, reg->umin_value == 0},
- {"umax", reg->umax_value, reg->umax_value == U64_MAX},
- {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
- {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
- {"umin32", reg->u32_min_value, reg->u32_min_value == 0},
- {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX},
- }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
- bool neg1, neg2;
-
- for (m1 = &minmaxs[0]; m1 < mend; m1++) {
- if (m1->omit)
- continue;
-
- neg1 = m1->name[0] == 's' && (s64)m1->val < 0;
-
- verbose(env, "%s%s=", *sep, m1->name);
- *sep = ",";
-
- for (m2 = m1 + 2; m2 < mend; m2 += 2) {
- if (m2->omit || m2->val != m1->val)
- continue;
- /* don't mix negatives with positives */
- neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
- if (neg2 != neg1)
- continue;
- m2->omit = true;
- verbose(env, "%s=", m2->name);
- }
-
- verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
- }
-}
-
-static void print_verifier_state(struct bpf_verifier_env *env,
- const struct bpf_func_state *state,
- bool print_all)
-{
- const struct bpf_reg_state *reg;
- enum bpf_reg_type t;
- int i;
-
- if (state->frameno)
- verbose(env, " frame%d:", state->frameno);
- for (i = 0; i < MAX_BPF_REG; i++) {
- reg = &state->regs[i];
- t = reg->type;
- if (t == NOT_INIT)
- continue;
- if (!print_all && !reg_scratched(env, i))
- continue;
- verbose(env, " R%d", i);
- print_liveness(env, reg->live);
- verbose(env, "=");
- if (t == SCALAR_VALUE && reg->precise)
- verbose(env, "P");
- if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
- tnum_is_const(reg->var_off)) {
- /* reg->off should be 0 for SCALAR_VALUE */
- verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
- verbose(env, "%lld", reg->var_off.value + reg->off);
- } else {
- const char *sep = "";
-
- verbose(env, "%s", reg_type_str(env, t));
- if (base_type(t) == PTR_TO_BTF_ID)
- verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
- verbose(env, "(");
-/*
- * _a stands for append, was shortened to avoid multiline statements below.
- * This macro is used to output a comma separated list of attributes.
- */
-#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
-
- if (reg->id)
- verbose_a("id=%d", reg->id);
- if (reg->ref_obj_id)
- verbose_a("ref_obj_id=%d", reg->ref_obj_id);
- if (type_is_non_owning_ref(reg->type))
- verbose_a("%s", "non_own_ref");
- if (t != SCALAR_VALUE)
- verbose_a("off=%d", reg->off);
- if (type_is_pkt_pointer(t))
- verbose_a("r=%d", reg->range);
- else if (base_type(t) == CONST_PTR_TO_MAP ||
- base_type(t) == PTR_TO_MAP_KEY ||
- base_type(t) == PTR_TO_MAP_VALUE)
- verbose_a("ks=%d,vs=%d",
- reg->map_ptr->key_size,
- reg->map_ptr->value_size);
- if (tnum_is_const(reg->var_off)) {
- /* Typically an immediate SCALAR_VALUE, but
- * could be a pointer whose offset is too big
- * for reg->off
- */
- verbose_a("imm=%llx", reg->var_off.value);
- } else {
- print_scalar_ranges(env, reg, &sep);
- if (!tnum_is_unknown(reg->var_off)) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose_a("var_off=%s", tn_buf);
- }
- }
-#undef verbose_a
-
- verbose(env, ")");
- }
- }
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- char types_buf[BPF_REG_SIZE + 1];
- bool valid = false;
- int j;
-
- for (j = 0; j < BPF_REG_SIZE; j++) {
- if (state->stack[i].slot_type[j] != STACK_INVALID)
- valid = true;
- types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
- }
- types_buf[BPF_REG_SIZE] = 0;
- if (!valid)
- continue;
- if (!print_all && !stack_slot_scratched(env, i))
- continue;
- switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
- case STACK_SPILL:
- reg = &state->stack[i].spilled_ptr;
- t = reg->type;
-
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
- if (t == SCALAR_VALUE && reg->precise)
- verbose(env, "P");
- if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
- verbose(env, "%lld", reg->var_off.value + reg->off);
- break;
- case STACK_DYNPTR:
- i += BPF_DYNPTR_NR_SLOTS - 1;
- reg = &state->stack[i].spilled_ptr;
-
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
- if (reg->ref_obj_id)
- verbose(env, "(ref_id=%d)", reg->ref_obj_id);
- break;
- case STACK_ITER:
- /* only main slot has ref_obj_id set; skip others */
- reg = &state->stack[i].spilled_ptr;
- if (!reg->ref_obj_id)
- continue;
-
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
- iter_type_str(reg->iter.btf, reg->iter.btf_id),
- reg->ref_obj_id, iter_state_str(reg->iter.state),
- reg->iter.depth);
- break;
- case STACK_MISC:
- case STACK_ZERO:
- default:
- reg = &state->stack[i].spilled_ptr;
-
- for (j = 0; j < BPF_REG_SIZE; j++)
- types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
- types_buf[BPF_REG_SIZE] = 0;
-
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=%s", types_buf);
- break;
- }
- }
- if (state->acquired_refs && state->refs[0].id) {
- verbose(env, " refs=%d", state->refs[0].id);
- for (i = 1; i < state->acquired_refs; i++)
- if (state->refs[i].id)
- verbose(env, ",%d", state->refs[i].id);
- }
- if (state->in_callback_fn)
- verbose(env, " cb");
- if (state->in_async_callback_fn)
- verbose(env, " async_cb");
- verbose(env, "\n");
- if (!print_all)
- mark_verifier_state_clean(env);
-}
-
-static inline u32 vlog_alignment(u32 pos)
-{
- return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
- BPF_LOG_MIN_ALIGNMENT) - pos - 1;
-}
-
-static void print_insn_state(struct bpf_verifier_env *env,
- const struct bpf_func_state *state)
-{
- if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
- /* remove new line character */
- bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
- verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
- } else {
- verbose(env, "%d:", env->insn_idx);
- }
- print_verifier_state(env, state, false);
-}
-
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
* small to hold src. This is different from krealloc since we don't want to preserve
* the contents of dst.
@@ -2341,69 +1881,214 @@ static void __update_reg_bounds(struct bpf_reg_state *reg)
/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
- /* Learn sign from signed bounds.
- * If we cannot cross the sign boundary, then signed and unsigned bounds
+ /* If upper 32 bits of u64/s64 range don't change, we can use lower 32
+ * bits to improve our u32/s32 boundaries.
+ *
+ * E.g., the case where we have upper 32 bits as zero ([10, 20] in
+ * u64) is pretty trivial, it's obvious that in u32 we'll also have
+ * [10, 20] range. But this property holds for any 64-bit range as
+ * long as upper 32 bits in that entire range of values stay the same.
+ *
+ * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
+ * in decimal) has the same upper 32 bits throughout all the values in
+ * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
+ * range.
+ *
+ * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
+ * following the rules outlined below about u64/s64 correspondence
+ * (which equally applies to u32 vs s32 correspondence). In general it
+ * depends on actual hexadecimal values of 32-bit range. They can form
+ * only valid u32, or only valid s32 ranges in some cases.
+ *
+ * So we use all these insights to derive bounds for subregisters here.
+ */
+ if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
+ /* u64 to u32 casting preserves validity of low 32 bits as
+ * a range, if upper 32 bits are the same
+ */
+ reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
+ reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);
+
+ if ((s32)reg->umin_value <= (s32)reg->umax_value) {
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
+ }
+ }
+ if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
+ /* low 32 bits should form a proper u32 range */
+ if ((u32)reg->smin_value <= (u32)reg->smax_value) {
+ reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
+ reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
+ }
+ /* low 32 bits should form a proper s32 range */
+ if ((s32)reg->smin_value <= (s32)reg->smax_value) {
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
+ }
+ }
+ /* Special case where upper bits form a small sequence of two
+ * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+ * 0x00000000 is also valid), while lower bits form a proper s32 range
+ * going from negative numbers to positive numbers. E.g., let's say we
+ * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
+ * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
+ * 0x0000000000000000, 0x0000000000000001}). Ignoring upper 32 bits,
+ * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
+ * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
+ * upper 32 bits. As a random example, s64 range
+ * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
+ * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
+ */
+ if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
+ (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
+ }
+ if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
+ (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
+ }
+ /* if u32 range forms a valid s32 range (due to matching sign bit),
+ * try to learn from that
+ */
+ if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
+ }
+ /* If we cannot cross the sign boundary, then signed and unsigned bounds
* are the same, so combine. This works even in the negative case, e.g.
* -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
*/
- if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
- reg->s32_min_value = reg->u32_min_value =
- max_t(u32, reg->s32_min_value, reg->u32_min_value);
- reg->s32_max_value = reg->u32_max_value =
- min_t(u32, reg->s32_max_value, reg->u32_max_value);
- return;
- }
- /* Learn sign from unsigned bounds. Signed bounds cross the sign
- * boundary, so we must be careful.
- */
- if ((s32)reg->u32_max_value >= 0) {
- /* Positive. We can't learn anything from the smin, but smax
- * is positive, hence safe.
- */
- reg->s32_min_value = reg->u32_min_value;
- reg->s32_max_value = reg->u32_max_value =
- min_t(u32, reg->s32_max_value, reg->u32_max_value);
- } else if ((s32)reg->u32_min_value < 0) {
- /* Negative. We can't learn anything from the smax, but smin
- * is negative, hence safe.
- */
- reg->s32_min_value = reg->u32_min_value =
- max_t(u32, reg->s32_min_value, reg->u32_min_value);
- reg->s32_max_value = reg->u32_max_value;
+ if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
+ reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
+ reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
}
}
static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
{
- /* Learn sign from signed bounds.
- * If we cannot cross the sign boundary, then signed and unsigned bounds
+ /* If u64 range forms a valid s64 range (due to matching sign bit),
+ * try to learn from that. Let's do a bit of ASCII art to see when
+ * this is happening. Let's take u64 range first:
+ *
+ * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
+ * |-------------------------------|--------------------------------|
+ *
+ * Valid u64 range is formed when umin and umax are anywhere in the
+ * range [0, U64_MAX], and umin <= umax. u64 case is simple and
+ * straightforward. Let's see how s64 range maps onto the same range
+ * of values, annotated below the line for comparison:
+ *
+ * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
+ * |-------------------------------|--------------------------------|
+ * 0 S64_MAX S64_MIN -1
+ *
+ * So s64 values basically start in the middle and they are logically
+ * contiguous to the right of it, wrapping around from -1 to 0, and
+ * then finishing as S64_MAX (0x7fffffffffffffff) right before
+ * S64_MIN. We can try drawing the continuity of u64 vs s64 values
+ * more visually as mapped to sign-agnostic range of hex values.
+ *
+ * u64 start u64 end
+ * _______________________________________________________________
+ * / \
+ * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
+ * |-------------------------------|--------------------------------|
+ * 0 S64_MAX S64_MIN -1
+ * / \
+ * >------------------------------ ------------------------------->
+ * s64 continues... s64 end s64 start s64 "midpoint"
+ *
+ * What this means is that, in general, we can't always derive
+ * something new about u64 from any random s64 range, and vice versa.
+ *
+ * But we can do that in two particular cases. One is when entire
+ * u64/s64 range is *entirely* contained within left half of the above
+ * diagram or when it is *entirely* contained in the right half. I.e.:
+ *
+ * |-------------------------------|--------------------------------|
+ * ^ ^ ^ ^
+ * A B C D
+ *
+ * [A, B] and [C, D] are contained entirely in their respective halves
+ * and form valid contiguous ranges as both u64 and s64 values. [A, B]
+ * will be non-negative both as u64 and s64 (and in fact it will be
+ * identical ranges no matter the signedness). [C, D] treated as s64
+ * will be a range of negative values, while in u64 it will be
+ * a non-negative range of values at or above 0x8000000000000000.
+ *
+ * Now, any other range here can't be represented in both u64 and s64
+ * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
+ * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
+ * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
+ * for example. Similarly, valid s64 range [D, A] (going from negative
+ * to positive values), would be two separate [D, U64_MAX] and [0, A]
+ * ranges as u64. Currently reg_state can't represent two segments per
+ * numeric domain, so in such situations we can only derive maximal
+ * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
+ *
+ * So we use these facts to derive umin/umax from smin/smax and vice
+ * versa only if they stay within the same "half". This is equivalent
+ * to checking sign bit: lower half will have sign bit as zero, upper
+ * half will have sign bit as one. Below in code we simplify this by just
+ * casting umin/umax as smin/smax and checking if they form valid
+ * range, and vice versa. Those are equivalent checks.
+ */
+ if ((s64)reg->umin_value <= (s64)reg->umax_value) {
+ reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
+ reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
+ }
+ /* If we cannot cross the sign boundary, then signed and unsigned bounds
* are the same, so combine. This works even in the negative case, e.g.
* -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
*/
- if (reg->smin_value >= 0 || reg->smax_value < 0) {
- reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
- reg->umin_value);
- reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
- reg->umax_value);
- return;
+ if ((u64)reg->smin_value <= (u64)reg->smax_value) {
+ reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
+ reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
}
- /* Learn sign from unsigned bounds. Signed bounds cross the sign
- * boundary, so we must be careful.
+}
+
+static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
+{
+ /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
+ * values on both sides of 64-bit range in the hope of getting a tighter range.
+ * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
+ * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
+ * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
+ * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
+ * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
+ * better overall bounds for r1 as [0x1'00000001; 0x3'7fffffff].
+ * We just need to make sure that derived bounds we are intersecting
+ * with are well-formed ranges in respective s64 or u64 domain, just
+ * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
*/
- if ((s64)reg->umax_value >= 0) {
- /* Positive. We can't learn anything from the smin, but smax
- * is positive, hence safe.
- */
- reg->smin_value = reg->umin_value;
- reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
- reg->umax_value);
- } else if ((s64)reg->umin_value < 0) {
- /* Negative. We can't learn anything from the smax, but smin
- * is negative, hence safe.
- */
- reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
- reg->umin_value);
- reg->smax_value = reg->umax_value;
+ __u64 new_umin, new_umax;
+ __s64 new_smin, new_smax;
+
+ /* u32 -> u64 tightening, it's always well-formed */
+ new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
+ new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
+ reg->umin_value = max_t(u64, reg->umin_value, new_umin);
+ reg->umax_value = min_t(u64, reg->umax_value, new_umax);
+ /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
+ new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
+ new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
+ reg->smin_value = max_t(s64, reg->smin_value, new_smin);
+ reg->smax_value = min_t(s64, reg->smax_value, new_smax);
+
+ /* if s32 can be treated as valid u32 range, we can use it as well */
+ if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
+ /* s32 -> u64 tightening */
+ new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
+ new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
+ reg->umin_value = max_t(u64, reg->umin_value, new_umin);
+ reg->umax_value = min_t(u64, reg->umax_value, new_umax);
+ /* s32 -> s64 tightening */
+ new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
+ new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
+ reg->smin_value = max_t(s64, reg->smin_value, new_smin);
+ reg->smax_value = min_t(s64, reg->smax_value, new_smax);
}
}
@@ -2411,6 +2096,7 @@ static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
__reg32_deduce_bounds(reg);
__reg64_deduce_bounds(reg);
+ __reg_deduce_mixed_bounds(reg);
}
/* Attempts to improve var_off based on unsigned min/max information */
@@ -2432,6 +2118,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg)
__update_reg_bounds(reg);
/* We might have learned something about the sign bit. */
__reg_deduce_bounds(reg);
+ __reg_deduce_bounds(reg);
/* We might have learned some bits from the bounds. */
__reg_bound_offset(reg);
/* Intersecting with the old var_off might have improved our bounds
@@ -2441,6 +2128,56 @@ static void reg_bounds_sync(struct bpf_reg_state *reg)
__update_reg_bounds(reg);
}
+static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, const char *ctx)
+{
+ const char *msg;
+
+ if (reg->umin_value > reg->umax_value ||
+ reg->smin_value > reg->smax_value ||
+ reg->u32_min_value > reg->u32_max_value ||
+ reg->s32_min_value > reg->s32_max_value) {
+ msg = "range bounds violation";
+ goto out;
+ }
+
+ if (tnum_is_const(reg->var_off)) {
+ u64 uval = reg->var_off.value;
+ s64 sval = (s64)uval;
+
+ if (reg->umin_value != uval || reg->umax_value != uval ||
+ reg->smin_value != sval || reg->smax_value != sval) {
+ msg = "const tnum out of sync with range bounds";
+ goto out;
+ }
+ }
+
+ if (tnum_subreg_is_const(reg->var_off)) {
+ u32 uval32 = tnum_subreg(reg->var_off).value;
+ s32 sval32 = (s32)uval32;
+
+ if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
+ reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
+ msg = "const subreg tnum out of sync with range bounds";
+ goto out;
+ }
+ }
+
+ return 0;
+out:
+ verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
+ "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
+ ctx, msg, reg->umin_value, reg->umax_value,
+ reg->smin_value, reg->smax_value,
+ reg->u32_min_value, reg->u32_max_value,
+ reg->s32_min_value, reg->s32_max_value,
+ reg->var_off.value, reg->var_off.mask);
+ if (env->test_reg_invariants)
+ return -EFAULT;
+ __mark_reg_unbounded(reg);
+ return 0;
+}
+
static bool __reg32_bound_s64(s32 a)
{
return a >= 0 && a <= S32_MAX;
@@ -2465,51 +2202,6 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
}
}
-static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
-{
- /* special case when 64-bit register has upper 32-bit register
- * zeroed. Typically happens after zext or <<32, >>32 sequence
- * allowing us to use 32-bit bounds directly,
- */
- if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
- __reg_assign_32_into_64(reg);
- } else {
- /* Otherwise the best we can do is push lower 32bit known and
- * unknown bits into register (var_off set from jmp logic)
- * then learn as much as possible from the 64-bit tnum
- * known and unknown bits. The previous smin/smax bounds are
- * invalid here because of jmp32 compare so mark them unknown
- * so they do not impact tnum bounds calculation.
- */
- __mark_reg64_unbounded(reg);
- }
- reg_bounds_sync(reg);
-}
-
-static bool __reg64_bound_s32(s64 a)
-{
- return a >= S32_MIN && a <= S32_MAX;
-}
-
-static bool __reg64_bound_u32(u64 a)
-{
- return a >= U32_MIN && a <= U32_MAX;
-}
-
-static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
-{
- __mark_reg32_unbounded(reg);
- if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
- reg->s32_min_value = (s32)reg->smin_value;
- reg->s32_max_value = (s32)reg->smax_value;
- }
- if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
- reg->u32_min_value = (u32)reg->umin_value;
- reg->u32_max_value = (u32)reg->umax_value;
- }
- reg_bounds_sync(reg);
-}
-
/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
@@ -4592,9 +4284,17 @@ static bool register_is_null(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}
-static bool register_is_const(struct bpf_reg_state *reg)
+/* check if register is a constant scalar value */
+static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
{
- return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
+ return reg->type == SCALAR_VALUE &&
+ tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
+}
+
+/* assuming is_reg_const() is true, return constant value of a register */
+static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
+{
+ return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
}
static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
@@ -5451,10 +5151,23 @@ BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
{
if (!btf_is_kernel(btf))
- return false;
+ return true;
return btf_id_set_contains(&rcu_protected_types, btf_id);
}
+static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
+{
+ struct btf_struct_meta *meta;
+
+ if (btf_is_kernel(kptr_field->kptr.btf))
+ return NULL;
+
+ meta = btf_find_struct_meta(kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id);
+
+ return meta ? meta->record : NULL;
+}
+
static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
@@ -5465,12 +5178,25 @@ static bool rcu_safe_kptr(const struct btf_field *field)
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
{
+ struct btf_record *rec;
+ u32 ret;
+
+ ret = PTR_MAYBE_NULL;
if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
- if (kptr_field->type != BPF_KPTR_PERCPU)
- return PTR_MAYBE_NULL | MEM_RCU;
- return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+ ret |= MEM_RCU;
+ if (kptr_field->type == BPF_KPTR_PERCPU)
+ ret |= MEM_PERCPU;
+ else if (!btf_is_kernel(kptr_field->kptr.btf))
+ ret |= MEM_ALLOC;
+
+ rec = kptr_pointee_btf_record(kptr_field);
+ if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
+ ret |= NON_OWN_REF;
+ } else {
+ ret |= PTR_UNTRUSTED;
}
- return PTR_MAYBE_NULL | PTR_UNTRUSTED;
+
+ return ret;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@@ -6244,9 +5970,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* values are also truncated so we push 64-bit bounds into
* 32-bit bounds. Above were truncated < 32-bits already.
*/
- if (size >= 4)
- return;
- __reg_combine_64_into_32(reg);
+ if (size < 4) {
+ __mark_reg32_unbounded(reg);
+ reg_bounds_sync(reg);
+ }
}
static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
@@ -8626,6 +8353,54 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
return state->stack[spi].spilled_ptr.dynptr.type;
}
+static int check_reg_const_str(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno)
+{
+ struct bpf_map *map = reg->map_ptr;
+ int err;
+ int map_off;
+ u64 map_addr;
+ char *str_ptr;
+
+ if (reg->type != PTR_TO_MAP_VALUE)
+ return -EINVAL;
+
+ if (!bpf_map_is_rdonly(map)) {
+ verbose(env, "R%d does not point to a readonly map'\n", regno);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d is not a constant address'\n", regno);
+ return -EACCES;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ return -EACCES;
+ }
+
+ err = check_map_access(env, regno, reg->off,
+ map->value_size - reg->off, false,
+ ACCESS_HELPER);
+ if (err)
+ return err;
+
+ map_off = reg->off + reg->var_off.value;
+ err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+ if (err) {
+ verbose(env, "direct value access on string failed\n");
+ return err;
+ }
+
+ str_ptr = (char *)(long)(map_addr);
+ if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+ verbose(env, "string is not zero-terminated\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn,
@@ -8870,44 +8645,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
case ARG_PTR_TO_CONST_STR:
{
- struct bpf_map *map = reg->map_ptr;
- int map_off;
- u64 map_addr;
- char *str_ptr;
-
- if (!bpf_map_is_rdonly(map)) {
- verbose(env, "R%d does not point to a readonly map'\n", regno);
- return -EACCES;
- }
-
- if (!tnum_is_const(reg->var_off)) {
- verbose(env, "R%d is not a constant address'\n", regno);
- return -EACCES;
- }
-
- if (!map->ops->map_direct_value_addr) {
- verbose(env, "no direct value access support for this map type\n");
- return -EACCES;
- }
-
- err = check_map_access(env, regno, reg->off,
- map->value_size - reg->off, false,
- ACCESS_HELPER);
+ err = check_reg_const_str(env, reg, regno);
if (err)
return err;
-
- map_off = reg->off + reg->var_off.value;
- err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
- if (err) {
- verbose(env, "direct value access on string failed\n");
- return err;
- }
-
- str_ptr = (char *)(long)(map_addr);
- if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
- verbose(env, "string is not zero-terminated\n");
- return -EINVAL;
- }
break;
}
case ARG_PTR_TO_KPTR:
@@ -9896,14 +9636,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return 0;
}
-static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
- int func_id,
- struct bpf_call_arg_meta *meta)
+static int do_refine_retval_range(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, int ret_type,
+ int func_id,
+ struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
if (ret_type != RET_INTEGER)
- return;
+ return 0;
switch (func_id) {
case BPF_FUNC_get_stack:
@@ -9929,6 +9670,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
reg_bounds_sync(ret_reg);
break;
}
+
+ return reg_bounds_sanity_check(env, ret_reg, "retval");
}
static int
@@ -9998,7 +9741,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
val = reg->var_off.value;
max = map->max_entries;
- if (!(register_is_const(reg) && val < max)) {
+ if (!(is_reg_const(reg, false) && val < max)) {
bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
return 0;
}
@@ -10593,7 +10336,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].ref_obj_id = id;
}
- do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
+ err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
+ if (err)
+ return err;
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
if (err)
@@ -10771,6 +10516,11 @@ static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param
return __kfunc_param_match_suffix(btf, arg, "__nullable");
}
+static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__str");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -10914,6 +10664,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_RB_ROOT,
KF_ARG_PTR_TO_RB_NODE,
KF_ARG_PTR_TO_NULL,
+ KF_ARG_PTR_TO_CONST_STR,
};
enum special_kfunc_type {
@@ -11064,6 +10815,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_RB_NODE;
+ if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_CONST_STR;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -11695,6 +11449,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_MEM_SIZE:
case KF_ARG_PTR_TO_CALLBACK:
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ case KF_ARG_PTR_TO_CONST_STR:
/* Trusted by default */
break;
default:
@@ -11966,6 +11721,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
meta->arg_btf = reg->btf;
meta->arg_btf_id = reg->btf_id;
break;
+ case KF_ARG_PTR_TO_CONST_STR:
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ verbose(env, "arg#%d doesn't point to a const string\n", i);
+ return -EINVAL;
+ }
+ ret = check_reg_const_str(env, reg, regno);
+ if (ret)
+ return ret;
+ break;
}
}
@@ -14086,13 +13850,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* check dest operand */
err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+ err = err ?: adjust_reg_min_max_vals(env, insn);
if (err)
return err;
-
- return adjust_reg_min_max_vals(env, insn);
}
- return 0;
+ return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
}
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
@@ -14174,201 +13937,135 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
}));
}
-static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
-{
- struct tnum subreg = tnum_subreg(reg->var_off);
- s32 sval = (s32)val;
-
- switch (opcode) {
- case BPF_JEQ:
- if (tnum_is_const(subreg))
- return !!tnum_equals_const(subreg, val);
- else if (val < reg->u32_min_value || val > reg->u32_max_value)
- return 0;
- else if (sval < reg->s32_min_value || sval > reg->s32_max_value)
- return 0;
- break;
- case BPF_JNE:
- if (tnum_is_const(subreg))
- return !tnum_equals_const(subreg, val);
- else if (val < reg->u32_min_value || val > reg->u32_max_value)
- return 1;
- else if (sval < reg->s32_min_value || sval > reg->s32_max_value)
- return 1;
- break;
- case BPF_JSET:
- if ((~subreg.mask & subreg.value) & val)
- return 1;
- if (!((subreg.mask | subreg.value) & val))
- return 0;
- break;
- case BPF_JGT:
- if (reg->u32_min_value > val)
- return 1;
- else if (reg->u32_max_value <= val)
- return 0;
- break;
- case BPF_JSGT:
- if (reg->s32_min_value > sval)
- return 1;
- else if (reg->s32_max_value <= sval)
- return 0;
- break;
- case BPF_JLT:
- if (reg->u32_max_value < val)
- return 1;
- else if (reg->u32_min_value >= val)
- return 0;
- break;
- case BPF_JSLT:
- if (reg->s32_max_value < sval)
- return 1;
- else if (reg->s32_min_value >= sval)
- return 0;
- break;
- case BPF_JGE:
- if (reg->u32_min_value >= val)
- return 1;
- else if (reg->u32_max_value < val)
- return 0;
- break;
- case BPF_JSGE:
- if (reg->s32_min_value >= sval)
- return 1;
- else if (reg->s32_max_value < sval)
- return 0;
- break;
- case BPF_JLE:
- if (reg->u32_max_value <= val)
- return 1;
- else if (reg->u32_min_value > val)
- return 0;
- break;
- case BPF_JSLE:
- if (reg->s32_max_value <= sval)
- return 1;
- else if (reg->s32_min_value > sval)
- return 0;
- break;
- }
-
- return -1;
-}
-
-
-static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
-{
- s64 sval = (s64)val;
-
- switch (opcode) {
- case BPF_JEQ:
- if (tnum_is_const(reg->var_off))
- return !!tnum_equals_const(reg->var_off, val);
- else if (val < reg->umin_value || val > reg->umax_value)
- return 0;
- else if (sval < reg->smin_value || sval > reg->smax_value)
- return 0;
- break;
- case BPF_JNE:
- if (tnum_is_const(reg->var_off))
- return !tnum_equals_const(reg->var_off, val);
- else if (val < reg->umin_value || val > reg->umax_value)
- return 1;
- else if (sval < reg->smin_value || sval > reg->smax_value)
- return 1;
- break;
- case BPF_JSET:
- if ((~reg->var_off.mask & reg->var_off.value) & val)
- return 1;
- if (!((reg->var_off.mask | reg->var_off.value) & val))
- return 0;
- break;
- case BPF_JGT:
- if (reg->umin_value > val)
- return 1;
- else if (reg->umax_value <= val)
- return 0;
- break;
- case BPF_JSGT:
- if (reg->smin_value > sval)
- return 1;
- else if (reg->smax_value <= sval)
- return 0;
- break;
- case BPF_JLT:
- if (reg->umax_value < val)
- return 1;
- else if (reg->umin_value >= val)
- return 0;
- break;
- case BPF_JSLT:
- if (reg->smax_value < sval)
- return 1;
- else if (reg->smin_value >= sval)
- return 0;
- break;
- case BPF_JGE:
- if (reg->umin_value >= val)
- return 1;
- else if (reg->umax_value < val)
- return 0;
- break;
- case BPF_JSGE:
- if (reg->smin_value >= sval)
- return 1;
- else if (reg->smax_value < sval)
- return 0;
- break;
- case BPF_JLE:
- if (reg->umax_value <= val)
- return 1;
- else if (reg->umin_value > val)
- return 0;
- break;
- case BPF_JSLE:
- if (reg->smax_value <= sval)
- return 1;
- else if (reg->smin_value > sval)
- return 0;
- break;
- }
-
- return -1;
-}
-
-/* compute branch direction of the expression "if (reg opcode val) goto target;"
- * and return:
- * 1 - branch will be taken and "goto target" will be executed
- * 0 - branch will not be taken and fall-through to next insn
- * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
- * range [0,10]
+/*
+ * <reg1> <op> <reg2> for two scalars; only BPF_JSET needs one constant operand
*/
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
- bool is_jmp32)
+static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
+ u8 opcode, bool is_jmp32)
{
- if (__is_pointer_value(false, reg)) {
- if (!reg_not_null(reg))
- return -1;
+ struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
+ struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
+ u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
+ u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
+ s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
+ s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
+ u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
+ u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
+ s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
+ s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
- /* If pointer is valid tests against zero will fail so we can
- * use this to direct branch taken.
+ switch (opcode) {
+ case BPF_JEQ:
+ /* constants, umin/umax and smin/smax checks would be
+ * redundant in this case because they all should match
*/
- if (val != 0)
- return -1;
-
- switch (opcode) {
- case BPF_JEQ:
+ if (tnum_is_const(t1) && tnum_is_const(t2))
+ return t1.value == t2.value;
+ /* non-overlapping ranges */
+ if (umin1 > umax2 || umax1 < umin2)
return 0;
- case BPF_JNE:
- return 1;
- default:
- return -1;
+ if (smin1 > smax2 || smax1 < smin2)
+ return 0;
+ if (!is_jmp32) {
+ /* if 64-bit ranges are inconclusive, see if we can
+ * utilize 32-bit subrange knowledge to eliminate
+ * branches that can't be taken a priori
+ */
+ if (reg1->u32_min_value > reg2->u32_max_value ||
+ reg1->u32_max_value < reg2->u32_min_value)
+ return 0;
+ if (reg1->s32_min_value > reg2->s32_max_value ||
+ reg1->s32_max_value < reg2->s32_min_value)
+ return 0;
}
+ break;
+ case BPF_JNE:
+ /* constants, umin/umax and smin/smax checks would be
+ * redundant in this case because they all should match
+ */
+ if (tnum_is_const(t1) && tnum_is_const(t2))
+ return t1.value != t2.value;
+ /* non-overlapping ranges */
+ if (umin1 > umax2 || umax1 < umin2)
+ return 1;
+ if (smin1 > smax2 || smax1 < smin2)
+ return 1;
+ if (!is_jmp32) {
+ /* if 64-bit ranges are inconclusive, see if we can
+ * utilize 32-bit subrange knowledge to eliminate
+ * branches that can't be taken a priori
+ */
+ if (reg1->u32_min_value > reg2->u32_max_value ||
+ reg1->u32_max_value < reg2->u32_min_value)
+ return 1;
+ if (reg1->s32_min_value > reg2->s32_max_value ||
+ reg1->s32_max_value < reg2->s32_min_value)
+ return 1;
+ }
+ break;
+ case BPF_JSET:
+ if (!is_reg_const(reg2, is_jmp32)) {
+ swap(reg1, reg2);
+ swap(t1, t2);
+ }
+ if (!is_reg_const(reg2, is_jmp32))
+ return -1;
+ if ((~t1.mask & t1.value) & t2.value)
+ return 1;
+ if (!((t1.mask | t1.value) & t2.value))
+ return 0;
+ break;
+ case BPF_JGT:
+ if (umin1 > umax2)
+ return 1;
+ else if (umax1 <= umin2)
+ return 0;
+ break;
+ case BPF_JSGT:
+ if (smin1 > smax2)
+ return 1;
+ else if (smax1 <= smin2)
+ return 0;
+ break;
+ case BPF_JLT:
+ if (umax1 < umin2)
+ return 1;
+ else if (umin1 >= umax2)
+ return 0;
+ break;
+ case BPF_JSLT:
+ if (smax1 < smin2)
+ return 1;
+ else if (smin1 >= smax2)
+ return 0;
+ break;
+ case BPF_JGE:
+ if (umin1 >= umax2)
+ return 1;
+ else if (umax1 < umin2)
+ return 0;
+ break;
+ case BPF_JSGE:
+ if (smin1 >= smax2)
+ return 1;
+ else if (smax1 < smin2)
+ return 0;
+ break;
+ case BPF_JLE:
+ if (umax1 <= umin2)
+ return 1;
+ else if (umin1 > umax2)
+ return 0;
+ break;
+ case BPF_JSLE:
+ if (smax1 <= smin2)
+ return 1;
+ else if (smin1 > smax2)
+ return 0;
+ break;
}
- if (is_jmp32)
- return is_branch32_taken(reg, val, opcode);
- return is_branch64_taken(reg, val, opcode);
+ return -1;
}
static int flip_opcode(u32 opcode)
@@ -14432,216 +14129,244 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
return -1;
}
-/* Adjusts the register min/max values in the case that the dst_reg is the
- * variable register that we are working on, and src_reg is a constant or we're
- * simply doing a BPF_K check.
- * In JEQ/JNE cases we also adjust the var_off values.
+/* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
+ * and return:
+ * 1 - branch will be taken and "goto target" will be executed
+ * 0 - branch will not be taken and fall-through to next insn
+ * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
+ * range [0,10]
*/
-static void reg_set_min_max(struct bpf_reg_state *true_reg,
- struct bpf_reg_state *false_reg,
- u64 val, u32 val32,
- u8 opcode, bool is_jmp32)
+static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
+ u8 opcode, bool is_jmp32)
{
- struct tnum false_32off = tnum_subreg(false_reg->var_off);
- struct tnum false_64off = false_reg->var_off;
- struct tnum true_32off = tnum_subreg(true_reg->var_off);
- struct tnum true_64off = true_reg->var_off;
- s64 sval = (s64)val;
- s32 sval32 = (s32)val32;
+ if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
+ return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
- /* If the dst_reg is a pointer, we can't learn anything about its
- * variable offset from the compare (unless src_reg were a pointer into
- * the same object, but we don't bother with that.
- * Since false_reg and true_reg have the same type by construction, we
- * only need to check one of them for pointerness.
- */
- if (__is_pointer_value(false, false_reg))
- return;
+ if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
+ u64 val;
+ /* arrange that reg2 is a scalar, and reg1 is a pointer */
+ if (!is_reg_const(reg2, is_jmp32)) {
+ opcode = flip_opcode(opcode);
+ swap(reg1, reg2);
+ }
+ /* and ensure that reg2 is a constant */
+ if (!is_reg_const(reg2, is_jmp32))
+ return -1;
+
+ if (!reg_not_null(reg1))
+ return -1;
+
+ /* If pointer is valid tests against zero will fail so we can
+ * use this to direct branch taken.
+ */
+ val = reg_const_value(reg2, is_jmp32);
+ if (val != 0)
+ return -1;
+
+ switch (opcode) {
+ case BPF_JEQ:
+ return 0;
+ case BPF_JNE:
+ return 1;
+ default:
+ return -1;
+ }
+ }
+
+ /* now deal with two scalars, but not necessarily constants */
+ return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
+}
+
+/* Opcode that corresponds to a *false* branch condition.
+ * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
+ */
+static u8 rev_opcode(u8 opcode)
+{
switch (opcode) {
- /* JEQ/JNE comparison doesn't change the register equivalence.
- *
- * r1 = r2;
- * if (r1 == 42) goto label;
- * ...
- * label: // here both r1 and r2 are known to be 42.
- *
- * Hence when marking register as known preserve it's ID.
+ case BPF_JEQ: return BPF_JNE;
+ case BPF_JNE: return BPF_JEQ;
+ /* JSET doesn't have its reverse opcode in BPF, so add
+ * BPF_X flag to denote the reverse of that operation
*/
+ case BPF_JSET: return BPF_JSET | BPF_X;
+ case BPF_JSET | BPF_X: return BPF_JSET;
+ case BPF_JGE: return BPF_JLT;
+ case BPF_JGT: return BPF_JLE;
+ case BPF_JLE: return BPF_JGT;
+ case BPF_JLT: return BPF_JGE;
+ case BPF_JSGE: return BPF_JSLT;
+ case BPF_JSGT: return BPF_JSLE;
+ case BPF_JSLE: return BPF_JSGT;
+ case BPF_JSLT: return BPF_JSGE;
+ default: return 0;
+ }
+}
+
+/* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
+static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
+ u8 opcode, bool is_jmp32)
+{
+ struct tnum t;
+ u64 val;
+
+again:
+ switch (opcode) {
case BPF_JEQ:
if (is_jmp32) {
- __mark_reg32_known(true_reg, val32);
- true_32off = tnum_subreg(true_reg->var_off);
+ reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
+ reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
+ reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
+ reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
+ reg2->u32_min_value = reg1->u32_min_value;
+ reg2->u32_max_value = reg1->u32_max_value;
+ reg2->s32_min_value = reg1->s32_min_value;
+ reg2->s32_max_value = reg1->s32_max_value;
+
+ t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
+ reg1->var_off = tnum_with_subreg(reg1->var_off, t);
+ reg2->var_off = tnum_with_subreg(reg2->var_off, t);
} else {
- ___mark_reg_known(true_reg, val);
- true_64off = true_reg->var_off;
+ reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
+ reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
+ reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
+ reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
+ reg2->umin_value = reg1->umin_value;
+ reg2->umax_value = reg1->umax_value;
+ reg2->smin_value = reg1->smin_value;
+ reg2->smax_value = reg1->smax_value;
+
+ reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
+ reg2->var_off = reg1->var_off;
}
break;
case BPF_JNE:
- if (is_jmp32) {
- __mark_reg32_known(false_reg, val32);
- false_32off = tnum_subreg(false_reg->var_off);
- } else {
- ___mark_reg_known(false_reg, val);
- false_64off = false_reg->var_off;
- }
+ /* we don't derive any new information for inequality yet */
break;
case BPF_JSET:
+ if (!is_reg_const(reg2, is_jmp32))
+ swap(reg1, reg2);
+ if (!is_reg_const(reg2, is_jmp32))
+ break;
+ val = reg_const_value(reg2, is_jmp32);
+ /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
+ * requires single bit to learn something useful. E.g., if we
+ * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
+ * are actually set? We can learn something definite only if
+ * it's a single-bit value to begin with.
+ *
+ * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
+ * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
+ * bit 1 is set, which we can readily use in adjustments.
+ */
+ if (!is_power_of_2(val))
+ break;
if (is_jmp32) {
- false_32off = tnum_and(false_32off, tnum_const(~val32));
- if (is_power_of_2(val32))
- true_32off = tnum_or(true_32off,
- tnum_const(val32));
+ t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
+ reg1->var_off = tnum_with_subreg(reg1->var_off, t);
} else {
- false_64off = tnum_and(false_64off, tnum_const(~val));
- if (is_power_of_2(val))
- true_64off = tnum_or(true_64off,
- tnum_const(val));
+ reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
+ }
+ break;
+ case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
+ if (!is_reg_const(reg2, is_jmp32))
+ swap(reg1, reg2);
+ if (!is_reg_const(reg2, is_jmp32))
+ break;
+ val = reg_const_value(reg2, is_jmp32);
+ if (is_jmp32) {
+ t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
+ reg1->var_off = tnum_with_subreg(reg1->var_off, t);
+ } else {
+ reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
+ }
+ break;
+ case BPF_JLE:
+ if (is_jmp32) {
+ reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
+ reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
+ } else {
+ reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
+ reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
+ }
+ break;
+ case BPF_JLT:
+ if (is_jmp32) {
+ reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
+ reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
+ } else {
+ reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
+ reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
+ }
+ break;
+ case BPF_JSLE:
+ if (is_jmp32) {
+ reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
+ reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
+ } else {
+ reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
+ reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
+ }
+ break;
+ case BPF_JSLT:
+ if (is_jmp32) {
+ reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
+ reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
+ } else {
+ reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
+ reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
}
break;
case BPF_JGE:
case BPF_JGT:
- {
- if (is_jmp32) {
- u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
- u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
-
- false_reg->u32_max_value = min(false_reg->u32_max_value,
- false_umax);
- true_reg->u32_min_value = max(true_reg->u32_min_value,
- true_umin);
- } else {
- u64 false_umax = opcode == BPF_JGT ? val : val - 1;
- u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
-
- false_reg->umax_value = min(false_reg->umax_value, false_umax);
- true_reg->umin_value = max(true_reg->umin_value, true_umin);
- }
- break;
- }
case BPF_JSGE:
case BPF_JSGT:
- {
- if (is_jmp32) {
- s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
- s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
-
- false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
- true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
- } else {
- s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
- s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
-
- false_reg->smax_value = min(false_reg->smax_value, false_smax);
- true_reg->smin_value = max(true_reg->smin_value, true_smin);
- }
- break;
- }
- case BPF_JLE:
- case BPF_JLT:
- {
- if (is_jmp32) {
- u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
- u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
-
- false_reg->u32_min_value = max(false_reg->u32_min_value,
- false_umin);
- true_reg->u32_max_value = min(true_reg->u32_max_value,
- true_umax);
- } else {
- u64 false_umin = opcode == BPF_JLT ? val : val + 1;
- u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
-
- false_reg->umin_value = max(false_reg->umin_value, false_umin);
- true_reg->umax_value = min(true_reg->umax_value, true_umax);
- }
- break;
- }
- case BPF_JSLE:
- case BPF_JSLT:
- {
- if (is_jmp32) {
- s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
- s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
-
- false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
- true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
- } else {
- s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
- s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
-
- false_reg->smin_value = max(false_reg->smin_value, false_smin);
- true_reg->smax_value = min(true_reg->smax_value, true_smax);
- }
- break;
- }
+ /* just reuse LE/LT logic above */
+ opcode = flip_opcode(opcode);
+ swap(reg1, reg2);
+ goto again;
default:
return;
}
-
- if (is_jmp32) {
- false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
- tnum_subreg(false_32off));
- true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
- tnum_subreg(true_32off));
- __reg_combine_32_into_64(false_reg);
- __reg_combine_32_into_64(true_reg);
- } else {
- false_reg->var_off = false_64off;
- true_reg->var_off = true_64off;
- __reg_combine_64_into_32(false_reg);
- __reg_combine_64_into_32(true_reg);
- }
}
-/* Same as above, but for the case that dst_reg holds a constant and src_reg is
- * the variable reg.
+/* Adjusts the register min/max values in the case that the dst_reg and
+ * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
+ * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
+ * Technically we can do similar adjustments for pointers to the same object,
+ * but we don't support that right now.
*/
-static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
- struct bpf_reg_state *false_reg,
- u64 val, u32 val32,
- u8 opcode, bool is_jmp32)
+static int reg_set_min_max(struct bpf_verifier_env *env,
+ struct bpf_reg_state *true_reg1,
+ struct bpf_reg_state *true_reg2,
+ struct bpf_reg_state *false_reg1,
+ struct bpf_reg_state *false_reg2,
+ u8 opcode, bool is_jmp32)
{
- opcode = flip_opcode(opcode);
- /* This uses zero as "not present in table"; luckily the zero opcode,
- * BPF_JA, can't get here.
+ int err;
+
+ /* If either register is a pointer, we can't learn anything about its
+ * variable offset from the compare (unless they were a pointer into
+ * the same object, but we don't bother with that).
*/
- if (opcode)
- reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
-}
+ if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
+ return 0;
-/* Regs are known to be equal, so intersect their min/max/var_off */
-static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
- struct bpf_reg_state *dst_reg)
-{
- src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
- dst_reg->umin_value);
- src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
- dst_reg->umax_value);
- src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
- dst_reg->smin_value);
- src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
- dst_reg->smax_value);
- src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
- dst_reg->var_off);
- reg_bounds_sync(src_reg);
- reg_bounds_sync(dst_reg);
-}
+ /* fallthrough (FALSE) branch */
+ regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
+ reg_bounds_sync(false_reg1);
+ reg_bounds_sync(false_reg2);
-static void reg_combine_min_max(struct bpf_reg_state *true_src,
- struct bpf_reg_state *true_dst,
- struct bpf_reg_state *false_src,
- struct bpf_reg_state *false_dst,
- u8 opcode)
-{
- switch (opcode) {
- case BPF_JEQ:
- __reg_combine_min_max(true_src, true_dst);
- break;
- case BPF_JNE:
- __reg_combine_min_max(false_src, false_dst);
- break;
- }
+ /* jump (TRUE) branch */
+ regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
+ reg_bounds_sync(true_reg1);
+ reg_bounds_sync(true_reg2);
+
+ err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
+ err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
+ err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
+ err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
+ return err;
}
static void mark_ptr_or_null_reg(struct bpf_func_state *state,
@@ -14839,6 +14564,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
struct bpf_reg_state *eq_branch_regs;
+ struct bpf_reg_state fake_reg = {};
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -14879,42 +14605,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
return -EINVAL;
}
+ src_reg = &fake_reg;
+ src_reg->type = SCALAR_VALUE;
+ __mark_reg_known(src_reg, insn->imm);
}
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-
- if (BPF_SRC(insn->code) == BPF_K) {
- pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
- } else if (src_reg->type == SCALAR_VALUE &&
- is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
- pred = is_branch_taken(dst_reg,
- tnum_subreg(src_reg->var_off).value,
- opcode,
- is_jmp32);
- } else if (src_reg->type == SCALAR_VALUE &&
- !is_jmp32 && tnum_is_const(src_reg->var_off)) {
- pred = is_branch_taken(dst_reg,
- src_reg->var_off.value,
- opcode,
- is_jmp32);
- } else if (dst_reg->type == SCALAR_VALUE &&
- is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) {
- pred = is_branch_taken(src_reg,
- tnum_subreg(dst_reg->var_off).value,
- flip_opcode(opcode),
- is_jmp32);
- } else if (dst_reg->type == SCALAR_VALUE &&
- !is_jmp32 && tnum_is_const(dst_reg->var_off)) {
- pred = is_branch_taken(src_reg,
- dst_reg->var_off.value,
- flip_opcode(opcode),
- is_jmp32);
- } else if (reg_is_pkt_pointer_any(dst_reg) &&
- reg_is_pkt_pointer_any(src_reg) &&
- !is_jmp32) {
- pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
- }
-
+ pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
if (pred >= 0) {
/* If we get here with a dst_reg pointer type it is because
* above is_branch_taken() special cased the 0 comparison.
@@ -14962,53 +14659,27 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
return -EFAULT;
other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
- /* detect if we are comparing against a constant value so we can adjust
- * our min/max values for our dst register.
- * this is only legit if both are scalars (or pointers to the same
- * object, I suppose, see the PTR_MAYBE_NULL related if block below),
- * because otherwise the different base pointers mean the offsets aren't
- * comparable.
- */
if (BPF_SRC(insn->code) == BPF_X) {
- struct bpf_reg_state *src_reg = ®s[insn->src_reg];
-
- if (dst_reg->type == SCALAR_VALUE &&
- src_reg->type == SCALAR_VALUE) {
- if (tnum_is_const(src_reg->var_off) ||
- (is_jmp32 &&
- tnum_is_const(tnum_subreg(src_reg->var_off))))
- reg_set_min_max(&other_branch_regs[insn->dst_reg],
- dst_reg,
- src_reg->var_off.value,
- tnum_subreg(src_reg->var_off).value,
- opcode, is_jmp32);
- else if (tnum_is_const(dst_reg->var_off) ||
- (is_jmp32 &&
- tnum_is_const(tnum_subreg(dst_reg->var_off))))
- reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
- src_reg,
- dst_reg->var_off.value,
- tnum_subreg(dst_reg->var_off).value,
- opcode, is_jmp32);
- else if (!is_jmp32 &&
- (opcode == BPF_JEQ || opcode == BPF_JNE))
- /* Comparing for equality, we can combine knowledge */
- reg_combine_min_max(&other_branch_regs[insn->src_reg],
- &other_branch_regs[insn->dst_reg],
- src_reg, dst_reg, opcode);
- if (src_reg->id &&
- !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
- find_equal_scalars(this_branch, src_reg);
- find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
- }
-
- }
- } else if (dst_reg->type == SCALAR_VALUE) {
- reg_set_min_max(&other_branch_regs[insn->dst_reg],
- dst_reg, insn->imm, (u32)insn->imm,
- opcode, is_jmp32);
+ err = reg_set_min_max(env,
+ &other_branch_regs[insn->dst_reg],
+ &other_branch_regs[insn->src_reg],
+ dst_reg, src_reg, opcode, is_jmp32);
+ } else /* BPF_SRC(insn->code) == BPF_K */ {
+ err = reg_set_min_max(env,
+ &other_branch_regs[insn->dst_reg],
+ src_reg /* fake one */,
+ dst_reg, src_reg /* same fake one */,
+ opcode, is_jmp32);
}
+ if (err)
+ return err;
+ if (BPF_SRC(insn->code) == BPF_X &&
+ src_reg->type == SCALAR_VALUE && src_reg->id &&
+ !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
+ find_equal_scalars(this_branch, src_reg);
+ find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
+ }
if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
!WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
find_equal_scalars(this_branch, dst_reg);
@@ -17541,10 +17212,8 @@ static int do_check(struct bpf_verifier_env *env)
insn->off, BPF_SIZE(insn->code),
BPF_READ, insn->dst_reg, false,
BPF_MODE(insn->code) == BPF_MEMSX);
- if (err)
- return err;
-
- err = save_aux_ptr_type(env, src_reg_type, true);
+ err = err ?: save_aux_ptr_type(env, src_reg_type, true);
+ err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx");
if (err)
return err;
} else if (class == BPF_STX) {
@@ -20831,6 +20500,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
+ env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct bpf_verifier_state_list *),
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c56071f..520b90d 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -164,13 +164,13 @@ struct cgroup_mgctx {
#define DEFINE_CGROUP_MGCTX(name) \
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
-extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
/* iterate across the hierarchies */
#define for_each_root(root) \
- list_for_each_entry((root), &cgroup_roots, root_list)
+ list_for_each_entry_rcu((root), &cgroup_roots, root_list, \
+ lockdep_is_held(&cgroup_mutex))
/**
* for_each_subsys - iterate all enabled cgroup subsystems
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 76db6c6..04d11a7 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1262,6 +1262,40 @@ int cgroup1_get_tree(struct fs_context *fc)
return ret;
}
+/**
+ * task_get_cgroup1 - look up the cgroup of a task in one cgroup1 hierarchy
+ * @tsk: The target task
+ * @hierarchy_id: The ID of a cgroup1 hierarchy
+ *
+ * Walks the cgroup roots under the RCU read lock and stops at the first
+ * root, other than the default hierarchy root, whose hierarchy ID equals
+ * @hierarchy_id. The lookup in that root is done with css_set_lock held.
+ *
+ * Return: the cgroup, after cgroup_tryget() succeeded on it. ERR_PTR(-ENOENT)
+ * if no root matched, if the task has no cgroup in the matching root or if
+ * cgroup_tryget() failed.
+ */
+struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id)
+{
+	struct cgroup *found = ERR_PTR(-ENOENT);
+	struct cgroup_root *root;
+	unsigned long irq_flags;
+
+	rcu_read_lock();
+	for_each_root(root) {
+		/* cgroup1 only, and only the hierarchy that was asked for */
+		if (root == &cgrp_dfl_root ||
+		    root->hierarchy_id != hierarchy_id)
+			continue;
+
+		spin_lock_irqsave(&css_set_lock, irq_flags);
+		found = task_cgroup_from_root(tsk, root);
+		/* a missing cgroup and a failed tryget are both "not found" */
+		if (!(found && cgroup_tryget(found)))
+			found = ERR_PTR(-ENOENT);
+		spin_unlock_irqrestore(&css_set_lock, irq_flags);
+
+		/* hierarchy IDs are looked up once: first match wins */
+		break;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
static int __init cgroup1_wq_init(void)
{
/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4b9ff41c..8f3cef1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1315,7 +1315,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)
void cgroup_free_root(struct cgroup_root *root)
{
- kfree(root);
+ /*
+ * Free through RCU: for_each_root() walks the root list with
+ * list_for_each_entry_rcu(), so a reader that holds only the RCU
+ * read lock may still be looking at this root.
+ */
+ kfree_rcu(root, rcu);
}
static void cgroup_destroy_root(struct cgroup_root *root)
@@ -1347,10 +1347,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
spin_unlock_irq(&css_set_lock);
- if (!list_empty(&root->root_list)) {
- list_del(&root->root_list);
- cgroup_root_count--;
- }
+ WARN_ON_ONCE(list_empty(&root->root_list));
+ list_del_rcu(&root->root_list);
+ cgroup_root_count--;
if (!have_favordynmods)
cgroup_favor_dynmods(root, false);
@@ -1390,7 +1389,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}
- BUG_ON(!res_cgroup);
+ /*
+ * If cgroup_mutex is not held, the cgrp_cset_link will be freed
+ * before we remove the cgroup root from the root_list. Consequently,
+ * when accessing a cgroup root, the cset_link may have already been
+ * freed, resulting in a NULL res_cgroup. However, by holding the
+ * cgroup_mutex, we ensure that res_cgroup can't be NULL.
+ * If we don't hold cgroup_mutex in the caller, we must do the NULL
+ * check.
+ */
return res_cgroup;
}
@@ -1413,6 +1420,11 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
rcu_read_unlock();
+ /*
+ * The namespace_sem is held by current, so the root cgroup can't
+ * be unmounted. Therefore, we can ensure that the res is non-NULL.
+ */
+ WARN_ON_ONCE(!res);
return res;
}
@@ -1449,7 +1461,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
- lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
return __cset_cgroup_from_root(cset, root);
@@ -1457,7 +1468,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
/*
* Return the cgroup for "task" from the given hierarchy. Must be
- * called with cgroup_mutex and css_set_lock held.
+ * called with css_set_lock held to prevent task's groups from being modified.
+ * Must be called with either cgroup_mutex or rcu read lock to prevent the
+ * cgroup root from being destroyed.
*/
struct cgroup *task_cgroup_from_root(struct task_struct *task,
struct cgroup_root *root)
@@ -2032,7 +2045,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
struct cgroup_root *root = ctx->root;
struct cgroup *cgrp = &root->cgrp;
- INIT_LIST_HEAD(&root->root_list);
+ INIT_LIST_HEAD_RCU(&root->root_list);
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
init_cgroup_housekeeping(cgrp);
@@ -2115,7 +2128,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
* care of subsystems' refcounts, which are explicitly dropped in
* the failure exit path.
*/
- list_add(&root->root_list, &cgroup_roots);
+ list_add_rcu(&root->root_list, &cgroup_roots);
cgroup_root_count++;
/*
@@ -6265,7 +6278,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;
- cgroup_lock();
+ rcu_read_lock();
spin_lock_irq(&css_set_lock);
for_each_root(root) {
@@ -6276,6 +6289,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible))
continue;
+ cgrp = task_cgroup_from_root(tsk, root);
+ /* The root has already been unmounted. */
+ if (!cgrp)
+ continue;
+
seq_printf(m, "%d:", root->hierarchy_id);
if (root != &cgrp_dfl_root)
for_each_subsys(ss, ssid)
@@ -6286,9 +6304,6 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
seq_putc(m, ':');
-
- cgrp = task_cgroup_from_root(tsk, root);
-
/*
* On traditional hierarchies, all zombie tasks show up as
* belonging to the root cgroup. On the default hierarchy,
@@ -6320,7 +6335,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0;
out_unlock:
spin_unlock_irq(&css_set_lock);
- cgroup_unlock();
+ rcu_read_unlock();
kfree(buf);
out:
return retval;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 84e8a0f..f0b8b7c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1376,6 +1376,8 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
struct bpf_dynptr_kern *sig_ptr,
struct bpf_key *trusted_keyring)
{
+ const void *data, *sig;
+ u32 data_len, sig_len;
int ret;
if (trusted_keyring->has_ref) {
@@ -1392,10 +1394,12 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
return ret;
}
- return verify_pkcs7_signature(data_ptr->data,
- __bpf_dynptr_size(data_ptr),
- sig_ptr->data,
- __bpf_dynptr_size(sig_ptr),
+ data_len = __bpf_dynptr_size(data_ptr);
+ data = __bpf_dynptr_data(data_ptr, data_len);
+ sig_len = __bpf_dynptr_size(sig_ptr);
+ sig = __bpf_dynptr_data(sig_ptr, sig_len);
+
+ return verify_pkcs7_signature(data, data_len, sig, sig_len,
trusted_keyring->key,
VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
NULL);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 7916503..c148f8d 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5144,22 +5144,6 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0x1 } },
},
- {
- "ALU_MOVSX | BPF_W",
- .u.insns_int = {
- BPF_LD_IMM64(R2, 0x00000000deadbeefLL),
- BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
- BPF_MOVSX32_REG(R1, R3, 32),
- BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
- BPF_MOV32_IMM(R0, 2),
- BPF_EXIT_INSN(),
- BPF_MOV32_IMM(R0, 1),
- BPF_EXIT_INSN(),
- },
- INTERNAL,
- { },
- { { 0, 0x1 } },
- },
/* MOVSX64 REG */
{
"ALU64_MOVSX | BPF_B",
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2a7f1b1..407b233 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
- struct phy_device *phydev = vlan->real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- return phy_ts_info(phydev, info);
- } else if (ops->get_ts_info) {
- return ops->get_ts_info(vlan->real_dev, info);
- } else {
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- }
-
- return 0;
+ return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
}
static void vlan_dev_get_stats64(struct net_device *dev,
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 3bd0760..b51d8b0 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -20,6 +20,7 @@
batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o
batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 37ce6cf..5f46ca3 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -20,7 +20,6 @@
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -31,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index c120c7c..757c084 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -25,7 +25,6 @@
#include "hard-interface.h"
#include "originator.h"
-#include "routing.h"
#include "send.h"
/**
@@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
- orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
- if (!orig_node_dst)
- goto out;
- neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
+ neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if);
if (!neigh_node)
goto out;
@@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- batadv_orig_node_put(orig_node_dst);
batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index d26124b..0ddd8b4 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -18,7 +18,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -29,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/udp.h>
#include <net/sock.h>
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index e8a4499..5fc754b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -6,6 +6,7 @@
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
@@ -20,7 +21,6 @@
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
@@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void)
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
+ /* multicast packet */
+ batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet;
/* unicast packets ... */
/* unicast with 4 addresses packet */
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 10007c5..870dcd7 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.3"
+#define BATADV_SOURCE_VERSION "2024.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 315394f..d982daea 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,7 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -36,6 +35,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Inspects every active hard interface which is attached to our soft
+ * interface and tests whether its MTU is at least IPV6_MIN_MTU.
+ *
+ * Return: BATADV_NO_FLAGS if at least one such hard interface has a smaller
+ * MTU, BATADV_MCAST_HAVE_MC_PTYPE_CAPA otherwise.
+ */
+static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv)
+{
+	u8 forw_flags = BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+	const struct batadv_hard_iface *iface;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(iface, &batadv_hardif_list, list) {
+		/* only active interfaces of this mesh are relevant */
+		if (iface->if_status != BATADV_IF_ACTIVE ||
+		    iface->soft_iface != bat_priv->soft_iface)
+			continue;
+
+		if (iface->net_dev->mtu >= IPV6_MIN_MTU)
+			continue;
+
+		/* a single too small MTU is enough to lose the capability */
+		forw_flags = BATADV_NO_FLAGS;
+		break;
+	}
+	rcu_read_unlock();
+
+	return forw_flags;
+}
+
+/**
* batadv_mcast_mla_flags_get() - get the new multicast flags
* @bat_priv: the bat priv with all the soft interface information
*
@@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
mla_flags.enabled = 1;
mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
bridge);
+ mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv);
if (!bridge)
return mla_flags;
@@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
bool old_enabled = bat_priv->mcast.mla_flags.enabled;
u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
- char str_old_flags[] = "[.... . ]";
+ char str_old_flags[] = "[.... . .]";
+ /* WANT_NO_RTR4/6 are opt-out bits, so "R4"/"R6" are printed while they
+ * are clear. HAVE_MC_PTYPE_CAPA is a positive capability bit, so 'P'
+ * is printed while it is set, like 'U', '4' and '6'.
+ */
- sprintf(str_old_flags, "[%c%c%c%s%s]",
+ sprintf(str_old_flags, "[%c%c%c%s%s%c]",
(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ (old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
- "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+ "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n",
old_enabled ? str_old_flags : "<undefined>",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ (flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
}
/**
@@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_mode_by_count() - get forwarding mode by count
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to check
+ * @vid: the vlan identifier
+ * @is_routable: stores whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * Selects the forwarding mode for a multicast packet which has several
+ * destination originators. When BATADV_FORW_MCAST is selected the packet has
+ * already been encapsulated with a complete batman-adv multicast header.
+ *
+ * Return:
+ *	BATADV_FORW_MCAST: If no node lacks the multicast packet type
+ *	  capability, we announce the capability ourselves, the packet plus
+ *	  the multicast header for @count destinations does not exceed
+ *	  IPV6_MIN_MTU bytes and pushing the full batman-adv multicast packet
+ *	  header succeeded.
+ *	BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv
+ *	  multicast packet and @count is smaller than or equal to the
+ *	  configured multicast fanout.
+ *	BATADV_FORW_BCAST: Otherwise.
+ */
+static enum batadv_forw_mode
+batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv,
+				struct sk_buff *skb, unsigned short vid,
+				int is_routable, int count)
+{
+	unsigned int hdr_len = batadv_mcast_forw_packet_hdrlen(count);
+	u8 local_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+
+	/* some node in the mesh cannot handle the multicast packet type */
+	if (atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa))
+		goto no_mcast;
+
+	/* we do not announce the capability ourselves */
+	if (!(local_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA))
+		goto no_mcast;
+
+	/* the encapsulated packet would get too large */
+	if (skb->len + hdr_len > IPV6_MIN_MTU)
+		goto no_mcast;
+
+	/* the push is attempted last, only after all checks have passed */
+	if (batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count))
+		return BATADV_FORW_MCAST;
+
+no_mcast:
+	if (count > atomic_read(&bat_priv->multicast_fanout))
+		return BATADV_FORW_BCAST;
+
+	return BATADV_FORW_UCASTS;
+}
+
+/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to check
+ * @vid: the vlan identifier
* @is_routable: stores whether the destination is routable
*
* Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
@@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
else if (unsnoop_count)
return BATADV_FORW_BCAST;
- if (total_count <= atomic_read(&bat_priv->multicast_fanout))
- return BATADV_FORW_UCASTS;
-
- return BATADV_FORW_BCAST;
+ return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable,
+ total_count);
}
/**
@@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * Compares the BATADV_MCAST_HAVE_MC_PTYPE_CAPA bit in the flags stored for
+ * this originator, orig, with the same bit in @mcast_flags. If the bit has
+ * toggled, the counter of nodes without this capability is adjusted.
+ */
+static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv,
+					      struct batadv_orig_node *orig,
+					      u8 mcast_flags)
+{
+	bool had_capa, has_capa;
+
+	lockdep_assert_held(&orig->mcast_handler_lock);
+
+	had_capa = orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+	has_capa = mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+
+	/* nothing to account for if the flag did not toggle */
+	if (had_capa == has_capa)
+		return;
+
+	if (has_capa)
+		/* switched from flag unset to set */
+		atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa);
+	else
+		/* switched from flag set to unset */
+		atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa);
+}
+
+/**
* batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
* @enabled: whether the originator has multicast TVLV support enabled
* @tvlv_value: tvlv buffer containing the multicast flags
@@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_tvlv_handler_register(bat_priv, NULL, NULL,
+ batadv_mcast_forw_tracker_tvlv_handler,
+ BATADV_TVLV_MCAST_TRACKER, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
batadv_mcast_start_timer(bat_priv);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index a9770d8..d97ee51 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -11,6 +11,7 @@
#include <linux/netlink.h>
#include <linux/skbuff.h>
+#include <linux/types.h>
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
@@ -28,6 +29,12 @@ enum batadv_forw_mode {
*/
BATADV_FORW_UCASTS,
+ /**
+ * @BATADV_FORW_MCAST: forward the packet to some nodes via a
+ * batman-adv multicast packet
+ */
+ BATADV_FORW_MCAST,
+
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
};
@@ -36,7 +43,7 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable);
+ unsigned short vid, int *is_routable);
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+/* multicast_forw.c */
+
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests);
+
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count);
+
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
#else
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
return BATADV_FORW_BCAST;
}
@@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node)
{
}
+/* Stub for builds without CONFIG_BATMAN_ADV_MCAST: the skb is freed here
+ * and the packet is reported as dropped.
+ */
+static inline int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
#endif /* CONFIG_BATMAN_ADV_MCAST */
#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */
diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c
new file mode 100644
index 0000000..fafd6ba
--- /dev/null
+++ b/net/batman-adv/multicast_forw.c
@@ -0,0 +1,1178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ */
+
+#include "multicast.h"
+#include "main.h"
+
+#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/gfp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ipv6.h>
+#include <linux/limits.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
+
+#include "bridge_loop_avoidance.h"
+#include "originator.h"
+#include "send.h"
+#include "translation-table.h"
+
+#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \
+ for (; num_dests; num_dests--, (dest) += ETH_ALEN)
+
+#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \
+ for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN)
+
+/**
+ * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes
+ * @skb: the skb to push onto
+ * @size: the amount of bytes to push
+ * @len: stores the total amount of bytes pushed
+ *
+ * Performs an skb_push() onto the given skb and adds the amount of pushed bytes
+ * to the given len pointer.
+ *
+ * Return: the return value of the skb_push() call.
+ */
+static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size,
+ unsigned short *len)
+{
+ *len += size;
+ return skb_push(skb, size);
+}
+
+/**
+ * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front
+ * @skb: the skb to push onto
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes two padding bytes to the front of the given skb.
+ *
+ * Return: On success a pointer to the first byte of the two pushed padding
+ * bytes within the skb. NULL otherwise.
+ */
+static char *
+batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+ char *padding;
+
+ if (skb_headroom(skb) < pad_len)
+ return NULL;
+
+ padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len);
+ memset(padding, 0, pad_len);
+
+ return padding;
+}
+
+/**
+ * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary
+ * @skb: the skb to potentially push the padding onto
+ * @count: the (estimated) number of originators the multicast packet needs to
+ * be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the number of destination entries is even then this adds two
+ * padding bytes to the end of the tracker TVLV.
+ *
+ * Return: true on success or if no padding is needed, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count,
+ unsigned short *tvlv_len)
+{
+ if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len))
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node
+ * @node: the hlist node to get the orig_node from
+ * @entry_offset: the offset of the hlist node within the orig_node struct
+ *
+ * Return: The orig_node containing the hlist node on success, NULL on error.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_orig_entry(struct hlist_node *node,
+ size_t entry_offset)
+{
+ /* sanity check */
+ switch (entry_offset) {
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node):
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (struct batadv_orig_node *)((void *)node - entry_offset);
+}
+
+/**
+ * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination address onto
+ * @vid: the vlan identifier
+ * @orig_node: the originator node to get the MAC address from
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the orig_node is a BLA backbone gateway, if there is not enough skb
+ * headroom available or if num_dests is already at its maximum (65535) then
+ * neither the skb nor num_dests is changed. Otherwise the originator's MAC
+ * address is pushed onto the given skb and num_dests incremented by one.
+ *
+ * Return: true if the orig_node is a backbone gateway or if an orig address
+ * was pushed successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ struct batadv_orig_node *orig_node,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests)
+ != sizeof(__be16));
+
+ /* Avoid sending to other BLA gateways - they already got the frame from
+ * the LAN side we share with them.
+ * TODO: Refactor to take BLA into account earlier in mode check.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
+ return true;
+
+ if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX)
+ return false;
+
+ batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len);
+ ether_addr_copy(skb->data, orig_node->orig);
+ (*num_dests)++;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @head: the list to gather originators from
+ * @entry_offset: offset of an hlist node in an orig_node structure
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators in the given list onto the given
+ * skb.
+ *
+ * Return: true if all entries were processed, false on the first failure.
+ */
+static bool batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct hlist_head *head,
+ size_t entry_offset,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_node *node;
+ struct batadv_orig_node *orig_node;
+
+ rcu_read_lock();
+ __hlist_for_each_rcu(node, head) {
+ orig_node = batadv_mcast_forw_orig_entry(node, entry_offset);
+ if (!orig_node ||
+ !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node,
+ num_dests, tvlv_len)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_tt() - push originators with interest through TT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the translation table onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ struct batadv_tt_global_entry *tt_global;
+ const u8 *addr = eth_hdr(skb)->h_dest;
+
+ /* a missing global TT entry is no error, there is just nothing to push */
+ bool ret = true;
+
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+ if (!tt_global)
+ goto out;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+ if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid,
+ orig_entry->orig_node,
+ num_dests, tvlv_len)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ batadv_tt_global_entry_put(tt_global);
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_want_all() - push originators with want-all flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_ipv4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_ipv6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all-rtr flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_rtr4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_rtr6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_scrape() - remove bytes within skb data
+ * @skb: the skb to remove bytes from
+ * @offset: the offset from the skb data from which to scrape
+ * @len: the amount of bytes to scrape starting from the offset
+ *
+ * Scrapes/removes len bytes from the given skb at the given offset from the
+ * skb data.
+ *
+ * Caller needs to ensure that the region from the skb data's start up
+ * to/including the to be removed bytes are linearized.
+ */
+static void batadv_mcast_forw_scrape(struct sk_buff *skb,
+ unsigned short offset,
+ unsigned short len)
+{
+ char *to, *from;
+
+ SKB_LINEAR_ASSERT(skb);
+
+ to = skb_pull(skb, len);
+ from = to - len;
+
+ memmove(to, from, offset);
+}
+
+/**
+ * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Remove two padding bytes from the end of the multicast tracker TVLV,
+ * from before the payload data.
+ *
+ * Caller needs to ensure that the TVLV bytes are linearized.
+ */
+static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+
+ batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len);
+ *tvlv_len -= pad_len;
+}
+
+/**
+ * batadv_mcast_forw_push_insert_padding() - insert TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Inserts two padding bytes at the end of the multicast tracker TVLV,
+ * before the payload data in the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ unsigned short offset = *tvlv_len;
+ char *to, *from = skb->data;
+
+ to = batadv_mcast_forw_push_padding(skb, tvlv_len);
+ if (!to)
+ return false;
+
+ memmove(to, from, offset);
+ memset(to + offset, 0, *tvlv_len - offset);
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @count: the estimated number of originators the multicast packet needs to
+ * be sent to
+ * @num_dests_pushed: the number of originators that were actually added to the
+ * multicast packet's tracker TVLV
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Adjusts the padding in the multicast packet's tracker TVLV depending on the
+ * initially estimated amount of destinations versus the amount of destinations
+ * that were actually added to the tracker TVLV.
+ *
+ * If the initial estimate was correct or at least the oddness was the same then
+ * no padding adjustment is performed.
+ * If the initially estimated number was even, so padding was initially added,
+ * but it turned out to be odd then padding is removed.
+ * If the initially estimated number was odd, so no padding was initially added,
+ * but it turned out to be even then padding is added.
+ *
+ * Return: true if no padding adjustment is needed or the adjustment was
+ * successful, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count,
+ unsigned short num_dests_pushed,
+ unsigned short *tvlv_len)
+{
+ bool ret = true;
+
+ if (likely((num_dests_pushed % 2) == (*count % 2)))
+ goto out;
+
+ /*
+ * estimated even number of destinations, but turned out to be odd
+ * -> remove padding
+ */
+ if (!(*count % 2) && (num_dests_pushed % 2))
+ batadv_mcast_forw_push_scrape_padding(skb, tvlv_len);
+ /*
+ * estimated odd number of destinations, but turned out to be even
+ * -> add padding
+ */
+ else if ((*count % 2) && (!(num_dests_pushed % 2)))
+ ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len);
+
+out:
+ *count = num_dests_pushed;
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_dests() - push originator addresses onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet onto the given skb.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int *count,
+ unsigned short *tvlv_len)
+{
+ unsigned short num_dests = 0;
+
+ if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (is_routable &&
+ !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests,
+ tvlv_len))
+ goto err;
+
+ return 0;
+err:
+ return -ENOMEM;
+}
+
+/**
+ * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header
+ * @skb: the skb to push the tracker TVLV onto
+ * @num_dests: the number of destination addresses to set in the header
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV header onto the given skb, including the
+ * generic TVLV header but excluding the destination MAC addresses.
+ *
+ * The provided num_dests value is taken into consideration to set the
+ * num_dests field in the tracker header and to set the appropriate TVLV length
+ * value fields.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned int tvlv_value_len;
+
+ if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr))
+ return -ENOMEM;
+
+ tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len;
+ if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX)
+ return -ENOMEM;
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data;
+ mcast_tracker->num_dests = htons(num_dests);
+
+ skb_reset_network_header(skb);
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data;
+ tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER;
+ tvlv_hdr->version = 1;
+ tvlv_hdr->len = htons(tvlv_value_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the tracker TVLV onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV onto the given skb, including the collected
+ * destination MAC addresses and the generic TVLV header.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count,
+ unsigned short *tvlv_len)
+{
+ int ret;
+
+ ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable,
+ &count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb
+ * @skb: the skb to push the header onto
+ * @tvlv_len: the total TVLV length value to set in the header
+ *
+ * Pushes a batman-adv multicast packet header onto the given skb and sets
+ * the provided total TVLV length value in it.
+ *
+ * Caller needs to ensure enough skb headroom is available.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len)
+{
+ struct batadv_mcast_packet *mcast_packet;
+
+ if (skb_headroom(skb) < sizeof(*mcast_packet))
+ return -ENOMEM;
+
+ skb_push(skb, sizeof(*mcast_packet));
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->version = BATADV_COMPAT_VERSION;
+ mcast_packet->ttl = BATADV_TTL;
+ mcast_packet->packet_type = BATADV_MCAST;
+ mcast_packet->reserved = 0;
+ mcast_packet->tvlv_len = htons(tvlv_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV
+ * @bat_priv: the bat priv with all the soft interface information
+ * @comp_neigh: next hop neighbor to scrub+collect destinations for
+ * @dest: start MAC entry in original skb's tracker TVLV
+ * @next_dest: start MAC entry in to be sent skb's tracker TVLV
+ * @num_dests: number of remaining destination MAC entries to iterate over
+ *
+ * This sorts destination entries into either the original batman-adv
+ * multicast packet or the skb (copy) that is going to be sent to comp_neigh
+ * next.
+ *
+ * In preparation for the next, to be (unicast) transmitted batman-adv multicast
+ * packet skb to be sent to the given neighbor node, tries to collect all
+ * originator MAC addresses that have the given neighbor node as their next hop
+ * in the to be transmitted skb (copy), which next_dest points into. That is we
+ * zero all destination entries in next_dest which do not have comp_neigh as
+ * their next hop. And zero all destination entries in the original skb that
+ * would have comp_neigh as their next hop (to avoid redundant transmissions and
+ * duplicated payload later).
+ */
+static void
+batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *comp_neigh, u8 *dest,
+ u8 *next_dest, u16 num_dests)
+{
+ struct batadv_neigh_node *next_neigh;
+
+ /* skip first entry, this is what we are comparing with */
+ eth_zero_addr(dest);
+ dest += ETH_ALEN;
+ next_dest += ETH_ALEN;
+ num_dests--;
+
+ batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) {
+ if (is_zero_ether_addr(next_dest))
+ continue;
+
+ /* sanity check, we expect unicast destinations */
+ if (is_multicast_ether_addr(next_dest)) {
+ eth_zero_addr(dest);
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL);
+ if (!next_neigh) {
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) {
+ eth_zero_addr(next_dest);
+ batadv_neigh_node_put(next_neigh);
+ continue;
+ }
+
+ /* found an entry for our next packet to transmit, so remove it
+ * from the original packet
+ */
+ eth_zero_addr(dest);
+ batadv_neigh_node_put(next_neigh);
+ }
+}
+
+/**
+ * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination
+ * @slot: the to be filled zero-MAC destination entry in a tracker TVLV
+ * @num_dests_slot: remaining entries in tracker TVLV from/including slot
+ *
+ * Searches for the next non-zero-MAC destination entry in a tracker TVLV after
+ * the given slot pointer. And if found, swaps it with the zero-MAC destination
+ * entry which the slot points to.
+ *
+ * Return: true if slot was swapped/filled successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot)
+{
+ u16 num_dests_filler;
+ u8 *filler;
+
+ /* sanity check, should not happen */
+ if (!num_dests_slot)
+ return false;
+
+ num_dests_filler = num_dests_slot - 1;
+ filler = slot + ETH_ALEN;
+
+ /* find a candidate to fill the empty slot */
+ batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) {
+ if (is_zero_ether_addr(filler))
+ continue;
+
+ ether_addr_copy(slot, filler);
+ eth_zero_addr(filler);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV
+ * @skb: the batman-adv multicast packet to compact destinations in
+ *
+ * Compacts the originator destination MAC addresses in the multicast tracker
+ * TVLV of the given multicast packet. This is done by moving all non-zero
+ * MAC addresses in direction of the skb head and all zero MAC addresses in skb
+ * tail direction, within the multicast tracker TVLV.
+ *
+ * Return: The number of consecutive zero MAC address destinations which are
+ * now at the end of the multicast tracker TVLV.
+ */
+static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ u16 num_dests_slot;
+ u8 *slot;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests_slot = ntohs(mcast_tracker->num_dests);
+
+ slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) {
+ /* find an empty slot */
+ if (!is_zero_ether_addr(slot))
+ continue;
+
+ if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot))
+ /* could not find a filler, so we successfully packed
+ * and can stop - and must not reduce num_dests_slot!
+ */
+ break;
+ }
+
+ /* num_dests_slot is now the amount of reduced, zeroed
+ * destinations at the end of the tracker TVLV
+ */
+ return num_dests_slot;
+}
+
+/**
+ * batadv_mcast_forw_shrink_align_offset() - get new alignment offset
+ * @num_dests_old: the old, to be updated amount of destination nodes
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * Calculates the amount of potential extra alignment offset that is needed to
+ * adjust the TVLV padding after the change in destination nodes.
+ *
+ * Return:
+ * 0: If no change to padding is needed.
+ * 2: If padding needs to be removed.
+ * -2: If padding needs to be added.
+ */
+static short
+batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old,
+ unsigned int num_dests_reduce)
+{
+ /* even amount of removed destinations -> no alignment change */
+ if (!(num_dests_reduce % 2))
+ return 0;
+
+ /* even to odd amount of destinations -> remove padding */
+ if (!(num_dests_old % 2))
+ return 2;
+
+ /* odd to even amount of destinations -> add padding */
+ return -2;
+}
+
+/**
+ * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers
+ * @skb: the batman-adv multicast packet to update headers of
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * This updates any fields of a batman-adv multicast packet that are affected
+ * by the reduced number of destinations in the multicast tracker TVLV. In
+ * particular this updates:
+ *
+ * The num_dests field of the multicast tracker TVLV.
+ * The TVLV length field of the according generic TVLV header.
+ * The batman-adv multicast packet's total TVLV length field.
+ *
+ * Return: The offset in skb's tail direction at which the new batman-adv
+ * multicast packet header needs to start.
+ */
+static unsigned int
+batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb,
+ unsigned int num_dests_reduce)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_mcast_packet *mcast_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned char *skb_net_hdr;
+ unsigned int offset;
+ short align_offset;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ align_offset = batadv_mcast_forw_shrink_align_offset(num_dests,
+ num_dests_reduce);
+ offset = ETH_ALEN * num_dests_reduce + align_offset;
+ num_dests -= num_dests_reduce;
+
+ /* update tracker header */
+ mcast_tracker->num_dests = htons(num_dests);
+
+ /* update tracker's tvlv header's length field */
+ tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) -
+ sizeof(*tvlv_hdr));
+ tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset);
+
+ /* update multicast packet header's tvlv length field */
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset);
+
+ return offset;
+}
+
+/**
+ * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset
+ * @skb: the batman-adv multicast packet to move headers for
+ * @offset: a non-negative offset to move headers by, towards the skb tail
+ *
+ * Moves the batman-adv multicast packet header, its multicast tracker TVLV and
+ * any TVLVs in between by the given offset in direction towards the tail.
+ */
+static void
+batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ unsigned int len;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+ len = skb_network_offset(skb) + sizeof(*mcast_tracker);
+ len += num_dests * ETH_ALEN;
+
+ batadv_mcast_forw_scrape(skb, len, offset);
+}
+
+/**
+ * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv
+ * @skb: the batman-adv multicast packet to (potentially) shrink
+ *
+ * Removes all destinations with a zero MAC address (00:00:00:00:00:00) from
+ * the given batman-adv multicast packet's tracker TVLV and updates headers
+ * accordingly to maintain a valid batman-adv multicast packet.
+ */
+static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb)
+{
+ unsigned int offset;
+ u16 dests_reduced;
+
+ dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb);
+ if (!dests_reduced)
+ return;
+
+ offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced);
+ batadv_mcast_forw_shrink_move_headers(skb, offset);
+}
+
+/**
+ * batadv_mcast_forw_packet() - forward a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received or locally generated batman-adv multicast packet
+ * @local_xmit: indicates that the packet was locally generated and not received
+ *
+ * Parses the tracker TVLV of a batman-adv multicast packet and forwards the
+ * packet as indicated in this TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, bool local_xmit)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_neigh_node *neigh_node;
+ unsigned long offset, num_dests_off;
+ struct sk_buff *nexthop_skb;
+ unsigned char *skb_net_hdr;
+ bool local_recv = false;
+ unsigned int tvlv_len;
+ bool xmitted = false;
+ u8 *dest, *next_dest;
+ u16 num_dests;
+ int ret;
+
+ /* (at least) TVLV part needs to be linearized */
+ SKB_LINEAR_ASSERT(skb);
+
+ /* check if the whole num_dests field is within the tracker TVLV */
+ num_dests_off = offsetofend(struct batadv_tvlv_mcast_tracker, num_dests);
+ if (num_dests_off > skb_network_header_len(skb))
+ return -EINVAL;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ /* check if full tracker tvlv is within skb length */
+ tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests;
+ if (tvlv_len > skb_network_header_len(skb))
+ return -EINVAL;
+
+ /* invalidate checksum: */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) {
+ if (is_zero_ether_addr(dest))
+ continue;
+
+ /* only unicast originator addresses supported */
+ if (is_multicast_ether_addr(dest)) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ if (batadv_is_my_mac(bat_priv, dest)) {
+ eth_zero_addr(dest);
+ local_recv = true;
+ continue;
+ }
+
+ neigh_node = batadv_orig_to_router(bat_priv, dest, NULL);
+ if (!neigh_node) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ nexthop_skb = skb_copy(skb, GFP_ATOMIC);
+ if (!nexthop_skb) {
+ batadv_neigh_node_put(neigh_node);
+ return -ENOMEM;
+ }
+
+ offset = dest - skb->data;
+ next_dest = nexthop_skb->data + offset;
+
+ batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest,
+ next_dest, num_dests);
+ batadv_mcast_forw_shrink_tracker(nexthop_skb);
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES,
+ nexthop_skb->len + ETH_HLEN);
+ xmitted = true;
+ ret = batadv_send_unicast_skb(nexthop_skb, neigh_node);
+
+ batadv_neigh_node_put(neigh_node);
+
+ if (ret < 0)
+ return ret;
+ }
+
+ if (xmitted) {
+ if (local_xmit) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL);
+ batadv_add_counter(bat_priv,
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+ skb->len -
+ skb_transport_offset(skb));
+ } else {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES,
+ skb->len + ETH_HLEN);
+ }
+ }
+
+ if (local_recv)
+ return NET_RX_SUCCESS;
+ else
+ return NET_RX_DROP;
+}
+
+/**
+ * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received batman-adv multicast packet
+ *
+ * Receive side entry point for the multicast tracker TVLV: passes the packet
+ * to batadv_mcast_forw_packet() as a forwarded (not locally generated)
+ * packet, which parses the tracker TVLV and forwards the packet accordingly.
+ *
+ * The caller has to point the skb network header at the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport
+ * header at the first byte behind this multicast tracker TVLV.
+ *
+ * The skb is not freed here, this is left to the caller.
+ *
+ * Return: A negative error code on failure. Otherwise NET_RX_SUCCESS if the
+ * received packet is supposed to be decapsulated and forwarded to the own soft
+ * interface, NET_RX_DROP if not.
+ */
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+					   struct sk_buff *skb)
+{
+	/* packets arriving via the TVLV handler were not generated locally */
+	const bool local_xmit = false;
+
+	return batadv_mcast_forw_packet(bat_priv, skb, local_xmit);
+}
+
+/**
+ * batadv_mcast_forw_packet_hdrlen() - multicast packet header length
+ * @num_dests: number of destination nodes
+ *
+ * Sums up the batman-adv multicast packet header, the generic TVLV header,
+ * the multicast tracker TVLV header, one ETH_ALEN sized entry per destination
+ * node and, if needed, the tracker TVLV padding. The outer ethernet frame is
+ * not included.
+ *
+ * Return: The calculated total batman-adv multicast packet header length.
+ */
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests)
+{
+	unsigned int hdrlen;
+
+	hdrlen = sizeof(struct batadv_mcast_packet) +
+		 sizeof(struct batadv_tvlv_hdr) +
+		 sizeof(struct batadv_tvlv_mcast_tracker) +
+		 num_dests * ETH_ALEN;
+
+	/* an even number of destination entries requires a two byte padding
+	 * in the tracker TVLV
+	 */
+	if (num_dests % 2 == 0)
+		hdrlen += 2;
+
+	return hdrlen;
+}
+
+/**
+ * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ *
+ * Makes sure the skb has enough headroom to grow to a total of 1298 bytes
+ * (minimum IPv6 MTU + vlan ethernet header size), reallocating the head if
+ * the current headroom is too small.
+ *
+ * Return: Zero on success, -EINVAL if the skb is already longer than that
+ * limit or -ENOMEM if the head could not be reallocated.
+ */
+static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv,
+					 struct sk_buff *skb)
+{
+	/* TODO: The fixed upper bound could be tightened to the actual number
+	 * of destination nodes. That is tricky though, as the number of
+	 * destinations might have increased since it was last checked.
+	 */
+	int needed = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len;
+
+	if (needed < 0) {
+		/* should not happen, batadv_mcast_forw_mode_check_count() is
+		 * expected to prevent such packets from getting here
+		 */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/* nothing to do if the existing headroom already suffices */
+	if (skb_headroom(skb) >= needed)
+		return 0;
+
+	if (pskb_expand_head(skb, needed, 0, GFP_ATOMIC) < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * Wraps the given multicast packet into a batman-adv multicast packet: first
+ * the headroom is prepared, then the TVLVs (with the multicast tracker TVLV
+ * listing the originators that signaled interest, either via the translation
+ * table or the according want-all flags) and finally the batman-adv multicast
+ * packet header are pushed. If a later step fails, the already pushed TVLV
+ * bytes are pulled from the skb again.
+ *
+ * Return: true on success, false otherwise.
+ */
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			    unsigned short vid, int is_routable, int count)
+{
+	unsigned short tvlv_len = 0;
+
+	/* nothing was pushed so far, so there is nothing to undo on failure */
+	if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0)
+		return false;
+
+	skb_reset_transport_header(skb);
+
+	if (batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable,
+					 count, &tvlv_len) < 0)
+		goto undo_push;
+
+	if (batadv_mcast_forw_push_hdr(skb, tvlv_len) < 0)
+		goto undo_push;
+
+	return true;
+
+undo_push:
+	/* remove whatever TVLV bytes were reported as pushed */
+	if (tvlv_len)
+		skb_pull(skb, tvlv_len);
+
+	return false;
+}
+
+/**
+ * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ *
+ * Hands a locally prepared batman-adv multicast packet to
+ * batadv_mcast_forw_packet() for transmission. The skb is always released
+ * here, on success as well as on failure.
+ *
+ * Return: NET_XMIT_DROP if batadv_mcast_forw_packet() returned a negative
+ * error code (for instance on memory allocation failure), NET_XMIT_SUCCESS
+ * otherwise.
+ */
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+			     struct sk_buff *skb)
+{
+	if (batadv_mcast_forw_packet(bat_priv, skb, true) >= 0) {
+		/* regular end of life for the original packet */
+		consume_skb(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* transmission failed: free the packet as dropped */
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c64d81..1f7ed9d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -7,6 +7,7 @@
#include "netlink.h"
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -20,7 +21,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
-#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 34903df..71c143d 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_orig_to_router() - get next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the payload packet has been received or
+ * the OGM should be sent to
+ *
+ * Return: The result of batadv_find_router() for the originator matching
+ * @orig_addr, or NULL if no originator with that address is known.
+ */
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ if (!orig_node)
+ return NULL; /* unknown originator address */
+
+ neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing);
+ batadv_orig_node_put(orig_node); /* orig_node was only needed for the lookup */
+
+ return neigh_node;
+}
+
+/**
* batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
@@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
#ifdef CONFIG_BATMAN_ADV_MCAST
orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+ orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index ea3d69e..db0c551 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing);
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 163cd43..f106198 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1270,3 +1270,73 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
batadv_orig_node_put(orig_node);
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_recv_mcast_packet() - process received batman-adv multicast packet
+ * @skb: the received batman-adv multicast packet
+ * @recv_if: interface that the skb is received on
+ *
+ * Parses the given, received batman-adv multicast packet. Depending on the
+ * contents of its TVLV forwards it and/or decapsulates it to hand it to the
+ * soft interface.
+ *
+ * The skb is always consumed: it is either handed to the soft interface or
+ * freed before returning.
+ *
+ * Return: NET_RX_DROP if the packet fails one of the initial checks, otherwise
+ * the return value of batadv_tvlv_containers_process().
+ */
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_mcast_packet *mcast_packet;
+ int hdr_size = sizeof(*mcast_packet);
+ unsigned char *tvlv_buff;
+ int ret = NET_RX_DROP;
+ u16 tvlv_buff_len;
+
+ if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
+ goto free_skb;
+
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto free_skb;
+
+ /* packet needs to be linearized to access the tvlv content */
+ if (skb_linearize(skb) < 0)
+ goto free_skb;
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ if (mcast_packet->ttl-- < 2) /* ttl is decremented in place, even when dropping */
+ goto free_skb;
+
+ tvlv_buff = (unsigned char *)(skb->data + hdr_size);
+ tvlv_buff_len = ntohs(mcast_packet->tvlv_len);
+
+ if (tvlv_buff_len > skb->len - hdr_size) /* announced TVLV length exceeds the packet */
+ goto free_skb;
+
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb,
+ tvlv_buff, tvlv_buff_len);
+ if (ret >= 0) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES,
+ skb->len + ETH_HLEN);
+ }
+
+ hdr_size += tvlv_buff_len; /* hdr_size now covers the mcast header plus all TVLVs */
+
+ if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+ skb->len - hdr_size);
+
+ batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL);
+ /* skb was consumed */
+ skb = NULL;
+ }
+
+free_skb:
+ kfree_skb(skb); /* skb is NULL here if it was handed to the soft interface */
+
+ return ret;
+}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index afd15b3..e9849f0 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+#else
+static inline int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ kfree_skb(skb); /* stub for builds without CONFIG_BATMAN_ADV_MCAST: discard the packet */
+ return NET_RX_DROP;
+}
+#endif
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 1bf1232..89c51b3 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
- forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid,
&mcast_is_routable);
switch (forw_mode) {
case BATADV_FORW_BCAST:
break;
case BATADV_FORW_UCASTS:
+ case BATADV_FORW_MCAST:
do_bcast = false;
break;
case BATADV_FORW_NONE:
@@ -365,6 +366,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
} else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
+ } else if (forw_mode == BATADV_FORW_MCAST) {
+ ret = batadv_mcast_forw_mcsend(bat_priv, skb);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
+ atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0);
#endif
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -925,6 +929,18 @@ static const struct {
{ "tt_response_rx" },
{ "tt_roam_adv_tx" },
{ "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ { "mcast_tx" },
+ { "mcast_tx_bytes" },
+ { "mcast_tx_local" },
+ { "mcast_tx_local_bytes" },
+ { "mcast_rx" },
+ { "mcast_rx_bytes" },
+ { "mcast_rx_local" },
+ { "mcast_rx_local_bytes" },
+ { "mcast_fwd" },
+ { "mcast_fwd_bytes" },
+#endif
#ifdef CONFIG_BATMAN_ADV_DAT
{ "dat_get_tx" },
{ "dat_get_rx" },
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 17d5ea1..00840d5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -862,6 +862,70 @@ enum batadv_counters {
*/
BATADV_CNT_TT_ROAM_ADV_RX,
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ /**
+ * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets
+ * counter
+ */
+ BATADV_CNT_MCAST_TX,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which
+ * were locally encapsulated and transmitted as batman-adv multicast
+ * packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets
+ * which were locally encapsulated and transmitted as batman-adv
+ * multicast packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter
+ */
+ BATADV_CNT_MCAST_RX,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_RX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast
+ * packets which were forwarded to the local soft interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received
+ * batman-adv multicast packets which were forwarded to the local soft
+ * interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast
+ * packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv
+ * multicast packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD_BYTES,
+#endif
+
#ifdef CONFIG_BATMAN_ADV_DAT
/**
* @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
@@ -1279,6 +1343,12 @@ struct batadv_priv_mcast {
atomic_t num_want_all_rtr6;
/**
+ * @num_no_mc_ptype_capa: counter for number of nodes without the
+ * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag
+ */
+ atomic_t num_no_mc_ptype_capa;
+
+ /**
* @want_lists_lock: lock for protecting modifications to mcasts
* want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
*/
diff --git a/net/core/dev.c b/net/core/dev.c
index c879246..3950ced 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11236,17 +11236,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- /* Send a netdev-add uevent to the new namespace */
- kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
- netdev_adjacent_add_links(dev);
-
if (new_name[0]) /* Rename the netdev to prepared name */
strscpy(dev->name, new_name, IFNAMSIZ);
/* Fixup kobjects */
+ dev_set_uevent_suppress(&dev->dev, 1);
err = device_rename(&dev->dev, dev->name);
+ dev_set_uevent_suppress(&dev->dev, 0);
WARN_ON(err);
+ /* Send a netdev-add uevent to the new namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
+
/* Adapt owner in case owning user namespace of target network
* namespace is different from the original one.
*/
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index feeddf9..9a66cf5 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
* dev->priv_flags.
*/
-static int dev_set_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg,
- struct netlink_ext_ack *extack)
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
bool phy_ts = phy_has_hwtstamp(dev->phydev);
@@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib);
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index dec5443..df2a06d 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -173,7 +173,8 @@ static int page_pool_init(struct page_pool *pool,
{
unsigned int ring_qsize = 1024; /* Default */
- memcpy(&pool->p, params, sizeof(pool->p));
+ memcpy(&pool->p, &params->fast, sizeof(pool->p));
+ memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
@@ -211,6 +212,8 @@ static int page_pool_init(struct page_pool *pool,
*/
}
+ pool->has_init_callback = !!pool->slow.init_callback;
+
#ifdef CONFIG_PAGE_POOL_STATS
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
@@ -388,8 +391,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
* the overhead is negligible.
*/
page_pool_fragment_page(page, 1);
- if (pool->p.init_callback)
- pool->p.init_callback(page, pool->p.init_arg);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(page, pool->slow.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e8431c6..592164c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3849,7 +3849,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
err = -ENOBUFS;
- nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
if (nskb == NULL)
goto out;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6984877..4275a2b 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
* all devlink instances from this namespace into init_net.
*/
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, true);
err = 0;
if (devl_is_registered(devlink))
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, true);
devlink_put(devlink);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 4fc7adb..ea6a92f 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -4,6 +4,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"
@@ -433,6 +434,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
+ /* Make sure the reload operations are invoked with the device lock
+ * held to allow drivers to trigger functionality that expects it
+ * (e.g., PCI reset) and to close possible races between these
+ * operations and probe/remove.
+ */
+ device_lock_assert(devlink->dev);
+
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 183dbe3..5ea2e20 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
@@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
}
+static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
+{
+ if (dev_lock)
+ device_lock(devlink->dev); /* optional device lock is taken first */
+ devl_lock(devlink); /* the devlink instance lock is always taken */
+}
+
+static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
+{
+ devl_unlock(devlink); /* the devlink instance lock is released first */
+ if (dev_lock)
+ device_unlock(devlink->dev); /* then the device lock, if it was requested */
+}
+
typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
u32 rel_index);
@@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
bool *msg_updated);
/* Netlink */
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
@@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
int flags);
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock);
int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
devlink_nl_dump_one_func_t *dump_one);
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 695df61..71ae121 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return NULL;
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index d0b90eb..fa9afe3 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -9,6 +9,10 @@
#include "devl_internal.h"
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2)
+
static const struct genl_multicast_group devlink_nl_mcgrps[] = {
[DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
};
@@ -61,7 +65,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
}
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock)
{
struct devlink *devlink;
unsigned long index;
@@ -75,12 +80,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, dev_lock);
if (devl_is_registered(devlink) &&
strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0)
return devlink;
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
@@ -90,11 +95,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs,
+ dev_lock);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
@@ -114,7 +121,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
return 0;
unlock:
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
return err;
}
@@ -131,6 +138,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
}
+/* pre_doit variant which requests DEVLINK_NL_FLAG_NEED_DEV_LOCK, so that
+ * __devlink_nl_pre_doit() looks up the devlink instance with the device lock
+ * taken in addition to the devlink instance lock.
+ */
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+				 struct sk_buff *skb, struct genl_info *info)
+{
+	const u8 flags = DEVLINK_NL_FLAG_NEED_DEV_LOCK;
+
+	return __devlink_nl_pre_doit(skb, info, flags);
+}
+
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info)
@@ -138,16 +151,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
}
-void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink *devlink;
devlink = info->user_ptr[0];
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, 0);
+}
+
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct netlink_callback *cb, int flags,
devlink_nl_dump_one_func_t *dump_one,
@@ -156,7 +183,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index 788dfdc..95f9b43 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -846,9 +846,9 @@ const struct genl_split_ops devlink_nl_ops[73] = {
{
.cmd = DEVLINK_CMD_RELOAD,
.validate = GENL_DONT_VALIDATE_STRICT,
- .pre_doit = devlink_nl_pre_doit,
+ .pre_doit = devlink_nl_pre_doit_dev_lock,
.doit = devlink_nl_reload_doit,
- .post_doit = devlink_nl_post_doit,
+ .post_doit = devlink_nl_post_doit_dev_lock,
.policy = devlink_reload_nl_policy,
.maxattr = DEVLINK_ATTR_RELOAD_LIMITS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 0e9e89c..02f3c0b 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -22,12 +22,17 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
void
devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/devlink/region.c b/net/devlink/region.c
index 0aab7b8..e3bab45 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb,
start_offset = state->start_offset;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 4da5bad..a019226 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -23,7 +23,6 @@
#define RTL4_A_NAME "rtl4a"
#define RTL4_A_HDR_LEN 4
-#define RTL4_A_ETHERTYPE 0x8899
#define RTL4_A_PROTOCOL_SHIFT 12
/*
* 0x1 = Realtek Remote Control protocol (RRCP)
@@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
/* Set Ethertype */
p = (__be16 *)tag;
- *p = htons(RTL4_A_ETHERTYPE);
+ *p = htons(ETH_P_REALTEK);
out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT);
/* The lower bits indicate the port number */
@@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
- if (etype != RTL4_A_ETHERTYPE) {
+ if (etype != ETH_P_REALTEK) {
/* Not custom, just pass through */
netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
return skb;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index b4419fb..11d8797 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+{
+ return __ethtool_get_ts_info(dev, info); /* plain pass-through, result is returned unchanged */
+}
+EXPORT_SYMBOL(ethtool_get_ts_info_by_layer);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 306f942..7ceb9ac 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev)
static int hsr_dev_close(struct net_device *dev)
{
- /* Nothing to do here. */
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_uc_unsync(port->dev, dev);
+ dev_mc_unsync(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+
return 0;
}
@@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
+/* Sync the multicast and unicast address lists of the HSR device to both
+ * slave ports (HSR_PT_SLAVE_A and HSR_PT_SLAVE_B); all other port types are
+ * left untouched.
+ */
+static void hsr_set_rx_mode(struct net_device *dev)
+{
+	struct hsr_priv *hsr = netdev_priv(dev);
+	struct hsr_port *port;
+
+	hsr_for_each_port(hsr, port) {
+		bool is_slave = port->type == HSR_PT_SLAVE_A ||
+				port->type == HSR_PT_SLAVE_B;
+
+		/* master and any other non-slave port: nothing to sync */
+		if (!is_slave)
+			continue;
+
+		dev_mc_sync_multiple(port->dev, dev);
+		dev_uc_sync_multiple(port->dev, dev);
+	}
+}
+
+/* Propagate a change of IFF_ALLMULTI on the HSR device to both slave ports
+ * (HSR_PT_SLAVE_A and HSR_PT_SLAVE_B). Other flag changes and other port
+ * types are ignored.
+ */
+static void hsr_change_rx_flags(struct net_device *dev, int change)
+{
+	struct hsr_priv *hsr = netdev_priv(dev);
+	struct hsr_port *port;
+
+	hsr_for_each_port(hsr, port) {
+		bool is_slave = port->type == HSR_PT_SLAVE_A ||
+				port->type == HSR_PT_SLAVE_B;
+
+		if (!is_slave || !(change & IFF_ALLMULTI))
+			continue;
+
+		/* +1 if allmulti is now set on the HSR device, -1 if cleared */
+		if (dev->flags & IFF_ALLMULTI)
+			dev_set_allmulti(port->dev, 1);
+		else
+			dev_set_allmulti(port->dev, -1);
+	}
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
+ .ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
+ .ndo_set_rx_mode = hsr_set_rx_mode,
};
static struct device_type hsr_type = {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5f693bb..86cc6d3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -482,6 +482,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
struct sock *sk;
struct request_sock *fastopen;
+ bool harderr = false;
u32 seq, snd_una;
int err;
struct net *net = dev_net(skb->dev);
@@ -555,6 +556,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
+ harderr = true;
break;
case ICMP_DEST_UNREACH:
if (code > NR_ICMP_UNREACH)
@@ -579,6 +581,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
}
err = icmp_err_convert[code].errno;
+ harderr = icmp_err_convert[code].fatal;
/* check if this ICMP message allows revert of backoff.
* (see RFC 6069)
*/
@@ -604,6 +607,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
+ if (!harderr)
+ break;
+
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1f9f6c1..d1ad20c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk)
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
- icsk->icsk_backoff++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
@@ -647,11 +646,12 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_rto_min(sk),
TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
- icsk->icsk_backoff >
+ tp->total_rto >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
/* Use normal (exponential) backoff unless linear timeouts are
* activated.
*/
+ icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 937a02c..43deda4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -381,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
- bool fatal;
+ bool harderr;
int err;
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
@@ -402,9 +402,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
seq = ntohl(th->seq);
- fatal = icmpv6_err_convert(type, code, &err);
+ harderr = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV) {
- tcp_req_err(sk, seq, fatal);
+ tcp_req_err(sk, seq, harderr);
return 0;
}
@@ -489,6 +489,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
+ if (!harderr)
+ break;
+
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 03757e7..374412e 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -105,8 +105,11 @@ enum {
struct ncsi_channel_version {
- u32 version; /* Supported BCD encoded NCSI version */
- u32 alpha2; /* Supported BCD encoded NCSI version */
+ u8 major; /* NCSI version major */
+ u8 minor; /* NCSI version minor */
+ u8 update; /* NCSI version update */
+ char alpha1; /* NCSI version alpha1 */
+ char alpha2; /* NCSI version alpha2 */
u8 fw_name[12]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index fd2236e..b3ff37a 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -270,7 +270,8 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem },
{ NCSI_PKT_CMD_PLDM, 0, NULL },
- { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
+ { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default }
};
static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index d9da942..745c788 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
-
static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
{
unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
@@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
return ret;
}
-#endif
-
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
-
/* NCSI OEM Command APIs */
static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
{
@@ -856,8 +850,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
return nch->handler(nca);
}
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
/* Determine if a given channel from the channel_queue should be used for Tx */
static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
struct ncsi_channel *nc)
@@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
}
- nd->state = ncsi_dev_state_config_oem_gma;
+ nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ ? ncsi_dev_state_config_oem_gma
+ : ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
nd->state = ncsi_dev_state_config_clear_vids;
- ret = -1;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
- nca.type = NCSI_PKT_CMD_OEM;
nca.package = np->id;
nca.channel = nc->id;
ndp->pending_req_num = 1;
- ret = ncsi_gma_handler(&nca, nc->version.mf_id);
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
+ if (nc->version.major >= 1 && nc->version.minor >= 2) {
+ nca.type = NCSI_PKT_CMD_GMCMA;
+ ret = ncsi_xmit_cmd(&nca);
+ } else {
+ nca.type = NCSI_PKT_CMD_OEM;
+ ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+ }
if (ret < 0)
schedule_work(&ndp->work);
@@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
schedule_work(&ndp->work);
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
case ncsi_dev_state_probe_mlx_gma:
ndp->pending_req_num = 1;
@@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
nd->state = ncsi_dev_state_probe_keep_phy;
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index a3a6753..2f872d0 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor);
nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index ba66c7d..f2f3b5c 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt {
/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 ncsi_version; /* NCSI version */
+ unsigned char major; /* NCSI version major */
+ unsigned char minor; /* NCSI version minor */
+ unsigned char update; /* NCSI version update */
+ unsigned char alpha1; /* NCSI version alpha1 */
unsigned char reserved[3]; /* Reserved */
- unsigned char alpha2; /* NCSI version */
+ unsigned char alpha2; /* NCSI version alpha2 */
unsigned char fw_name[12]; /* f/w name string */
__be32 fw_version; /* f/w version */
__be16 pci_ids[4]; /* PCI IDs */
@@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt {
__be32 checksum;
};
+/* Get MC MAC Address */
+struct ncsi_rsp_gmcma_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ unsigned char address_count; /* Number of entries in addresses[] */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char addresses[][ETH_ALEN]; /* MAC addresses, ETH_ALEN bytes each */
+};
+
/* AEN: Link State Change */
struct ncsi_aen_lsc_pkt {
struct ncsi_aen_pkt_hdr aen; /* AEN header */
@@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */
#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */
+#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */
/* NCSI packet responses */
@@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80)
#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80)
+#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80)
/* NCSI response code/reason */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 069c265..bee290d 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -19,6 +19,19 @@
#include "ncsi-pkt.h"
#include "ncsi-netlink.h"
+/* Decode one packed-BCD byte into its decimal value (0..99).
+ *
+ * A nibble in the range [0xA, 0xF] is not a BCD digit and contributes
+ * zero, so an optional field encoded as 0xFF decodes to 0.
+ */
+static u8 decode_bcd_u8(u8 x)
+{
+	u8 tens = x >> 4;
+	u8 units = x & 0xF;
+
+	if (tens > 9)
+		tens = 0;
+	if (units > 9)
+		units = 0;
+
+	return tens * 10 + units;
+}
+
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
{
@@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
if (!nc)
return -ENODEV;
- /* Update to channel's version info */
+ /* Update channel's version info
+ *
+ * Major, minor, and update fields are supposed to be
+ * unsigned integers encoded as packed BCD.
+ *
+ * Alpha1 and alpha2 are ISO/IEC 8859-1 characters.
+ */
ncv = &nc->version;
- ncv->version = ntohl(rsp->ncsi_version);
+ ncv->major = decode_bcd_u8(rsp->major);
+ ncv->minor = decode_bcd_u8(rsp->minor);
+ ncv->update = decode_bcd_u8(rsp->update);
+ ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
ncv->fw_version = ntohl(rsp->fw_version);
@@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
return ret;
}
+/* Handle a Get MC MAC Address (GMCMA) response.
+ *
+ * Logs every address carried in the response, then walks them in order and
+ * stops at the first one that ndo_set_mac_address() accepts.
+ * ndp->gma_flag is set to whether the final result was 0.
+ *
+ * Returns the result of the last ndo_set_mac_address() attempt, or -1 when
+ * the response carried no addresses.
+ *
+ * NOTE(review): address_count is taken from the packet as-is; this handler
+ * does not check it against the received payload length - confirm that the
+ * length is validated before addresses[] is walked.
+ */
+static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
+{
+	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct net_device *ndev = ndp->ndev.dev;
+	struct ncsi_rsp_gmcma_pkt *rsp;
+	struct sockaddr saddr;
+	int ret = -1;
+	int i;
+
+	rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
+	saddr.sa_family = ndev->type;
+	ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+	netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
+		    rsp->address_count);
+	/* %pM prints the six bytes as xx:xx:xx:xx:xx:xx */
+	for (i = 0; i < rsp->address_count; i++)
+		netdev_info(ndev, "NCSI: MAC address %d: %pM\n",
+			    i, rsp->addresses[i]);
+
+	/* Try the addresses in order; keep the first one that sticks. */
+	for (i = 0; i < rsp->address_count; i++) {
+		memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
+		ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
+		if (ret < 0) {
+			netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
+				    saddr.sa_data);
+			continue;
+		}
+		netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+		break;
+	}
+
+	ndp->gma_flag = ret == 0;
+	return ret;
+}
+
static struct ncsi_rsp_handler {
unsigned char type;
int payload;
@@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm },
{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid },
{ NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm },
- { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }
+ { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma },
};
int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index eb086b0..177126f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
- int broadcast)
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
struct sk_buff *skb;
void *data;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d5bdfd4..289e175 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -71,7 +71,7 @@ struct tc_u_hnode {
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- int refcnt;
+ refcount_t refcnt;
unsigned int divisor;
struct idr handle_idr;
bool is_root;
@@ -86,7 +86,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
void *ptr;
- int refcnt;
+ refcount_t refcnt;
struct idr handle_idr;
struct hlist_node hnode;
long knodes;
@@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->refcnt++;
+ refcount_set(&root_ht->refcnt, 1);
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
root_ht->is_root = true;
@@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
+ refcount_set(&tp_c->refcnt, 1);
tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
hlist_add_head(&tp_c->hnode, tc_u_hash(key));
+ } else {
+ refcount_inc(&tp_c->refcnt);
}
- tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->refcnt++;
+ /* root_ht must be destroyed when tcf_proto is destroyed */
rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
@@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n)
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- if (ht && --ht->refcnt == 0)
+ if (ht && refcount_dec_and_test(&ht->refcnt))
kfree(ht);
kfree(n);
}
@@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct tc_u_hnode __rcu **hn;
struct tc_u_hnode *phn;
- WARN_ON(--ht->refcnt);
-
u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
@@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
WARN_ON(root_ht == NULL);
- if (root_ht && --root_ht->refcnt == 1)
+ if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
u32_destroy_hnode(tp, root_ht, extack);
- if (--tp_c->refcnt == 0) {
+ if (refcount_dec_and_test(&tp_c->refcnt)) {
struct tc_u_hnode *ht;
hlist_del(&tp_c->hnode);
@@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
/* u32_destroy_key() will later free ht for us, if it's
* still referenced by some knode
*/
- if (--ht->refcnt == 0)
+ if (refcount_dec_and_test(&ht->refcnt))
kfree_rcu(ht, rcu);
}
@@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
return -EINVAL;
}
- if (ht->refcnt == 1) {
+ if (refcount_dec_if_one(&ht->refcnt)) {
u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
@@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
}
out:
- *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
+ *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
return ret;
}
@@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
return -EINVAL;
}
- ht_down->refcnt++;
+ refcount_inc(&ht_down->refcnt);
}
ht_old = rtnl_dereference(n->ht_down);
rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
- ht_old->refcnt--;
+ refcount_dec(&ht_old->refcnt);
}
if (ifindex >= 0)
@@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
/* bump reference count as long as we hold pointer to structure */
if (ht)
- ht->refcnt++;
+ refcount_inc(&ht->refcnt);
return new;
}
@@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht_old = rtnl_dereference(n->ht_down);
if (ht_old)
- ht_old->refcnt++;
+ refcount_inc(&ht_old->refcnt);
}
__u32_destroy_key(new);
return err;
@@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
}
- ht->refcnt = 1;
+ refcount_set(&ht->refcnt, 1);
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4195a4b..8dd0e59 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
if (unlikely(timedout_ms)) {
trace_net_dev_xmit_timeout(dev, i);
- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
- dev->name, netdev_drivername(dev), i, timedout_ms);
+ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
+ raw_smp_processor_id(),
+ i, timedout_ms);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index c763008..079aebb 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
int str_len = strlen(str) + 1;
struct sk_buff *buf;
- buf = tipc_tlv_alloc(TLV_SPACE(str_len));
+ buf = tipc_tlv_alloc(str_len);
if (buf)
tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len);
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 6965c94..09e4f2f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -20,7 +20,7 @@
**bpftool** **version**
- *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** |
+ *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** |
**btf** | **gen** | **struct_ops** | **iter** }
*OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0f6cdf5..7a54982 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1200,6 +1200,9 @@ enum bpf_perf_event_type {
*/
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_REG_INVARIANTS (1U << 7)
+
/* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/
@@ -4517,6 +4520,8 @@ union bpf_attr {
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
+ * Note: the user stack will only be populated if the *task* is
+ * the current task; all other tasks will return -EOPNOTSUPP.
* To achieve this, the helper needs *task*, which is a valid
* pointer to **struct task_struct**. To store the stacktrace, the
* bpf program provides *buf* with a nonnegative *size*.
@@ -4528,6 +4533,7 @@ union bpf_attr {
*
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
+ * The *task* must be the current task.
* **BPF_F_USER_BUILD_ID**
* Collect buildid+offset instead of ips for user stack,
* only valid if **BPF_F_USER_STACK** is also specified.
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock {
};
struct bpf_timer {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_dynptr {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_head {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_node {
- __u64 :64;
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[3];
} __attribute__((aligned(8)));
struct bpf_rb_root {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_rb_node {
- __u64 :64;
- __u64 :64;
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[4];
} __attribute__((aligned(8)));
struct bpf_refcount {
- __u32 :32;
+ __u32 __opaque[1];
} __attribute__((aligned(4)));
struct bpf_sysctl {
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index b7060f2..8fe248e 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -79,11 +79,14 @@
*/
#define LIBBPF_OPTS_RESET(NAME, ...) \
do { \
- memset(&NAME, 0, sizeof(NAME)); \
- NAME = (typeof(NAME)) { \
- .sz = sizeof(NAME), \
- __VA_ARGS__ \
- }; \
+ typeof(NAME) ___##NAME = ({ \
+ memset(&___##NAME, 0, sizeof(NAME)); \
+ (typeof(NAME)) { \
+ .sz = sizeof(NAME), \
+ __VA_ARGS__ \
+ }; \
+ }); \
+ memcpy(&NAME, &___##NAME, sizeof(NAME)); \
} while (0)
#endif /* __LIBBPF_LIBBPF_COMMON_H */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 5b1da2a..5aa133b 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -45,9 +45,12 @@
#define format_parent_cgroup_path(buf, path) \
format_cgroup_path_pid(buf, path, getppid())
-#define format_classid_path(buf) \
- snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
- CGROUP_WORK_DIR)
+/* Write NETCLS_MOUNT_PATH, CGROUP_WORK_DIR and the decimal @pid, concatenated,
+ * into @buf. @buf must be an array, not a pointer: its size is taken with
+ * sizeof(buf).
+ */
+#define format_classid_path_pid(buf, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid)
+
+/* format_classid_path_pid() for the calling process (pid from getpid()). */
+#define format_classid_path(buf) \
+ format_classid_path_pid(buf, getpid())
static __thread bool cgroup_workdir_mounted;
@@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path)
}
/**
- * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @relative_path: The cgroup path, relative to the workdir, to join
+ * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
+ * @cgroup_workdir: The absolute cgroup path
*
* On success, it returns the cgroup id. On failure it returns 0,
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id(const char *relative_path)
+unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
unsigned long long cgid;
unsigned char raw_bytes[8];
} id;
- char cgroup_workdir[PATH_MAX + 1];
struct file_handle *fhp, *fhp2;
unsigned long long ret = 0;
- format_cgroup_path(cgroup_workdir, relative_path);
-
dirfd = AT_FDCWD;
flags = 0;
fhsize = sizeof(*fhp);
@@ -474,6 +474,14 @@ unsigned long long get_cgroup_id(const char *relative_path)
return ret;
}
+/**
+ * get_cgroup_id() - Get cgroup id for a cgroup path relative to the workdir
+ * @relative_path: The cgroup path, relative to the workdir
+ *
+ * Expands @relative_path with format_cgroup_path() and returns what
+ * get_cgroup_id_from_path() reports for the result: the cgroup id on
+ * success, 0 on failure.
+ */
+unsigned long long get_cgroup_id(const char *relative_path)
+{
+	char path[PATH_MAX + 1];
+
+	format_cgroup_path(path, relative_path);
+
+	return get_cgroup_id_from_path(path);
+}
+
int cgroup_setup_and_join(const char *path) {
int cg_fd;
@@ -523,10 +531,20 @@ int setup_classid_environment(void)
return 1;
}
- if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
- errno != EBUSY) {
- log_err("mount cgroup net_cls");
- return 1;
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
+ if (errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ if (rmdir(NETCLS_MOUNT_PATH)) {
+ log_err("rmdir cgroup net_cls");
+ return 1;
+ }
+ if (umount(CGROUP_MOUNT_DFLT)) {
+ log_err("umount cgroup base");
+ return 1;
+ }
}
cleanup_classid_environment();
@@ -541,15 +559,16 @@ int setup_classid_environment(void)
/**
* set_classid() - Set a cgroupv1 net_cls classid
- * @id: the numeric classid
*
- * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * Writes the classid into the cgroup work dir's net_cls.classid
* file in order to later on trigger socket tagging.
*
+ * We leverage the current pid as the classid, ensuring unique identification.
+ *
* On success, it returns 0, otherwise on failure it returns 1. If there
* is a failure, it prints the error to stderr.
*/
-int set_classid(unsigned int id)
+int set_classid(void)
{
char cgroup_workdir[PATH_MAX - 42];
char cgroup_classid_path[PATH_MAX + 1];
@@ -565,7 +584,7 @@ int set_classid(unsigned int id)
return 1;
}
- if (dprintf(fd, "%u\n", id) < 0) {
+ if (dprintf(fd, "%u\n", getpid()) < 0) {
log_err("Setting cgroup classid");
rc = 1;
}
@@ -607,3 +626,66 @@ void cleanup_classid_environment(void)
join_cgroup_from_top(NETCLS_MOUNT_PATH);
nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
}
+
+/**
+ * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
+ *
+ * Builds the net_cls work directory path of the calling process with
+ * format_classid_path() and returns what get_cgroup_id_from_path()
+ * reports for it: the cgroup id on success, 0 on failure.
+ */
+unsigned long long get_classid_cgroup_id(void)
+{
+	char path[PATH_MAX + 1];
+
+	format_classid_path(path);
+
+	return get_cgroup_id_from_path(path);
+}
+
+/**
+ * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
+ * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
+ * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like
+ * "net_cls,net_prio".
+ *
+ * For every line of /proc/self/cgroup the first ':'-separated field is parsed
+ * as the hierarchy id and the second field is matched against @subsys_name,
+ * first as a whole and then against each ','-separated entry of it.
+ *
+ * Returns the hierarchy id of the first matching line, or -1 if @subsys_name
+ * is NULL, the file cannot be opened, or nothing matches.
+ */
+int get_cgroup1_hierarchy_id(const char *subsys_name)
+{
+	char *c, *c2, *c3, *c4;
+	bool found = false;
+	char line[1024];
+	FILE *file;
+	int i, id = -1;
+
+	if (!subsys_name)
+		return -1;
+
+	file = fopen("/proc/self/cgroup", "r");
+	if (!file) {
+		log_err("fopen /proc/self/cgroup");
+		return -1;
+	}
+
+	while (fgets(line, sizeof(line), file)) {
+		i = 0;
+		for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
+			if (i == 0) {
+				id = strtol(c, NULL, 10);
+			} else if (i == 1) {
+				/* Whole-field match, e.g. "net_cls,net_prio" */
+				if (!strcmp(c, subsys_name)) {
+					found = true;
+					break;
+				}
+
+				/* Multiple subsystems may share one single mount point */
+				for (c3 = strtok_r(c, ",", &c4); c3;
+				     c3 = strtok_r(NULL, ",", &c4)) {
+					/* Compare the current token: after the
+					 * first strtok_r() call c only holds the
+					 * first entry of the list.
+					 */
+					if (!strcmp(c3, subsys_name)) {
+						found = true;
+						break;
+					}
+				}
+			}
+			i++;
+		}
+		if (found)
+			break;
+	}
+	fclose(file);
+	return found ? id : -1;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5c2cb9c..ee05364 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -20,6 +20,7 @@ int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
unsigned long long get_cgroup_id(const char *relative_path);
+int get_cgroup1_hierarchy_id(const char *subsys_name);
int join_cgroup(const char *relative_path);
int join_root_cgroup(void);
@@ -29,8 +30,9 @@ int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
/* cgroupv1 related */
-int set_classid(unsigned int id);
+int set_classid(void);
int join_classid(void);
+unsigned long long get_classid_cgroup_id(void);
int setup_classid_environment(void);
void cleanup_classid_environment(void);
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 2538214..29c8635 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_ARM_SMMU_V3=y
@@ -37,6 +36,7 @@
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -46,7 +46,6 @@
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
-CONFIG_DRM_VIRTIO_GPU=y
CONFIG_DRM=y
CONFIG_DUMMY=y
CONFIG_EXPERT=y
@@ -67,7 +66,6 @@
CONFIG_HEADERS_INSTALL=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_HUGETLBFS=y
-CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_HW_RANDOM=y
CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -99,8 +97,6 @@
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
-CONFIG_NET_9P_VIRTIO=y
-CONFIG_NET_9P=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
@@ -140,7 +136,6 @@
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_VIRTIO=y
CONFIG_SCSI=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
@@ -167,16 +162,6 @@
CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_FS=y
-CONFIG_VIRTIO_INPUT=y
-CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
-CONFIG_VIRTIO_MMIO=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 2ba9216..e933303 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_AUDIT=y
CONFIG_BLK_CGROUP=y
@@ -84,8 +83,6 @@
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
@@ -114,7 +111,6 @@
CONFIG_SAMPLES=y
CONFIG_SCHED_TRACER=y
CONFIG_SCSI=y
-CONFIG_SCSI_VIRTIO=y
CONFIG_SECURITY_NETWORK=y
CONFIG_STACK_TRACER=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -136,11 +132,6 @@
CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm
new file mode 100644
index 0000000..a9746ca7
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.vm
@@ -0,0 +1,12 @@
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 2e70a60..f7bfb2b 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -1,6 +1,3 @@
-CONFIG_9P_FS=y
-CONFIG_9P_FS_POSIX_ACL=y
-CONFIG_9P_FS_SECURITY=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
@@ -45,7 +42,6 @@
CONFIG_CPUSETS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
-CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
@@ -145,8 +141,6 @@
CONFIG_MINIX_SUBPARTITION=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_EMATCH=y
@@ -228,12 +222,6 @@
CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 8bf497a..2ea3640 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type)
map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}
+/* True for the two per-CPU hash map types (plain and LRU). */
+static bool is_percpu(__u32 map_type)
+{
+	switch (map_type) {
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+		return true;
+	default:
+		return false;
+	}
+}
+
struct upsert_opts {
__u32 map_type;
int map_fd;
__u32 n;
+ bool retry_for_nomem; /* update via map_update_retriable() with retry_for_nomem_fn */
};
static int create_small_hash(void)
@@ -148,19 +155,38 @@ static int create_small_hash(void)
return map_fd;
}
+static bool retry_for_nomem_fn(int err) /* retry predicate handed to map_update_retriable() */
+{
+ return err == ENOMEM; /* any other error is not retried */
+}
+
static void *patch_map_thread(void *arg)
{
+ /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+ static __u8 blob[8 << 10];
struct upsert_opts *opts = arg;
+ void *val_ptr;
int val;
int ret;
int i;
for (i = 0; i < opts->n; i++) {
- if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
val = create_small_hash();
- else
+ val_ptr = &val;
+ } else if (is_percpu(opts->map_type)) {
+ val_ptr = blob;
+ } else {
val = rand();
- ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
+ val_ptr = &val;
+ }
+
+ /* Retrying for about 2 seconds is probably enough. */
+ if (opts->retry_for_nomem)
+ ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+ 40, retry_for_nomem_fn);
+ else
+ ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
@@ -281,6 +307,13 @@ static void __test(int map_fd)
else
opts.n /= 2;
+ /* The per-cpu BPF memory allocator may fail to allocate a per-cpu
+ * pointer and may not refill its free llist in time, in which case
+ * bpf_map_update_elem() returns -ENOMEM. So just retry to mitigate
+ * the problem temporarily.
+ */
+ opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
/*
* Upsert keys [0, n) under some competition: with random values from
* N_THREADS threads. Check values, then delete all elements and check
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 465c1c3..4ebd0da 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "2"},
{1, "R3_w", "4"},
@@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "1"},
{1, "R3_w", "2"},
@@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "4"},
{1, "R3_w", "8"},
@@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "7"},
{1, "R3_w", "7"},
@@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8,imm=0)"},
+ {6, "R0_w", "pkt(off=8,r=8)"},
{6, "R3_w", "var_off=(0x0; 0xff)"},
{7, "R3_w", "var_off=(0x0; 0x1fe)"},
{8, "R3_w", "var_off=(0x0; 0x3fc)"},
{9, "R3_w", "var_off=(0x0; 0x7f8)"},
{10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end(off=0,imm=0)"},
+ {12, "R3_w", "pkt_end()"},
{17, "R4_w", "var_off=(0x0; 0xff)"},
{18, "R4_w", "var_off=(0x0; 0x1fe0)"},
{19, "R4_w", "var_off=(0x0; 0xff0)"},
@@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(off=0,r=0,imm=0)"},
- {4, "R5_w", "pkt(off=14,r=0,imm=0)"},
- {5, "R4_w", "pkt(off=14,r=0,imm=0)"},
- {9, "R2", "pkt(off=0,r=18,imm=0)"},
- {10, "R5", "pkt(off=14,r=18,imm=0)"},
+ {2, "R5_w", "pkt(r=0)"},
+ {4, "R5_w", "pkt(off=14,r=0)"},
+ {5, "R4_w", "pkt(off=14,r=0)"},
+ {9, "R2", "pkt(r=18)"},
+ {10, "R5", "pkt(off=14,r=18)"},
{10, "R4_w", "var_off=(0x0; 0xff)"},
{13, "R4_w", "var_off=(0x0; 0xffff)"},
{14, "R4_w", "var_off=(0x0; 0xffff)"},
@@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
@@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8,"},
+ {26, "R5_w", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
@@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
{8, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end(off=0,imm=0)"},
+ {3, "R5_w", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
{5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
@@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{8, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
{9, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{9, "R6_w", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
{10, "R6_w", "var_off=(0x2; 0x7c)"},
@@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test)
/* Check the next line as well in case the previous line
* did not have a corresponding bpf insn. Example:
* func#0 @0
- * 0: R1=ctx(off=0,imm=0) R10=fp0
+ * 0: R1=ctx() R10=fp0
* 0: (b7) r3 = 2 ; R3_w=2
*
* Sometimes it's actually two lines below, e.g. when
* searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
- * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
- * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
- * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
+ * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
*/
while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
cur_line = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index a1766a2..f7cd129 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -9,8 +9,6 @@
#include "cap_helpers.h"
#include "bind_perm.skel.h"
-static int duration;
-
static int create_netns(void)
{
if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
@@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno)
int fd = -1;
fd = socket(family, SOCK_STREAM, 0);
- if (CHECK(fd < 0, "fd", "errno %d", errno))
+ if (!ASSERT_GE(fd, 0, "socket"))
goto close_socket;
if (family == AF_INET) {
@@ -60,7 +58,7 @@ void test_bind_perm(void)
return;
cgroup_fd = test__join_cgroup("/bind_perm");
- if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
skel = bind_perm__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index e3498f6..618af9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -34,8 +34,6 @@
#include "bpf_iter_ksym.skel.h"
#include "bpf_iter_sockmap.skel.h"
-static int duration;
-
static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;
@@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_
/* not check contents, but ensure read() ends without error */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+ ASSERT_GE(len, 0, "read");
close(iter_fd);
@@ -334,6 +332,8 @@ static void test_task_stack(void)
do_dummy_read(skel->progs.dump_task_stack);
do_dummy_read(skel->progs.get_task_user_stacks);
+ ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks");
+
bpf_iter_task_stack__destroy(skel);
}
@@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
goto free_link;
}
- if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "read"))
goto free_link;
ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
@@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected,
start = 0;
while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
start += len;
- if (CHECK(start >= 16, "read", "read len %d\n", len))
+ if (!ASSERT_LT(start, 16, "read"))
return -1;
read_buf_len = read_one_char ? 1 : 16 - start;
}
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
return -1;
if (!ASSERT_STREQ(buf, expected, "read"))
@@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected)
int err, iter_fd;
iter_fd = open(path, O_RDONLY);
- if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
- path, strerror(errno)))
+ if (!ASSERT_GE(iter_fd, 0, "open"))
return -1;
err = do_read_with_fd(iter_fd, expected, false);
@@ -600,7 +599,7 @@ static void test_file_iter(void)
unlink(path);
err = bpf_link__pin(link, path);
- if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ if (!ASSERT_OK(err, "pin_iter"))
goto free_link;
err = do_read(path, "abcd");
@@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
* overflow and needs restart.
*/
map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
- if (CHECK(map1_fd < 0, "bpf_map_create",
- "map_creation failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(map1_fd, 0, "bpf_map_create"))
goto out;
map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
- if (CHECK(map2_fd < 0, "bpf_map_create",
- "map_creation failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(map2_fd, 0, "bpf_map_create"))
goto free_map1;
/* bpf_seq_printf kernel buffer is 8 pages, so one map
@@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
/* setup filtering map_id in bpf program */
map_info_len = sizeof(map_info);
err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map1_id = map_info.id;
err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map2_id = map_info.id;
@@ -705,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
goto free_link;
buf = malloc(expected_read_len);
- if (!buf)
+ if (!ASSERT_OK_PTR(buf, "malloc"))
goto close_iter;
/* do read */
@@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- CHECK(len != -1 || errno != E2BIG, "read",
- "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
- len, strerror(errno));
+ ASSERT_EQ(len, -1, "read");
+ ASSERT_EQ(errno, E2BIG, "read");
goto free_buf;
} else if (!ret1) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
} else {
do {
@@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
total_read_len += len;
} while (len > 0 || len == -EAGAIN);
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
}
@@ -836,7 +828,7 @@ static void test_bpf_hash_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void)
skel->rodata->num_cpus = bpf_num_possible_cpus();
val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_hash_map__load(skel);
if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
@@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -983,17 +977,14 @@ static void test_bpf_array_map(void)
start = 0;
while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
start += len;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
res_first_key = *(__u32 *)buf;
res_first_val = *(__u64 *)(buf + sizeof(__u32));
- if (CHECK(res_first_key != 0 || res_first_val != first_val,
- "bpf_seq_write",
- "seq_write failure: first key %u vs expected 0, "
- " first value %llu vs expected %llu\n",
- res_first_key, res_first_val, first_val))
+ if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") ||
+ !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write"))
goto close_iter;
if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
@@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void)
skel->rodata->num_cpus = bpf_num_possible_cpus();
val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_array_map__load(skel);
if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
@@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void)
sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
+
err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
if (!ASSERT_OK(err, "map_update"))
goto out;
@@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
- goto close_iter;
+
+ /* Note: The following assertions serve to ensure
+ * the value was deleted. They do so by asserting
+ * that bpf_map_lookup_elem() has failed. This might
+ * seem counterintuitive at first.
+ */
+ ASSERT_ERR(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem");
close_iter:
close(iter_fd);
@@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void)
do_dummy_read(skel->progs.fill_socket_owner);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- getpid(), val, err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem") ||
+ !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem"))
goto close_socket;
do_dummy_read(skel->progs.negate_socket_local_storage);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- -getpid(), val, err);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem");
close_socket:
close(sock_fd);
@@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 675b90b..f09d6ac 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void)
*/
__u32 map_ids[nr_iters + 1];
char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
- __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ __u32 i, next_id, info_len, nr_id_found;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
__u64 array_value;
@@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void)
time_t now, load_time;
err = bpf_prog_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");
err = bpf_map_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
err = bpf_link_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_link_get_fd_by_id"); /* link ID 0 must not resolve to an fd */
+ ASSERT_EQ(errno, ENOENT, "bpf_link_get_fd_by_id");
/* Check bpf_map_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
@@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void)
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_prog_test_load"))
continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- if (CHECK_FAIL(map_fds[i] < 0))
- goto done;
- err = bpf_map_update_elem(map_fds[i], &array_key,
- &array_magic_value, 0);
- if (CHECK_FAIL(err))
+ if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map"))
goto done;
- prog = bpf_object__find_program_by_name(objs[i],
- "test_obj_id");
- if (CHECK_FAIL(!prog))
+ err = bpf_map_update_elem(map_fds[i], &array_key,
+ &array_magic_value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto done;
+
+ prog = bpf_object__find_program_by_name(objs[i], "test_obj_id");
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto done;
+
links[i] = bpf_program__attach(prog);
err = libbpf_get_error(links[i]);
- if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ if (!ASSERT_OK(err, "bpf_program__attach")) {
links[i] = NULL;
goto done;
}
@@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void)
bzero(&map_infos[i], info_len);
err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
&info_len);
- if (CHECK(err ||
- map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
- map_infos[i].key_size != sizeof(__u32) ||
- map_infos[i].value_size != sizeof(__u64) ||
- map_infos[i].max_entries != 1 ||
- map_infos[i].map_flags != 0 ||
- info_len != sizeof(struct bpf_map_info) ||
- strcmp((char *)map_infos[i].name, expected_map_name),
- "get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
- err, errno,
- map_infos[i].type, BPF_MAP_TYPE_ARRAY,
- info_len, sizeof(struct bpf_map_info),
- map_infos[i].key_size,
- map_infos[i].value_size,
- map_infos[i].max_entries,
- map_infos[i].map_flags,
- map_infos[i].name, expected_map_name))
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") ||
+ !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") ||
+ !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") ||
+ !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") ||
+ !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") ||
+ !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") ||
+ !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name"))
goto done;
/* Check getting prog info */
@@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void)
prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
+
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
- if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
- info_len != sizeof(struct bpf_prog_info) ||
- (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
- (env.jit_enabled &&
- !memcmp(jited_insns, zeros, sizeof(zeros))) ||
- !prog_infos[i].xlated_prog_len ||
- !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
- load_time < now - 60 || load_time > now + 60 ||
- prog_infos[i].created_by_uid != my_uid ||
- prog_infos[i].nr_map_ids != 1 ||
- *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
- strcmp((char *)prog_infos[i].name, expected_prog_name),
- "get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
- "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
- "jited_prog %d xlated_prog %d load_time %lu(%lu) "
- "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
- "name %s(%s)\n",
- err, errno, i,
- prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
- info_len, sizeof(struct bpf_prog_info),
- env.jit_enabled,
- prog_infos[i].jited_prog_len,
- prog_infos[i].xlated_prog_len,
- !!memcmp(jited_insns, zeros, sizeof(zeros)),
- !!memcmp(xlated_insns, zeros, sizeof(zeros)),
- load_time, now,
- prog_infos[i].created_by_uid, my_uid,
- prog_infos[i].nr_map_ids, 1,
- *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
- prog_infos[i].name, expected_prog_name))
+
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))),
+ "jited_insns") ||
+ !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") ||
+ !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") ||
+ !ASSERT_GE(load_time, (now - 60), "load_time") ||
+ !ASSERT_LE(load_time, (now + 60), "load_time") ||
+ !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") ||
+ !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") ||
+ !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") ||
+ !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name"))
goto done;
/* Check getting link info */
@@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void)
link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
&link_infos[i], &info_len);
- if (CHECK(err ||
- link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
- link_infos[i].prog_id != prog_infos[i].id ||
- link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
- strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter") ||
- info_len != sizeof(struct bpf_link_info),
- "get-link-info(fd)",
- "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
- "prog_id %d (%d) tp_name %s(%s)\n",
- err, errno,
- info_len, sizeof(struct bpf_link_info),
- link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
- link_infos[i].id,
- link_infos[i].prog_id, prog_infos[i].id,
- (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter"))
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") ||
+ !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") ||
+ !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") ||
+ !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") ||
+ !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name"))
goto done;
-
}
/* Check bpf_prog_get_next_id() */
@@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void)
while (!bpf_prog_get_next_id(next_id, &next_id)) {
struct bpf_prog_info prog_info = {};
__u32 saved_map_id;
- int prog_fd;
+ int prog_fd, cmp_res;
info_len = sizeof(prog_info);
@@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void)
if (prog_fd < 0 && errno == ENOENT)
/* The bpf_prog is in the dead row */
continue;
- if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
- "prog_fd %d next_id %d errno %d\n",
- prog_fd, next_id, errno))
+ if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void)
*/
prog_info.nr_map_ids = 1;
err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
- if (CHECK(!err || errno != EFAULT,
- "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
- err, errno, EFAULT))
+ if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd"))
break;
bzero(&prog_info, sizeof(prog_info));
info_len = sizeof(prog_info);
@@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void)
err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
prog_infos[i].jited_prog_insns = 0;
prog_infos[i].xlated_prog_insns = 0;
- CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
- memcmp(&prog_info, &prog_infos[i], info_len) ||
- *(int *)(long)prog_info.map_ids != saved_map_id,
- "get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
- err, errno, info_len, sizeof(struct bpf_prog_info),
- memcmp(&prog_info, &prog_infos[i], info_len),
- *(int *)(long)prog_info.map_ids, saved_map_id);
+ cmp_res = memcmp(&prog_info, &prog_infos[i], info_len);
+
+ ASSERT_OK(err, "bpf_prog_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id");
close(prog_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total prog id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found");
/* Check bpf_map_get_next_id() */
nr_id_found = 0;
next_id = 0;
while (!bpf_map_get_next_id(next_id, &next_id)) {
struct bpf_map_info map_info = {};
- int map_fd;
+ int map_fd, cmp_res;
info_len = sizeof(map_info);
@@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void)
if (map_fd < 0 && errno == ENOENT)
/* The bpf_map is in the dead row */
continue;
- if (CHECK(map_fd < 0, "get-map-fd(next_id)",
- "map_fd %d next_id %u errno %d\n",
- map_fd, next_id, errno))
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
goto done;
err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
- CHECK(err || info_len != sizeof(struct bpf_map_info) ||
- memcmp(&map_info, &map_infos[i], info_len) ||
- array_value != array_magic_value,
- "check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
- err, errno, info_len, sizeof(struct bpf_map_info),
- memcmp(&map_info, &map_infos[i], info_len),
- array_value, array_magic_value);
+ cmp_res = memcmp(&map_info, &map_infos[i], info_len);
+ ASSERT_OK(err, "bpf_map_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(array_value, array_magic_value, "array_value");
close(map_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total map id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found");
/* Check bpf_link_get_next_id() */
nr_id_found = 0;
@@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void)
if (link_fd < 0 && errno == ENOENT)
/* The bpf_link is in the dead row */
continue;
- if (CHECK(link_fd < 0, "get-link-fd(next_id)",
- "link_fd %d next_id %u errno %d\n",
- link_fd, next_id, errno))
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void)
err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
cmp_res = memcmp(&link_info, &link_infos[i],
offsetof(struct bpf_link_info, raw_tracepoint));
- CHECK(err || info_len != sizeof(link_info) || cmp_res,
- "check get-link-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d\n",
- err, errno, info_len, sizeof(struct bpf_link_info),
- cmp_res);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(link_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
close(link_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total link id found by get_next_id",
- "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found");
done:
for (i = 0; i < nr_iters; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 4aabeaa..a88e6e0 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -20,15 +20,14 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;
-static int stop, duration;
+static int stop;
static int settcpca(int fd, const char *tcp_ca)
{
int err;
err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
- if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
- errno))
+ if (!ASSERT_NEQ(err, -1, "setsockopt"))
return -1;
return 0;
@@ -65,8 +64,7 @@ static void *server(void *arg)
bytes += nr_sent;
}
- CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
- bytes, total_bytes, nr_sent, errno);
+ ASSERT_EQ(bytes, total_bytes, "send");
done:
if (fd >= 0)
@@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
WRITE_ONCE(stop, 0);
lfd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(lfd, -1, "socket"))
return;
+
fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+ if (!ASSERT_NEQ(fd, -1, "socket")) {
close(lfd);
return;
}
@@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
sa6.sin6_family = AF_INET6;
sa6.sin6_addr = in6addr_loopback;
err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "bind"))
goto done;
+
err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "getsockname"))
goto done;
+
err = listen(lfd, 1);
- if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "listen"))
goto done;
if (sk_stg_map) {
err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
&expected_stg, BPF_NOEXIST);
- if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
goto done;
}
/* connect to server */
err = connect(fd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "connect"))
goto done;
if (sk_stg_map) {
@@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
&tmp_stg);
- if (CHECK(!err || errno != ENOENT,
- "bpf_map_lookup_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+ !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
goto done;
}
err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "pthread_create"))
goto done;
/* recv total_bytes */
@@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
bytes += nr_recv;
}
- CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
- bytes, total_bytes, nr_recv, errno);
+ ASSERT_EQ(bytes, total_bytes, "recv");
WRITE_ONCE(stop, 1);
pthread_join(srv_thread, &thread_ret);
- CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
- PTR_ERR(thread_ret));
+ ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+
done:
close(lfd);
close(fd);
@@ -174,7 +172,7 @@ static void test_cubic(void)
struct bpf_link *link;
cubic_skel = bpf_cubic__open_and_load();
- if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load"))
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
@@ -197,7 +195,7 @@ static void test_dctcp(void)
struct bpf_link *link;
dctcp_skel = bpf_dctcp__open_and_load();
- if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
@@ -207,9 +205,7 @@ static void test_dctcp(void)
}
do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
- CHECK(dctcp_skel->bss->stg_result != expected_stg,
- "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
- dctcp_skel->bss->stg_result, expected_stg);
+ ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 731c343..e770912 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
}
bpf_program__set_type(prog, type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
err = bpf_object__load(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 92d51f3..8fb4a04 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind)
#endif
assert(0);
+ return 0;
}
static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
new file mode 100644
index 0000000..74d6d75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup1_hierarchy.skel.h"
+
+static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry; here the attach is expected to succeed */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success"))
+ goto cleanup;
+
+ err = bpf_link__destroy(fentry_link);
+ ASSERT_OK(err, "destroy_fentry");
+
+cleanup:
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+void test_cgroup1_hierarchy(void)
+{
+ struct test_cgroup1_hierarchy *skel;
+ __u64 current_cgid;
+ int hid, err;
+
+ skel = test_cgroup1_hierarchy__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1");
+ if (!ASSERT_OK(err, "fentry_set_target"))
+ goto destroy;
+
+ err = test_cgroup1_hierarchy__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto destroy;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ goto destroy;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ current_cgid = get_classid_cgroup_id();
+ if (!ASSERT_GE(current_cgid, 0, "cgroup1 id"))
+ goto cleanup;
+
+ hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(hid, 0, "cgroup1 id"))
+ goto cleanup;
+ skel->bss->target_hid = hid;
+
+ if (test__start_subtest("test_cgroup1_hierarchy")) {
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_root_cgid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_invalid_level")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgid")) {
+ skel->bss->target_ancestor_cgid = 0;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_hid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ skel->bss->target_hid = -1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name2")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_sleepable_prog")) {
+ skel->bss->target_hid = hid;
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_sleepable(skel);
+ }
+
+cleanup:
+ cleanup_classid_environment();
+destroy:
+ test_cgroup1_hierarchy__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 9026b42..addf720 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -71,7 +71,7 @@ void test_cgroup_v1v2(void)
}
ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
setup_classid_environment();
- set_classid(42);
+ set_classid();
ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
cleanup_classid_environment();
close(server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
index b25b870..e6e50a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void)
local_kptr_stash__destroy(skel);
}
+static void test_refcount_acquire_without_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+ ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+ ASSERT_OK(ret, "stash_refcounted_node run");
+ ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+ ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
static void test_local_kptr_stash_fail(void)
{
RUN_TESTS(local_kptr_stash_fail);
@@ -86,6 +117,8 @@ void test_local_kptr_stash(void)
test_local_kptr_stash_plain();
if (test__start_subtest("local_kptr_stash_unstash"))
test_local_kptr_stash_unstash();
+ if (test__start_subtest("refcount_acquire_without_unstash"))
+ test_refcount_acquire_without_unstash();
if (test__start_subtest("local_kptr_stash_fail"))
test_local_kptr_stash_fail();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index fe9a23e..0f7ea4d 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -78,7 +78,7 @@ static void obj_load_log_buf(void)
ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
"libbpf_log_not_empty");
ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
- ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"),
+ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"),
"good_log_verbose");
ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
"bad_log_not_empty");
@@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void)
opts.log_level = 2;
fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
good_prog_insns, good_prog_insn_cnt, &opts);
- ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2");
+ ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2");
ASSERT_GE(fd, 0, "good_fd2");
if (fd >= 0)
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
new file mode 100644
index 0000000..0c9abd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -0,0 +1,2124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <test_progs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* =================================
+ * SHORT AND CONSISTENT NUMBER TYPES
+ * =================================
+ */
+#define U64_MAX ((u64)UINT64_MAX)
+#define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)USHRT_MAX) /* real 16-bit limit, kept as u32 for comparisons */
+#define S64_MIN ((s64)INT64_MIN)
+#define S64_MAX ((s64)INT64_MAX)
+#define S32_MIN ((s32)INT_MIN)
+#define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)SHRT_MIN) /* (s16)0x80000000 would truncate to 0 */
+#define S16_MAX ((s16)SHRT_MAX) /* (s16)0x7fffffff would truncate to -1 */
+
+typedef unsigned long long ___u64;
+typedef unsigned int ___u32;
+typedef long long ___s64;
+typedef int ___s32;
+
+/* avoid conflicts with already defined types in kernel headers */
+#define u64 ___u64
+#define u32 ___u32
+#define s64 ___s64
+#define s32 ___s32
+
+/* ==================================
+ * STRING BUF ABSTRACTION AND HELPERS
+ * ==================================
+ */
+struct strbuf {
+ size_t buf_sz;
+ int pos;
+ char buf[0];
+};
+
+#define DEFINE_STRBUF(name, N) \
+ struct { struct strbuf buf; char data[(N)]; } ___##name; \
+ struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf)
+
+__printf(2, 3)
+static inline void snappendf(struct strbuf *s, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ s->pos += vsnprintf(s->buf + s->pos,
+ s->pos < s->buf_sz ? s->buf_sz - s->pos : 0,
+ fmt, args);
+ va_end(args);
+}
+
+/* ==================================
+ * GENERIC NUMBER TYPE AND OPERATIONS
+ * ==================================
+ */
+enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 };
+
+static __always_inline u64 min_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x < (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x < (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x < (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x < (s32)y ? (u32)(s32)x : (u32)(s32)y; /* zero-extend, as max_t() and cast_t() do */
+ default: printf("min_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 max_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x > (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x > (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x > (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y;
+ default: printf("max_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 cast_t(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x;
+ case U32: return (u32)x;
+ case S64: return (s64)x;
+ case S32: return (u32)(s32)x;
+ default: printf("cast_t!\n"); exit(1);
+ }
+}
+
+static const char *t_str(enum num_t t)
+{
+ switch (t) {
+ case U64: return "u64";
+ case U32: return "u32";
+ case S64: return "s64";
+ case S32: return "s32";
+ default: printf("t_str!\n"); exit(1);
+ }
+}
+
+static bool t_is_32(enum num_t t) /* true for the 32-bit domains (U32, S32) */
+{
+ switch (t) {
+ case U64: return false;
+ case U32: return true;
+ case S64: return false;
+ case S32: return true;
+ default: printf("t_is_32!\n"); exit(1);
+ }
+}
+
+static enum num_t t_signed(enum num_t t)
+{
+ switch (t) {
+ case U64: return S64;
+ case U32: return S32;
+ case S64: return S64;
+ case S32: return S32;
+ default: printf("t_signed!\n"); exit(1);
+ }
+}
+
+static enum num_t t_unsigned(enum num_t t)
+{
+ switch (t) {
+ case U64: return U64;
+ case U32: return U32;
+ case S64: return U64;
+ case S32: return U32;
+ default: printf("t_unsigned!\n"); exit(1);
+ }
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool num_is_small(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+ case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+ case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+ case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
+ default: printf("num_is_small!\n"); exit(1);
+ }
+}
+
+static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x)
+{
+ bool is_small = num_is_small(t, x);
+
+ if (is_small) {
+ switch (t) {
+ case U64: return snappendf(sb, "%llu", (u64)x);
+ case U32: return snappendf(sb, "%u", (u32)x);
+ case S64: return snappendf(sb, "%lld", (s64)x);
+ case S32: return snappendf(sb, "%d", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ } else {
+ switch (t) {
+ case U64:
+ if (x == U64_MAX)
+ return snappendf(sb, "U64_MAX");
+ else if (x >= U64_MAX - 256)
+ return snappendf(sb, "U64_MAX-%llu", U64_MAX - x);
+ else
+ return snappendf(sb, "%#llx", (u64)x);
+ case U32:
+ if ((u32)x == U32_MAX)
+ return snappendf(sb, "U32_MAX");
+ else if ((u32)x >= U32_MAX - 256)
+ return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x);
+ else
+ return snappendf(sb, "%#x", (u32)x);
+ case S64:
+ if ((s64)x == S64_MAX)
+ return snappendf(sb, "S64_MAX");
+ else if ((s64)x >= S64_MAX - 256)
+ return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x);
+ else if ((s64)x == S64_MIN)
+ return snappendf(sb, "S64_MIN");
+ else if ((s64)x <= S64_MIN + 256)
+ return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN);
+ else
+ return snappendf(sb, "%#llx", (s64)x);
+ case S32:
+ if ((s32)x == S32_MAX)
+ return snappendf(sb, "S32_MAX");
+ else if ((s32)x >= S32_MAX - 256)
+ return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x);
+ else if ((s32)x == S32_MIN)
+ return snappendf(sb, "S32_MIN");
+ else if ((s32)x <= S32_MIN + 256)
+ return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN);
+ else
+ return snappendf(sb, "%#x", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ }
+}
+
+/* ===================================
+ * GENERIC RANGE STRUCT AND OPERATIONS
+ * ===================================
+ */
+struct range {
+ u64 a, b;
+};
+
+static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x)
+{
+ if (x.a == x.b)
+ return snprintf_num(t, sb, x.a);
+
+ snappendf(sb, "[");
+ snprintf_num(t, sb, x.a);
+ snappendf(sb, "; ");
+ snprintf_num(t, sb, x.b);
+ snappendf(sb, "]");
+}
+
+static void print_range(enum num_t t, struct range x, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 128);
+
+ snprintf_range(t, sb, x);
+ printf("%s%s", sb->buf, sfx);
+}
+
+static const struct range unkn[] = {
+ [U64] = { 0, U64_MAX },
+ [U32] = { 0, U32_MAX },
+ [S64] = { (u64)S64_MIN, (u64)S64_MAX },
+ [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX },
+};
+
+static struct range unkn_subreg(enum num_t t)
+{
+ switch (t) {
+ case U64: return unkn[U32];
+ case U32: return unkn[U32];
+ case S64: return unkn[U32];
+ case S32: return unkn[S32];
+ default: printf("unkn_subreg!\n"); exit(1);
+ }
+}
+
+static struct range range(enum num_t t, u64 a, u64 b)
+{
+ switch (t) {
+ case U64: return (struct range){ (u64)a, (u64)b };
+ case U32: return (struct range){ (u32)a, (u32)b };
+ case S64: return (struct range){ (s64)a, (s64)b };
+ case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b };
+ default: printf("range!\n"); exit(1);
+ }
+}
+
+static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; }
+static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; }
+static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); }
+static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; }
+
+static bool range_eq(struct range x, struct range y)
+{
+ return x.a == y.a && x.b == y.b;
+}
+
+static struct range range_cast_to_s32(struct range x)
+{
+ u64 a = x.a, b = x.b;
+
+ /* if upper 32 bits are constant, lower 32 bits should form a proper
+ * s32 range to be correct
+ */
+ if (upper32(a) == upper32(b) && (s32)a <= (s32)b)
+ return range(S32, a, b);
+
+ /* Special case where upper bits form a small sequence of two
+ * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+ * 0x00000000 is also valid), while lower bits form a proper s32 range
+ * going from negative numbers to positive numbers.
+ *
+ * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating
+ * over full 64-bit numbers range will form a proper [-16, 16]
+ * ([0xffffff00; 0x00000010]) range in its lower 32 bits.
+ */
+ if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0)
+ return range(S32, a, b);
+
+ /* otherwise we can't derive much meaningful information */
+ return unkn[S32];
+}
+
+static struct range range_cast_u64(enum num_t to_t, struct range x)
+{
+ u64 a = (u64)x.a, b = (u64)x.b;
+
+ switch (to_t) {
+ case U64:
+ return x;
+ case U32:
+ if (upper32(a) != upper32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ if (sign64(a) != sign64(b))
+ return unkn[S64];
+ return range(S64, a, b);
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_u64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s64(enum num_t to_t, struct range x)
+{
+ s64 a = (s64)x.a, b = (s64)x.b;
+
+ switch (to_t) {
+ case U64:
+ /* equivalent to (s64)a <= (s64)b check */
+ if (sign64(a) != sign64(b))
+ return unkn[U64];
+ return range(U64, a, b);
+ case U32:
+ if (upper32(a) != upper32(b) || sign32(a) != sign32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ return x;
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_s64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_u32(enum num_t to_t, struct range x)
+{
+ u32 a = (u32)x.a, b = (u32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case S64:
+ /* u32 is always a valid zero-extended u64/s64 */
+ return range(to_t, a, b);
+ case U32:
+ return x;
+ case S32:
+ return range_cast_to_s32(range(U32, a, b));
+ default: printf("range_cast_u32!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s32(enum num_t to_t, struct range x)
+{
+ s32 a = (s32)x.a, b = (s32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case U32:
+ case S64:
+ if (sign32(a) != sign32(b))
+ return unkn[to_t];
+ return range(to_t, a, b);
+ case S32:
+ return x;
+ default: printf("range_cast_s32!\n"); exit(1);
+ }
+}
+
+/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving
+ * all possible information. Worst case, it will be unknown range within
+ * *to_t* domain, if nothing more specific can be guaranteed during the
+ * conversion
+ */
+static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from)
+{
+ switch (from_t) {
+ case U64: return range_cast_u64(to_t, from);
+ case U32: return range_cast_u32(to_t, from);
+ case S64: return range_cast_s64(to_t, from);
+ case S32: return range_cast_s32(to_t, from);
+ default: printf("range_cast!\n"); exit(1);
+ }
+}
+
+static bool is_valid_num(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return true;
+ case U32: return upper32(x) == 0;
+ case S64: return true;
+ case S32: return upper32(x) == 0;
+ default: printf("is_valid_num!\n"); exit(1);
+ }
+}
+
+static bool is_valid_range(enum num_t t, struct range x)
+{
+ if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b))
+ return false;
+
+ switch (t) {
+ case U64: return (u64)x.a <= (u64)x.b;
+ case U32: return (u32)x.a <= (u32)x.b;
+ case S64: return (s64)x.a <= (s64)x.b;
+ case S32: return (s32)x.a <= (s32)x.b;
+ default: printf("is_valid_range!\n"); exit(1);
+ }
+}
+
+static struct range range_improve(enum num_t t, struct range old, struct range new)
+{
+ return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b));
+}
+
+static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y)
+{
+ struct range y_cast;
+
+ y_cast = range_cast(y_t, x_t, y);
+
+ /* the case when new range knowledge, *y*, is a 32-bit subregister
+ * range, while previous range knowledge, *x*, is a full register
+ * 64-bit range, needs special treatment to take into account upper 32
+ * bits of full register range
+ */
+ if (t_is_32(y_t) && !t_is_32(x_t)) {
+ struct range x_swap;
+
+ /* some combinations of upper 32 bits and sign bit can lead to
+ * invalid ranges, in such cases it's easier to detect them
+ * after cast/swap than try to enumerate all the conditions
+ * under which transformation and knowledge transfer is valid
+ */
+ x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b));
+ if (!is_valid_range(x_t, x_swap))
+ return x;
+ return range_improve(x_t, x, x_swap);
+ }
+
+ /* otherwise, plain range cast and intersection works */
+ return range_improve(x_t, x, y_cast);
+}
+
+/* =======================
+ * GENERIC CONDITIONAL OPS
+ * =======================
+ */
+enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE };
+
+static enum op complement_op(enum op op)
+{
+ switch (op) {
+ case OP_LT: return OP_GE;
+ case OP_LE: return OP_GT;
+ case OP_GT: return OP_LE;
+ case OP_GE: return OP_LT;
+ case OP_EQ: return OP_NE;
+ case OP_NE: return OP_EQ;
+ default: printf("complement_op!\n"); exit(1);
+ }
+}
+
+static const char *op_str(enum op op)
+{
+ switch (op) {
+ case OP_LT: return "<";
+ case OP_LE: return "<=";
+ case OP_GT: return ">";
+ case OP_GE: return ">=";
+ case OP_EQ: return "==";
+ case OP_NE: return "!=";
+ default: printf("op_str!\n"); exit(1);
+ }
+}
+
+/* Can register with range [x.a, x.b] *EVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+#define range_canbe(T) do { \
+ switch (op) { \
+ case OP_LT: return (T)x.a < (T)y.b; \
+ case OP_LE: return (T)x.a <= (T)y.b; \
+ case OP_GT: return (T)x.b > (T)y.a; \
+ case OP_GE: return (T)x.b >= (T)y.a; \
+ case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \
+ case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \
+ default: printf("range_canbe op %d\n", op); exit(1); \
+ } \
+} while (0)
+
+ switch (t) {
+ case U64: { range_canbe(u64); }
+ case U32: { range_canbe(u32); }
+ case S64: { range_canbe(s64); }
+ case S32: { range_canbe(s32); }
+ default: printf("range_canbe!\n"); exit(1);
+ }
+#undef range_canbe
+}
+
+/* Does register with range [x.a, x.b] *ALWAYS* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ /* always op <=> ! canbe complement(op) */
+ return !range_canbe_op(t, x, y, complement_op(op));
+}
+
+/* Does register with range [x.a, x.b] *NEVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ return !range_canbe_op(t, x, y, op);
+}
+
+/* similar to verifier's is_branch_taken():
+ * 1 - always taken;
+ * 0 - never taken,
+ * -1 - unsure.
+ */
+static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ if (range_always_op(t, x, y, op))
+ return 1;
+ if (range_never_op(t, x, y, op))
+ return 0;
+ return -1;
+}
+
+/* What would be the new estimates for register x and y ranges assuming truthful
+ * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy.
+ *
+ * We assume "interesting" cases where ranges overlap. Cases where it's
+ * obvious that (x OP y) is either always true or false should be filtered with
+ * range_never and range_always checks.
+ */
+static void range_cond(enum num_t t, struct range x, struct range y,
+ enum op op, struct range *newx, struct range *newy)
+{
+ if (!range_canbe_op(t, x, y, op)) {
+ /* nothing to adjust, can't happen, return original values */
+ *newx = x;
+ *newy = y;
+ return;
+ }
+ switch (op) {
+ case OP_LT:
+ *newx = range(t, x.a, min_t(t, x.b, y.b - 1));
+ *newy = range(t, max_t(t, x.a + 1, y.a), y.b);
+ break;
+ case OP_LE:
+ *newx = range(t, x.a, min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), y.b);
+ break;
+ case OP_GT:
+ *newx = range(t, max_t(t, x.a, y.a + 1), x.b);
+ *newy = range(t, y.a, min_t(t, x.b - 1, y.b));
+ break;
+ case OP_GE:
+ *newx = range(t, max_t(t, x.a, y.a), x.b);
+ *newy = range(t, y.a, min_t(t, x.b, y.b));
+ break;
+ case OP_EQ:
+ *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ break;
+ case OP_NE:
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ break;
+
+ /* unreachable after the break above: extended logic below is not supported by verifier just yet */
+ if (x.a == x.b && x.a == y.a) {
+ /* X is a constant matching left side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a + 1, y.b);
+ } else if (x.a == x.b && x.b == y.b) {
+ /* X is a constant matching right side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b - 1);
+ } else if (y.a == y.b && x.a == y.a) {
+ /* Y is a constant matching left side of X */
+ *newx = range(t, x.a + 1, x.b);
+ *newy = range(t, y.a, y.b);
+ } else if (y.a == y.b && x.b == y.b) {
+ /* Y is a constant matching right side of X */
+ *newx = range(t, x.a, x.b - 1);
+ *newy = range(t, y.a, y.b);
+ } else {
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+/* =======================
+ * REGISTER STATE HANDLING
+ * =======================
+ */
+struct reg_state {
+ struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */
+ bool valid;
+};
+
+static void print_reg_state(struct reg_state *r, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 512);
+ enum num_t t;
+ int cnt = 0;
+
+ if (!r->valid) {
+ printf("<not found>%s", sfx);
+ return;
+ }
+
+ snappendf(sb, "scalar(");
+ for (t = first_t; t <= last_t; t++) {
+ snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t));
+ snprintf_range(t, sb, r->r[t]);
+ }
+ snappendf(sb, ")");
+
+ printf("%s%s", sb->buf, sfx);
+}
+
+static void print_refinement(enum num_t s_t, struct range src,
+ enum num_t d_t, struct range old, struct range new,
+ const char *ctx)
+{
+ printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t));
+ print_range(s_t, src, "");
+ printf(" (%s)DST_OLD=", t_str(d_t));
+ print_range(d_t, old, "");
+ printf(" (%s)DST_NEW=", t_str(d_t));
+ print_range(d_t, new, "\n");
+}
+
+static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx)
+{
+ enum num_t d_t, s_t;
+ struct range old;
+ bool keep_going = false;
+
+again:
+ /* try to derive new knowledge from just learned range x of type t */
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], t, x);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(t, x, d_t, old, r->r[d_t], ctx);
+ }
+ }
+
+ /* now see if we can derive anything new from updated reg_state's ranges */
+ for (s_t = first_t; s_t <= last_t; s_t++) {
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx);
+ }
+ }
+ }
+
+ /* keep refining until we converge */
+ if (keep_going) {
+ keep_going = false;
+ goto again;
+ }
+}
+
+/* Initialize rs as a register holding constant val in domain t: domain t
+ * gets the [val, val] range, all the other domains start out unknown and
+ * are then derived through refinement.
+ */
+static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val)
+{
+	enum num_t dom;
+
+	rs->valid = true;
+	for (dom = first_t; dom <= last_t; dom++) {
+		if (dom == t)
+			rs->r[dom] = range(t, val, val);
+		else
+			rs->r[dom] = unkn[dom];
+	}
+
+	reg_state_refine(rs, t, rs->r[t], "CONST");
+}
+
+/* Simulate the effect of condition `x <op> y` (evaluated in domain t) being
+ * true on both register states. Refined copies of x and y are stored into
+ * newx and newy; either can be NULL if the result is not needed, and either
+ * can point to the corresponding input, as inputs are only read before
+ * outputs are written. `ctx` labels verbose refinement output.
+ */
+static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op,
+			   struct reg_state *newx, struct reg_state *newy, const char *ctx)
+{
+	struct reg_state res_x = *x, res_y = *y;
+	enum num_t doms[2], dom;
+	struct range rx, ry;
+	char label[32];
+	int dom_cnt = 0, k;
+
+	if (op == OP_EQ || op == OP_NE) {
+		/* (in)equality doesn't care about signedness, so both signed
+		 * and unsigned views of the same width are processed
+		 */
+		doms[dom_cnt++] = t_unsigned(t);
+		doms[dom_cnt++] = t_signed(t);
+	} else {
+		doms[dom_cnt++] = t;
+	}
+
+	for (k = 0; k < dom_cnt; k++) {
+		dom = doms[k];
+		rx = x->r[dom];
+		ry = y->r[dom];
+
+		range_cond(dom, rx, ry, op, &rx, &ry);
+
+		if (newx) {
+			snprintf(label, sizeof(label), "%s R1", ctx);
+			reg_state_refine(&res_x, dom, rx, label);
+		}
+		if (newy) {
+			snprintf(label, sizeof(label), "%s R2", ctx);
+			reg_state_refine(&res_y, dom, ry, label);
+		}
+	}
+
+	if (newx)
+		*newx = res_x;
+	if (newy)
+		*newy = res_y;
+}
+
+/* Determine whether `x <op> y`, evaluated in domain t, has a known outcome.
+ * Passes through range_branch_taken_op() results; this function itself
+ * relies on 1 (always taken), 0 (never taken) and -1 (indecisive).
+ */
+static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y,
+				     enum op op)
+{
+	const enum num_t subdoms[] = { U32, S32 };
+	enum num_t u_dom, s_dom;
+	int br_u, br_s, br, k;
+
+	/* ordering comparisons are decided purely within requested domain */
+	if (op != OP_EQ && op != OP_NE)
+		return range_branch_taken_op(t, x->r[t], y->r[t], op);
+
+	/* OP_EQ and OP_NE are sign-agnostic, so consult both signed and
+	 * unsigned ranges of the same width
+	 */
+	u_dom = t_unsigned(t);
+	s_dom = t_signed(t);
+	br_u = range_branch_taken_op(u_dom, x->r[u_dom], y->r[u_dom], op);
+	br_s = range_branch_taken_op(s_dom, x->r[s_dom], y->r[s_dom], op);
+
+	if (br_u >= 0 && br_s >= 0 && br_u != br_s)
+		ASSERT_FALSE(true, "branch taken inconsistency!\n");
+
+	/* if 64-bit ranges are indecisive, lower 32 bits can still settle it:
+	 * they can only disprove equality (OP_EQ is never taken) or prove
+	 * inequality (OP_NE is always taken), never the other way around
+	 */
+	if (br_u == -1 && (t == U64 || t == S64)) {
+		for (k = 0; k < 2; k++) {
+			br = range_branch_taken_op(subdoms[k], x->r[subdoms[k]],
+						   y->r[subdoms[k]], op);
+			if (op == OP_EQ && br == 0)
+				return 0;
+			if (op == OP_NE && br == 1)
+				return 1;
+		}
+	}
+
+	if (br_u >= 0)
+		return br_u;
+	return br_s;
+}
+
+/* =====================================
+ * BPF PROGS GENERATION AND VERIFICATION
+ * =====================================
+ */
+struct case_spec {
+ /* whether r6/r7 are initialized with 32-bit (w6 = w0) or 64-bit (r6 = r0) moves */
+ bool init_subregs;
+ /* whether to establish initial value range on full registers (r6/r7,
+ * 64-bit jumps and constants) or on sub-registers (w6/w7, 32-bit ones)
+ */
+ bool setup_subregs;
+ /* whether to establish initial value range using signed or unsigned
+ * comparisons (i.e., initialize umin/umax or smin/smax directly)
+ */
+ bool setup_signed;
+ /* whether to perform comparison on full registers or sub-registers */
+ bool compare_subregs;
+ /* whether to perform comparison using signed or unsigned operations;
+ * not consulted for OP_EQ/OP_NE, which always map to BPF_JEQ/BPF_JNE
+ */
+ bool compare_signed;
+};
+
+/* Build and load a small BPF program that limits r6/w6 to range x and r7/w7
+ * to range y, and then compares them with `op`; register widths and
+ * signedness of all the steps are selected by `spec`.
+ *
+ * branch_taken is the expected outcome of the comparison: if it is 1, FALSE
+ * branch gets poisoned with a call to helper 0xDEAD, if it is 0, TRUE branch
+ * gets poisoned; otherwise both branches just exit. Instruction indices at
+ * which FALSE and TRUE branches start are stored into *false_pos and
+ * *true_pos (they are left untouched if op is not recognized).
+ *
+ * Verifier log ends up in log_buf. Returns 0 on successful load (program FD
+ * is closed right away), -ENOTSUP for unrecognized op, or negative
+ * bpf_prog_load() result.
+ */
+static int load_range_cmp_prog(struct range x, struct range y, enum op op,
+			       int branch_taken, struct case_spec spec,
+			       char *log_buf, size_t log_sz,
+			       int *false_pos, int *true_pos)
+{
+#define emit(insn) ({							\
+	struct bpf_insn __insns[] = { insn };				\
+	int __i;							\
+	for (__i = 0; __i < ARRAY_SIZE(__insns); __i++)			\
+		insns[cur_pos + __i] = __insns[__i];			\
+	cur_pos += __i;							\
+})
+#define JMP_TO(target) (target - cur_pos - 1)
+/* r0 = r6; r0 = r7; | w0 = w6; w0 = w7;
+ * these moves exist only to make verifier log states of r6 and r7
+ */
+#define emit_reg_dump() do {						\
+	if (spec.compare_subregs) {					\
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));		\
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));		\
+	} else {							\
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));		\
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));		\
+	}								\
+} while (0)
+	const int test_regs[] = { BPF_REG_6, BPF_REG_7 };
+	const struct range *test_ranges[] = { &x, &y };
+	const int setup_lt = spec.setup_signed ? BPF_JSLT : BPF_JLT;
+	const int setup_gt = spec.setup_signed ? BPF_JSGT : BPF_JGT;
+	int cur_pos = 0, exit_pos, fd, op_code, k;
+	struct bpf_insn insns[64];
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.log_level = 2,
+		.log_buf = log_buf,
+		.log_size = log_sz,
+		.prog_flags = BPF_F_TEST_REG_INVARIANTS,
+	);
+
+	/* pick jump opcode for the range test instruction up front */
+	switch (op) {
+	case OP_LT:
+		op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT;
+		break;
+	case OP_LE:
+		op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE;
+		break;
+	case OP_GT:
+		op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT;
+		break;
+	case OP_GE:
+		op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE;
+		break;
+	case OP_EQ:
+		op_code = BPF_JEQ;
+		break;
+	case OP_NE:
+		op_code = BPF_JNE;
+		break;
+	default:
+		printf("unrecognized op %d\n", op);
+		return -ENOTSUP;
+	}
+
+	/* ; skip exit block below
+	 * goto +2;
+	 */
+	emit(BPF_JMP_A(2));
+	exit_pos = cur_pos;
+	/* ; exit block for all the preparatory conditionals
+	 * out:
+	 * r0 = 0;
+	 * exit;
+	 */
+	emit(BPF_MOV64_IMM(BPF_REG_0, 0));
+	emit(BPF_EXIT_INSN());
+
+	/* ; assign r6/w6 and r7/w7 unpredictable u64/u32 value
+	 * call bpf_get_current_pid_tgid;
+	 * r6 = r0;               | w6 = w0;
+	 * call bpf_get_current_pid_tgid;
+	 * r7 = r0;               | w7 = w0;
+	 */
+	for (k = 0; k < 2; k++) {
+		emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+		if (spec.init_subregs)
+			emit(BPF_MOV32_REG(test_regs[k], BPF_REG_0));
+		else
+			emit(BPF_MOV64_REG(test_regs[k], BPF_REG_0));
+	}
+
+	/* ; setup initial r6/w6 possible value range ([x.a, x.b]),
+	 * ; and then the same for r7/w7 ([y.a, y.b])
+	 * r1 = %[lo] ll;         | w1 = %[lo];
+	 * r2 = %[hi] ll;         | w2 = %[hi];
+	 * if rX < r1 goto out;   | if wX < w1 goto out;
+	 * if rX > r2 goto out;   | if wX > w2 goto out;
+	 */
+	for (k = 0; k < 2; k++) {
+		const struct range *r = test_ranges[k];
+
+		if (spec.setup_subregs) {
+			emit(BPF_MOV32_IMM(BPF_REG_1, (s32)r->a));
+			emit(BPF_MOV32_IMM(BPF_REG_2, (s32)r->b));
+			emit(BPF_JMP32_REG(setup_lt, test_regs[k], BPF_REG_1,
+					   JMP_TO(exit_pos)));
+			emit(BPF_JMP32_REG(setup_gt, test_regs[k], BPF_REG_2,
+					   JMP_TO(exit_pos)));
+		} else {
+			emit(BPF_LD_IMM64(BPF_REG_1, r->a));
+			emit(BPF_LD_IMM64(BPF_REG_2, r->b));
+			emit(BPF_JMP_REG(setup_lt, test_regs[k], BPF_REG_1,
+					 JMP_TO(exit_pos)));
+			emit(BPF_JMP_REG(setup_gt, test_regs[k], BPF_REG_2,
+					 JMP_TO(exit_pos)));
+		}
+	}
+
+	/* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * ; this is used for debugging, as verifier doesn't always print
+	 * ; registers states as of condition jump instruction (e.g., when
+	 * ; precision marking happens)
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 */
+	emit_reg_dump();
+
+	/* ; range test instruction, jumps over 3 instructions of FALSE branch
+	 * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3;
+	 */
+	if (spec.compare_subregs)
+		emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+	else
+		emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+
+	/* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 * exit;
+	 */
+	*false_pos = cur_pos;
+	emit_reg_dump();
+	if (branch_taken == 1) /* false branch is never taken */
+		emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+	else
+		emit(BPF_EXIT_INSN());
+
+	/* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 * exit;
+	 */
+	*true_pos = cur_pos;
+	emit_reg_dump();
+	if (branch_taken == 0) /* true branch is never taken */
+		emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+	emit(BPF_EXIT_INSN()); /* last instruction has to be exit */
+
+	fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test",
+			   "GPL", insns, cur_pos, &opts);
+	if (fd < 0)
+		return fd;
+
+	close(fd);
+	return 0;
+#undef emit_reg_dump
+#undef emit
+#undef JMP_TO
+}
+
+#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0)
+
+/* Parse a single number the way it appears in the verifier log: values
+ * starting with "0x" are parsed as hex, everything else as signed decimal.
+ * Returns 0 and stores the result into *val, or -EINVAL if nothing parsed.
+ */
+static int parse_log_num(const char *p, u64 *val)
+{
+	unsigned long long uval;
+	long long sval;
+
+	if (p[0] == '0' && p[1] == 'x') {
+		if (sscanf(p, "%llx", &uval) != 1)
+			return -EINVAL;
+		*val = uval;
+	} else {
+		if (sscanf(p, "%lld", &sval) != 1)
+			return -EINVAL;
+		*val = sval;
+	}
+	return 0;
+}
+
+/* Parse register state from verifier log.
+ * `s` should point to the start of "Rx = ..." substring in the verifier log.
+ *
+ * On success returns 0 and fills out all four ranges of `reg`, marking it
+ * valid. Returns -EINVAL if there is no '=' to start from or if a number
+ * can't be parsed; `reg->valid` is not modified in that case.
+ */
+static int parse_reg_state(const char *s, struct reg_state *reg)
+{
+	/* There are two generic forms for SCALAR register:
+	 * - known constant: R6_rwD=P%lld
+	 * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated
+	 *   list of optional range specifiers:
+	 *   - umin=%llu, if missing, assumed 0;
+	 *   - umax=%llu, if missing, assumed U64_MAX;
+	 *   - smin=%lld, if missing, assumed S64_MIN;
+	 *   - smax=%lld, if missing, assumed S64_MAX;
+	 *   - umin32=%d, if missing, assumed 0;
+	 *   - umax32=%d, if missing, assumed U32_MAX;
+	 *   - smin32=%d, if missing, assumed S32_MIN;
+	 *   - smax32=%d, if missing, assumed S32_MAX;
+	 *   - var_off=(%#llx; %#llx), tnum part, we don't care about it.
+	 *
+	 * If some of the values are equal, they will be grouped (but min/max
+	 * are not mixed together, and similarly negative values are not
+	 * grouped with non-negative ones). E.g.:
+	 *
+	 *   R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000)
+	 *
+	 * _rwD part is optional (and any of the letters can be missing).
+	 * P (precision mark) is optional as well.
+	 *
+	 * Anything inside scalar() is optional, including id, of course.
+	 */
+	struct {
+		const char *pfx;
+		u64 *dst, def;
+		bool is_32, is_set;
+	} *f, fields[8] = {
+		{"smin=", &reg->r[S64].a, S64_MIN},
+		{"smax=", &reg->r[S64].b, S64_MAX},
+		{"umin=", &reg->r[U64].a, 0},
+		{"umax=", &reg->r[U64].b, U64_MAX},
+		{"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+		{"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+		{"umin32=", &reg->r[U32].a, 0, true},
+		{"umax32=", &reg->r[U32].b, U32_MAX, true},
+	};
+	const char *p;
+	u64 val;
+	int i;
+
+	p = strchr(s, '=');
+	if (!p)
+		return -EINVAL;
+	p++;
+	if (*p == 'P')
+		p++;
+
+	if (!str_has_pfx(p, "scalar(")) {
+		/* known constant, it is the same value in every domain */
+		long long sval;
+		enum num_t t;
+
+		if (parse_log_num(p, &val))
+			return -EINVAL;
+		sval = val;
+
+		reg->valid = true;
+		for (t = first_t; t <= last_t; t++)
+			reg->r[t] = range(t, sval, sval);
+		return 0;
+	}
+
+	/* sizeof() counts terminating zero, which here stands in for '(' */
+	p += sizeof("scalar");
+	while (p) {
+		int midxs[ARRAY_SIZE(fields)], mcnt = 0;
+
+		/* collect all the grouped "name=" prefixes sharing one value */
+		for (i = 0; i < ARRAY_SIZE(fields); i++) {
+			f = &fields[i];
+			if (!str_has_pfx(p, f->pfx))
+				continue;
+			midxs[mcnt++] = i;
+			p += strlen(f->pfx);
+		}
+
+		if (mcnt) {
+			/* populate all matched fields */
+			if (parse_log_num(p, &val))
+				return -EINVAL;
+
+			for (i = 0; i < mcnt; i++) {
+				f = &fields[midxs[i]];
+				f->is_set = true;
+				*f->dst = f->is_32 ? (u64)(u32)val : val;
+			}
+		} else if (str_has_pfx(p, "var_off")) {
+			/* skip "var_off=(0x0; 0x3f)" part completely */
+			p = strchr(p, ')');
+			if (!p)
+				return -EINVAL;
+			p++;
+		}
+
+		/* move on to the next specifier; stop at closing ')' or if
+		 * the log ends without one (strpbrk() returns NULL then,
+		 * which must not be dereferenced)
+		 */
+		p = strpbrk(p, ",)");
+		if (!p || *p == ')')
+			break;
+		p++;
+	}
+
+	reg->valid = true;
+
+	/* everything that wasn't spelled out in the log has default value */
+	for (i = 0; i < ARRAY_SIZE(fields); i++) {
+		f = &fields[i];
+		if (!f->is_set)
+			*f->dst = f->def;
+	}
+
+	return 0;
+}
+
+
+/* Parse all register states (TRUE/FALSE branches and DST/SRC registers)
+ * out of the verifier log for a corresponding test case BPF program.
+ *
+ * States are looked up at "r0 = r6" and "r0 = r7" instructions (w0/w6/w7 if
+ * spec.compare_subregs is set) located at false_pos, false_pos + 1, true_pos
+ * and true_pos + 1. If an instruction isn't present in the log, its state is
+ * left marked as not valid. Returns 0 on success and -EINVAL if the
+ * instruction was found, but register state couldn't be located or parsed;
+ * states that were not parsed successfully are always marked as not valid.
+ */
+static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
+			       int false_pos, int true_pos,
+			       struct reg_state *false1_reg, struct reg_state *false2_reg,
+			       struct reg_state *true1_reg, struct reg_state *true2_reg)
+{
+	struct {
+		int insn_idx;
+		int reg_idx;
+		const char *reg_upper;
+		struct reg_state *state;
+	} specs[] = {
+		{false_pos,     6, "R6=", false1_reg},
+		{false_pos + 1, 7, "R7=", false2_reg},
+		{true_pos,      6, "R6=", true1_reg},
+		{true_pos + 1,  7, "R7=", true2_reg},
+	};
+	char buf[32];
+	const char *p = log_buf, *q;
+	int i, err;
+
+	/* start with all states being "not found", so that bailing out early
+	 * doesn't leave any of the output states uninitialized
+	 */
+	for (i = 0; i < ARRAY_SIZE(specs); i++)
+		*specs[i].state = (struct reg_state){.valid = false};
+
+	for (i = 0; i < ARRAY_SIZE(specs); i++) {
+		snprintf(buf, sizeof(buf), "%d: (%s) %s = %s%d", specs[i].insn_idx,
+			 spec.compare_subregs ? "bc" : "bf",
+			 spec.compare_subregs ? "w0" : "r0",
+			 spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
+
+		/* instruction is missing from the log, keep state invalid */
+		q = strstr(p, buf);
+		if (!q)
+			continue;
+
+		/* subsequent lookups continue from the last match */
+		p = strstr(q, specs[i].reg_upper);
+		if (!p)
+			return -EINVAL;
+		err = parse_reg_state(p, specs[i].state);
+		if (err)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/* Check that ranges x and y (both in domain t) are the same. On mismatch
+ * print a "MISMATCH <ctx1>.<ctx2>: x != y" line and return false.
+ */
+static bool assert_range_eq(enum num_t t, struct range x, struct range y,
+			    const char *ctx1, const char *ctx2)
+{
+	DEFINE_STRBUF(sb, 512);
+
+	if (!range_eq(x, y)) {
+		snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2);
+		snprintf_range(t, sb, x);
+		snappendf(sb, " != ");
+		snprintf_range(t, sb, y);
+
+		printf("%s\n", sb->buf);
+		return false;
+	}
+
+	return true;
+}
+
+/* Compare actual register state r against expected state e, printing
+ * details about every difference. Two states that are both not valid are
+ * considered equal; ranges are compared only if both states are valid.
+ */
+static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx)
+{
+	bool all_match = true;
+	enum num_t t;
+
+	if (r->valid != e->valid) {
+		printf("MISMATCH %s: actual %s != expected %s\n", ctx,
+		       r->valid ? "<valid>" : "<invalid>",
+		       e->valid ? "<valid>" : "<invalid>");
+		return false;
+	}
+
+	if (r->valid) {
+		/* go through all domains, so that all mismatches are reported */
+		for (t = first_t; t <= last_t; t++)
+			all_match = assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t)) && all_match;
+	}
+
+	return all_match;
+}
+
+/* Print verifier log line by line, dropping lines that start with
+ * "mark_precise: " (precision backtracking noise).
+ */
+static void print_verifier_log(const char *buf)
+{
+	const char *eol;
+
+	while (*buf) {
+		eol = strchrnul(buf, '\n');
+
+		if (!str_has_pfx(buf, "mark_precise: "))
+			printf("%.*s\n", (int)(eol - buf), buf);
+
+		/* step over newline, unless it's the end of the log already */
+		buf = *eol ? eol + 1 : eol;
+	}
+}
+
+/* Simulate provided test case purely with our own range-based logic.
+ * This is done to set up expectations for verifier's branch_taken logic and
+ * verifier's register states in the verifier log.
+ *
+ * x and y are initial ranges (in init_t domain) of the first and the second
+ * register, op is performed in cond_t domain. Expected states for FALSE
+ * branch go into fr1/fr2 and for TRUE branch into tr1/tr2; states of
+ * a branch that can't be taken are marked as not valid. Result of
+ * reg_state_branch_taken_op() is stored into *branch_taken.
+ */
+static void sim_case(enum num_t init_t, enum num_t cond_t,
+		     struct range x, struct range y, enum op op,
+		     struct reg_state *fr1, struct reg_state *fr2,
+		     struct reg_state *tr1, struct reg_state *tr2,
+		     int *branch_taken)
+{
+	const bool verbose = env.verbosity >= VERBOSE_VERY;
+	enum op rev_op = complement_op(op);
+	struct reg_state rc;
+	struct range z;
+	enum num_t t;
+
+	fr1->valid = fr2->valid = true;
+	tr1->valid = tr2->valid = true;
+	for (t = first_t; t <= last_t; t++) {
+		/* if we are initializing using 32-bit subregisters,
+		 * full registers get upper 32 bits zeroed automatically
+		 */
+		if (t_is_32(init_t))
+			z = unkn_subreg(t);
+		else
+			z = unkn[t];
+
+		fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z;
+	}
+
+	/* step 1: lower bounds, r1 >= x.a, r2 >= y.a */
+	reg_state_set_const(&rc, init_t, x.a);
+	reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A");
+	reg_state_set_const(&rc, init_t, y.a);
+	reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C");
+	*tr1 = *fr1;
+	*tr2 = *fr2;
+	if (verbose) {
+		printf("STEP1 (%s) R1: ", t_str(init_t));
+		print_reg_state(fr1, "\n");
+		printf("STEP1 (%s) R2: ", t_str(init_t));
+		print_reg_state(fr2, "\n");
+	}
+
+	/* step 2: upper bounds, r1 <= x.b, r2 <= y.b */
+	reg_state_set_const(&rc, init_t, x.b);
+	reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B");
+	reg_state_set_const(&rc, init_t, y.b);
+	reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D");
+	*tr1 = *fr1;
+	*tr2 = *fr2;
+	if (verbose) {
+		printf("STEP2 (%s) R1: ", t_str(init_t));
+		print_reg_state(fr1, "\n");
+		printf("STEP2 (%s) R2: ", t_str(init_t));
+		print_reg_state(fr2, "\n");
+	}
+
+	/* step 3: r1 <op> r2; only branches that can be taken get states */
+	*branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op);
+	fr1->valid = fr2->valid = false;
+	tr1->valid = tr2->valid = false;
+	if (*branch_taken != 1) {
+		/* FALSE is possible, in which case complement of op holds */
+		fr1->valid = fr2->valid = true;
+		reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE");
+	}
+	if (*branch_taken != 0) {
+		/* TRUE is possible */
+		tr1->valid = tr2->valid = true;
+		reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE");
+	}
+	if (verbose) {
+		printf("STEP3 (%s) FALSE R1:", t_str(cond_t));
+		print_reg_state(fr1, "\n");
+		printf("STEP3 (%s) FALSE R2:", t_str(cond_t));
+		print_reg_state(fr2, "\n");
+		printf("STEP3 (%s) TRUE  R1:", t_str(cond_t));
+		print_reg_state(tr1, "\n");
+		printf("STEP3 (%s) TRUE  R2:", t_str(cond_t));
+		print_reg_state(tr2, "\n");
+	}
+}
+
+/* ===============================
+ * HIGH-LEVEL TEST CASE VALIDATION
+ * ===============================
+ */
+static u32 upper_seeds[] = { /* upper 32 bits of generated 64-bit values, see gen_vals() */
+ 0,
+ 1,
+ U32_MAX,
+ U32_MAX - 1,
+ S32_MAX,
+ (u32)S32_MIN,
+};
+
+static u32 lower_seeds[] = { /* lower 32 bits of generated 64-bit values; also used as is for 32-bit values */
+ 0,
+ 1,
+ 2, (u32)-2,
+ 255, (u32)-255,
+ UINT_MAX,
+ UINT_MAX - 1,
+ INT_MAX,
+ (u32)INT_MIN,
+};
+
+struct ctx { /* state shared by generated test cases: seed-derived values/ranges, limits, and progress */
+ int val_cnt, subval_cnt, range_cnt, subrange_cnt; /* number of unique 64-bit/32-bit values and of ranges built from them */
+ u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; /* unique 64-bit values in unsigned order */
+ s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; /* same values as uvals, in signed order */
+ u32 usubvals[ARRAY_SIZE(lower_seeds)]; /* unique 32-bit values in unsigned order */
+ s32 ssubvals[ARRAY_SIZE(lower_seeds)]; /* same values as usubvals, in signed order */
+ struct range *uranges, *sranges; /* range_cnt entries each, allocated by gen_ranges(), freed by cleanup_ctx() */
+ struct range *usubranges, *ssubranges; /* subrange_cnt entries each, allocated by gen_ranges(), freed by cleanup_ctx() */
+ int max_failure_cnt, cur_failure_cnt; /* limit is set from REG_BOUNDS_MAX_FAILURE_CNT envvar */
+ int total_case_cnt, case_cnt; /* expected total and number of cases done so far, for progress reporting */
+ int rand_case_cnt; /* set from REG_BOUNDS_RAND_CASE_CNT envvar */
+ unsigned rand_seed; /* set from REG_BOUNDS_RAND_SEED envvar */
+ __u64 start_ns; /* get_time_ns() timestamp used to compute elapsed time and ETA */
+ char progress_ctx[64]; /* label printed in PROGRESS lines */
+};
+
+/* Release all range arrays owned by ctx; ctx itself is not freed. */
+static void cleanup_ctx(struct ctx *ctx)
+{
+	struct range *owned[] = {
+		ctx->uranges,
+		ctx->sranges,
+		ctx->usubranges,
+		ctx->ssubranges,
+	};
+	int i;
+
+	/* free(NULL) is a no-op, so partially set up ctx is fine */
+	for (i = 0; i < 4; i++)
+		free(owned[i]);
+}
+
+struct subtest_case { /* one test case description, used to build the subtest name */
+ enum num_t init_t; /* numeric domain in which x and y ranges are set up */
+ enum num_t cond_t; /* numeric domain in which op is performed */
+ struct range x; /* initial range of the first register */
+ struct range y; /* initial range of the second register */
+ enum op op; /* conditional operation; omitted from the name when printed as "<op>" */
+};
+
+/* Append "(<init_t>)<x> (<cond_t>)<op> <y>" description of test case t to
+ * sb. If use_op is false, "<op>" placeholder is used instead of actual op.
+ */
+static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op)
+{
+	const char *op_name;
+
+	snappendf(sb, "(%s)", t_str(t->init_t));
+	snprintf_range(t->init_t, sb, t->x);
+
+	if (use_op)
+		op_name = op_str(t->op);
+	else
+		op_name = "<op>";
+	snappendf(sb, " (%s)%s ", t_str(t->cond_t), op_name);
+
+	/* both ranges are formatted in the initialization domain */
+	snprintf_range(t->init_t, sb, t->y);
+}
+
+/* Generate and validate test case based on specific combination of setup
+ * register ranges (including their expected num_t domain), and conditional
+ * operation to perform (including num_t domain in which it has to be
+ * performed).
+ *
+ * Expected register states come from sim_case(), actual ones are parsed out
+ * of the verifier log of the generated program. Returns 0 if everything
+ * matched and -EINVAL otherwise.
+ */
+static int verify_case_op(enum num_t init_t, enum num_t cond_t,
+			  struct range x, struct range y, enum op op)
+{
+	const bool init_is_32 = init_t == U32 || init_t == S32;
+	const bool init_is_signed = init_t == S64 || init_t == S32;
+	const bool cond_is_32 = cond_t == U32 || cond_t == S32;
+	const bool cond_is_signed = cond_t == S64 || cond_t == S32;
+	struct case_spec spec = {
+		.init_subregs = init_is_32,
+		.setup_subregs = init_is_32,
+		.setup_signed = init_is_signed,
+		.compare_subregs = cond_is_32,
+		.compare_signed = cond_is_signed,
+	};
+	struct reg_state fr1, fr2, tr1, tr2; /* actual states */
+	struct reg_state fe1, fe2, te1, te2; /* expected states */
+	int err, false_pos = 0, true_pos = 0, branch_taken;
+	char log_buf[256 * 1024];
+	bool failed = false, matched;
+
+	log_buf[0] = '\0';
+
+	sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken);
+
+	err = load_range_cmp_prog(x, y, op, branch_taken, spec,
+				  log_buf, sizeof(log_buf), &false_pos, &true_pos);
+	if (err) {
+		ASSERT_OK(err, "load_range_cmp_prog");
+		failed = true;
+	}
+
+	err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos,
+				  &fr1, &fr2, &tr1, &tr2);
+	if (err) {
+		ASSERT_OK(err, "parse_range_cmp_log");
+		failed = true;
+	}
+
+	/* comparison stops at the first register that doesn't match */
+	matched = assert_reg_state_eq(&fr1, &fe1, "false_reg1") &&
+		  assert_reg_state_eq(&fr2, &fe2, "false_reg2") &&
+		  assert_reg_state_eq(&tr1, &te1, "true_reg1") &&
+		  assert_reg_state_eq(&tr2, &te2, "true_reg2");
+	if (!matched)
+		failed = true;
+
+	/* nothing else to report for a quiet successful run */
+	if (!failed && env.verbosity < VERBOSE_NORMAL)
+		return 0;
+
+	if (failed || env.verbosity >= VERBOSE_VERY) {
+		printf("VERIFIER LOG:\n========================\n");
+		print_verifier_log(log_buf);
+		printf("=====================\n");
+	}
+	printf("ACTUAL   FALSE1: ");
+	print_reg_state(&fr1, "\n");
+	printf("EXPECTED FALSE1: ");
+	print_reg_state(&fe1, "\n");
+	printf("ACTUAL   FALSE2: ");
+	print_reg_state(&fr2, "\n");
+	printf("EXPECTED FALSE2: ");
+	print_reg_state(&fe2, "\n");
+	printf("ACTUAL   TRUE1:  ");
+	print_reg_state(&tr1, "\n");
+	printf("EXPECTED TRUE1:  ");
+	print_reg_state(&te1, "\n");
+	printf("ACTUAL   TRUE2:  ");
+	print_reg_state(&tr2, "\n");
+	printf("EXPECTED TRUE2:  ");
+	print_reg_state(&te2, "\n");
+
+	return failed ? -EINVAL : 0;
+}
+
+/* Given setup ranges and number types, go over all supported operations,
+ * generating individual subtest for each allowed combination.
+ *
+ * If is_subtest is set, the case is registered with test__start_subtest()
+ * and skipped if that returns false. Processing of the case stops at the
+ * first failing operation; the error is returned only once ctx's failure
+ * limit is exceeded, otherwise 0 is returned so that the caller keeps going.
+ */
+static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+			   struct range x, struct range y, bool is_subtest)
+{
+	DEFINE_STRBUF(sb, 256);
+	struct subtest_case sub = {
+		.init_t = init_t,
+		.cond_t = cond_t,
+		.x = x,
+		.y = y,
+	};
+	const bool verbose = env.verbosity >= VERBOSE_NORMAL;
+	double progress, remain_ns;
+	u64 elapsed_ns;
+	int err;
+
+	/* subtest name doesn't include operation */
+	sb->pos = 0;
+	subtest_case_str(sb, &sub, false /* ignore op */);
+	if (is_subtest && !test__start_subtest(sb->buf))
+		return 0;
+
+	for (sub.op = first_op; sub.op <= last_op; sub.op++) {
+		sb->pos = 0; /* reuse strbuf for full test case description */
+		subtest_case_str(sb, &sub, true /* print op */);
+
+		if (verbose) /* this speeds up debugging */
+			printf("TEST CASE: %s\n", sb->buf);
+
+		err = verify_case_op(init_t, cond_t, x, y, sub.op);
+		if (err || verbose)
+			ASSERT_OK(err, sb->buf);
+		if (err) {
+			ctx->cur_failure_cnt++;
+			/* zero means "keep testing other cases" */
+			return ctx->cur_failure_cnt > ctx->max_failure_cnt ? err : 0;
+		}
+
+		ctx->case_cnt++;
+		if ((ctx->case_cnt % 10000) != 0)
+			continue;
+
+		/* periodic progress report with a linear ETA estimate */
+		progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt;
+		elapsed_ns = get_time_ns() - ctx->start_ns;
+		remain_ns = elapsed_ns / progress * (1 - progress);
+
+		fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), "
+				    "elapsed %llu mins (%.2lf hrs), "
+				    "ETA %.0lf mins (%.2lf hrs)\n",
+			ctx->progress_ctx,
+			ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress,
+			elapsed_ns / 1000000000 / 60,
+			elapsed_ns / 1000000000.0 / 3600,
+			remain_ns / 1000000000.0 / 60,
+			remain_ns / 1000000000.0 / 3600);
+	}
+
+	return 0;
+}
+
+/* Same as verify_case_opt(), always registering the case as a subtest. */
+static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+		       struct range x, struct range y)
+{
+	const bool is_subtest = true;
+
+	return verify_case_opt(ctx, init_t, cond_t, x, y, is_subtest);
+}
+
+/* ================================
+ * GENERATED CASES FROM SEED VALUES
+ * ================================
+ */
+/* qsort() comparator ordering u64 values in ascending order */
+static int u64_cmp(const void *p1, const void *p2)
+{
+	const u64 lhs = *(const u64 *)p1;
+	const u64 rhs = *(const u64 *)p2;
+
+	/* yields -1, 0, or 1 */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort() comparator ordering u32 values in ascending order */
+static int u32_cmp(const void *p1, const void *p2)
+{
+	const u32 lhs = *(const u32 *)p1;
+	const u32 rhs = *(const u32 *)p2;
+
+	/* yields -1, 0, or 1 */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort() comparator ordering s64 values in ascending (signed) order */
+static int s64_cmp(const void *p1, const void *p2)
+{
+	const s64 lhs = *(const s64 *)p1;
+	const s64 rhs = *(const s64 *)p2;
+
+	/* yields -1, 0, or 1 */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort() comparator ordering s32 values in ascending (signed) order */
+static int s32_cmp(const void *p1, const void *p2)
+{
+	const s32 lhs = *(const s32 *)p1;
+	const s32 rhs = *(const s32 *)p2;
+
+	/* yields -1, 0, or 1 */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Generate valid unique constants from seeds, both signed and unsigned.
+ *
+ * 64-bit values are all the (upper_seeds << 32 | lower_seeds) combinations,
+ * 32-bit values are lower_seeds themselves. Each set is deduplicated and
+ * stored twice in ctx: sorted as unsigned and sorted as signed numbers.
+ */
+static void gen_vals(struct ctx *ctx)
+{
+	const int lo_cnt = ARRAY_SIZE(lower_seeds);
+	const int hi_cnt = ARRAY_SIZE(upper_seeds);
+	const bool dump = env.verbosity >= VERBOSE_SUPER;
+	int hi, lo, i, total, uniq;
+
+	total = 0;
+	for (hi = 0; hi < hi_cnt; hi++) {
+		for (lo = 0; lo < lo_cnt; lo++)
+			ctx->uvals[total++] = (((u64)upper_seeds[hi]) << 32) | lower_seeds[lo];
+	}
+
+	/* sort and compact uvals (i.e., it's `sort | uniq`) */
+	qsort(ctx->uvals, total, sizeof(*ctx->uvals), u64_cmp);
+	uniq = 1;
+	for (i = 1; i < total; i++) {
+		if (ctx->uvals[i] != ctx->uvals[uniq - 1])
+			ctx->uvals[uniq++] = ctx->uvals[i];
+	}
+	ctx->val_cnt = uniq;
+
+	/* s64 values are exactly the same set of values, they only need to
+	 * be ordered differently
+	 */
+	for (i = 0; i < ctx->val_cnt; i++)
+		ctx->svals[i] = ctx->uvals[i];
+	qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp);
+
+	if (dump) {
+		DEFINE_STRBUF(sb1, 256);
+		DEFINE_STRBUF(sb2, 256);
+
+		for (i = 0; i < ctx->val_cnt; i++) {
+			sb1->pos = sb2->pos = 0;
+			snprintf_num(U64, sb1, ctx->uvals[i]);
+			snprintf_num(S64, sb2, ctx->svals[i]);
+			printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf);
+		}
+	}
+
+	/* 32-bit values are generated separately */
+	total = 0;
+	for (lo = 0; lo < lo_cnt; lo++)
+		ctx->usubvals[total++] = lower_seeds[lo];
+
+	/* sort and compact usubvals (i.e., it's `sort | uniq`) */
+	qsort(ctx->usubvals, total, sizeof(*ctx->usubvals), u32_cmp);
+	uniq = 1;
+	for (i = 1; i < total; i++) {
+		if (ctx->usubvals[i] != ctx->usubvals[uniq - 1])
+			ctx->usubvals[uniq++] = ctx->usubvals[i];
+	}
+	ctx->subval_cnt = uniq;
+
+	for (i = 0; i < ctx->subval_cnt; i++)
+		ctx->ssubvals[i] = ctx->usubvals[i];
+	qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp);
+
+	if (dump) {
+		DEFINE_STRBUF(sb1, 256);
+		DEFINE_STRBUF(sb2, 256);
+
+		for (i = 0; i < ctx->subval_cnt; i++) {
+			sb1->pos = sb2->pos = 0;
+			snprintf_num(U32, sb1, ctx->usubvals[i]);
+			snprintf_num(S32, sb2, ctx->ssubvals[i]);
+			printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf);
+		}
+	}
+}
+
+/* Generate valid ranges from upper/lower seeds.
+ *
+ * For every pair of generated values with indices i <= j, a [vals[i],
+ * vals[j]] range is built, separately for each of u64, s64, u32, and s32
+ * sorted value arrays. Range arrays are allocated here and are owned by ctx.
+ * Returns 0 on success and -EINVAL if allocation fails.
+ */
+static int gen_ranges(struct ctx *ctx)
+{
+	const bool dump = env.verbosity >= VERBOSE_SUPER;
+	int lo, hi, n;
+
+	/* count (and optionally print) 64-bit ranges first */
+	n = 0;
+	for (lo = 0; lo < ctx->val_cnt; lo++) {
+		for (hi = lo; hi < ctx->val_cnt; hi++, n++) {
+			if (dump) {
+				DEFINE_STRBUF(sb1, 256);
+				DEFINE_STRBUF(sb2, 256);
+
+				sb1->pos = sb2->pos = 0;
+				snprintf_range(U64, sb1, range(U64, ctx->uvals[lo], ctx->uvals[hi]));
+				snprintf_range(S64, sb2, range(S64, ctx->svals[lo], ctx->svals[hi]));
+				printf("RANGE #%d: u64=%-40s s64=%-40s\n", n, sb1->buf, sb2->buf);
+			}
+		}
+	}
+	ctx->range_cnt = n;
+
+	ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges));
+	if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc"))
+		return -EINVAL;
+	ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges));
+	if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc"))
+		return -EINVAL;
+
+	/* now that there is storage, actually record 64-bit ranges */
+	n = 0;
+	for (lo = 0; lo < ctx->val_cnt; lo++) {
+		for (hi = lo; hi < ctx->val_cnt; hi++, n++) {
+			ctx->uranges[n] = range(U64, ctx->uvals[lo], ctx->uvals[hi]);
+			ctx->sranges[n] = range(S64, ctx->svals[lo], ctx->svals[hi]);
+		}
+	}
+
+	/* and then do the same two passes for 32-bit subranges */
+	n = 0;
+	for (lo = 0; lo < ctx->subval_cnt; lo++) {
+		for (hi = lo; hi < ctx->subval_cnt; hi++, n++) {
+			if (dump) {
+				DEFINE_STRBUF(sb1, 256);
+				DEFINE_STRBUF(sb2, 256);
+
+				sb1->pos = sb2->pos = 0;
+				snprintf_range(U32, sb1, range(U32, ctx->usubvals[lo], ctx->usubvals[hi]));
+				snprintf_range(S32, sb2, range(S32, ctx->ssubvals[lo], ctx->ssubvals[hi]));
+				printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", n, sb1->buf, sb2->buf);
+			}
+		}
+	}
+	ctx->subrange_cnt = n;
+
+	ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges));
+	if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc"))
+		return -EINVAL;
+	ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges));
+	if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc"))
+		return -EINVAL;
+
+	n = 0;
+	for (lo = 0; lo < ctx->subval_cnt; lo++) {
+		for (hi = lo; hi < ctx->subval_cnt; hi++, n++) {
+			ctx->usubranges[n] = range(U32, ctx->usubvals[lo], ctx->usubvals[hi]);
+			ctx->ssubranges[n] = range(S32, ctx->ssubvals[lo], ctx->ssubvals[hi]);
+		}
+	}
+
+	return 0;
+}
+
+/* Pick up optional test settings from environment variables:
+ *   - REG_BOUNDS_MAX_FAILURE_CNT -> ctx->max_failure_cnt (non-negative int);
+ *   - REG_BOUNDS_RAND_CASE_CNT   -> ctx->rand_case_cnt (non-negative int);
+ *   - REG_BOUNDS_RAND_SEED       -> ctx->rand_seed.
+ *
+ * Variables that are not set leave corresponding ctx fields untouched.
+ * Returns 0 on success. On a value that strtol()/strtoul() rejects or that
+ * is out of range, test failure is recorded and -EINVAL is returned.
+ */
+static int parse_env_vars(struct ctx *ctx)
+{
+	const char *s;
+	long val;
+
+	if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) {
+		errno = 0;
+		val = strtol(s, NULL, 10);
+		if (errno || val < 0 || val > INT_MAX) {
+			/* errno is zero for negative or too big values, and
+			 * ASSERT_OK(0) would let bad value go unreported
+			 */
+			ASSERT_OK(errno ? -errno : -ERANGE, "REG_BOUNDS_MAX_FAILURE_CNT");
+			return -EINVAL;
+		}
+		ctx->max_failure_cnt = val;
+	}
+
+	if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) {
+		errno = 0;
+		val = strtol(s, NULL, 10);
+		if (errno || val < 0 || val > INT_MAX) {
+			ASSERT_OK(errno ? -errno : -ERANGE, "REG_BOUNDS_RAND_CASE_CNT");
+			return -EINVAL;
+		}
+		ctx->rand_case_cnt = val;
+	}
+
+	if ((s = getenv("REG_BOUNDS_RAND_SEED"))) {
+		errno = 0;
+		ctx->rand_seed = strtoul(s, NULL, 10);
+		if (errno) {
+			ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/* Common setup for generated tests: unless SLOW_TESTS=1 is set, mark the
+ * test as skipped and return -ENOTSUP; otherwise parse settings from
+ * environment and generate values and ranges into ctx. Returns 0 on success
+ * and negative error otherwise.
+ */
+static int prepare_gen_tests(struct ctx *ctx)
+{
+	const char *slow = getenv("SLOW_TESTS");
+	int err;
+
+	if (!slow || strcmp(slow, "1") != 0) {
+		test__skip();
+		return -ENOTSUP;
+	}
+
+	err = parse_env_vars(ctx);
+	if (err)
+		return err;
+
+	gen_vals(ctx);
+
+	err = gen_ranges(ctx);
+	if (err)
+		ASSERT_OK(err, "gen_ranges");
+
+	return err;
+}
+
+/* Go over generated 64-bit constants and ranges and validate every
+ * <range> x <const> and <const> x <range> combination of them, set up in
+ * init_t domain (U64 selects unsigned values/ranges, anything else selects
+ * signed ones) and compared in cond_t domain.
+ */
+static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t)
+{
+	const struct range *ranges;
+	struct range rconst;
+	const u64 *vals;
+	struct ctx ctx;
+	int vi, ri;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto out;
+
+	if (init_t == U64) {
+		ranges = ctx.uranges;
+		vals = ctx.uvals;
+	} else {
+		ranges = ctx.sranges;
+		vals = (const u64 *)ctx.svals;
+	}
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x CONST, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (vi = 0; vi < ctx.val_cnt; vi++) {
+		/* constant is just a single-value range */
+		rconst = range(init_t, vals[vi], vals[vi]);
+
+		for (ri = 0; ri < ctx.range_cnt; ri++) {
+			/* (u64|s64)(<range> x <const>) */
+			if (verify_case(&ctx, init_t, cond_t, ranges[ri], rconst))
+				goto out;
+			/* (u64|s64)(<const> x <range>) */
+			if (verify_case(&ctx, init_t, cond_t, rconst, ranges[ri]))
+				goto out;
+		}
+	}
+
+out:
+	cleanup_ctx(&ctx);
+}
+
+/* Go over generated 32-bit constants and subranges and validate every
+ * <range> x <const> and <const> x <range> combination of them, set up in
+ * init_t domain (U32 selects unsigned values/ranges, anything else selects
+ * signed ones) and compared in cond_t domain.
+ */
+static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t)
+{
+	const struct range *ranges;
+	struct range rconst;
+	const u32 *vals;
+	struct ctx ctx;
+	int vi, ri;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto out;
+
+	if (init_t == U32) {
+		ranges = ctx.usubranges;
+		vals = ctx.usubvals;
+	} else {
+		ranges = ctx.ssubranges;
+		vals = (const u32 *)ctx.ssubvals;
+	}
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x CONST, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (vi = 0; vi < ctx.subval_cnt; vi++) {
+		/* constant is just a single-value range */
+		rconst = range(init_t, vals[vi], vals[vi]);
+
+		for (ri = 0; ri < ctx.subrange_cnt; ri++) {
+			/* (u32|s32)(<range> x <const>) */
+			if (verify_case(&ctx, init_t, cond_t, ranges[ri], rconst))
+				goto out;
+			/* (u32|s32)(<const> x <range>) */
+			if (verify_case(&ctx, init_t, cond_t, rconst, ranges[ri]))
+				goto out;
+		}
+	}
+
+out:
+	cleanup_ctx(&ctx);
+}
+
+/* Go over all pairs of generated ranges of init_t domain and validate them
+ * against each other, in both orders, with comparison done in cond_t domain.
+ * Unknown init_t terminates the process.
+ */
+static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t)
+{
+	const struct range *ranges;
+	struct ctx ctx;
+	int a, b, rcnt;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto out;
+
+	/* pick set of ranges matching initialization domain */
+	if (init_t == U64) {
+		ranges = ctx.uranges;
+		rcnt = ctx.range_cnt;
+	} else if (init_t == U32) {
+		ranges = ctx.usubranges;
+		rcnt = ctx.subrange_cnt;
+	} else if (init_t == S64) {
+		ranges = ctx.sranges;
+		rcnt = ctx.range_cnt;
+	} else if (init_t == S32) {
+		ranges = ctx.ssubranges;
+		rcnt = ctx.subrange_cnt;
+	} else {
+		printf("validate_gen_range_vs_range!\n");
+		exit(1);
+	}
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x RANGE, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (a = 0; a < rcnt; a++) {
+		for (b = a; b < rcnt; b++) {
+			/* (<range> x <range>), in both orders */
+			if (verify_case(&ctx, init_t, cond_t, ranges[a], ranges[b]))
+				goto out;
+			if (verify_case(&ctx, init_t, cond_t, ranges[b], ranges[a]))
+				goto out;
+		}
+	}
+
+out:
+	cleanup_ctx(&ctx);
+}
+
+/* Go over thousands of test cases generated from initial seed values.
+ * Given this takes a long time, guard this behind SLOW_TESTS=1 envvar. If
+ * envvar is not set, this test is skipped during test_progs testing.
+ *
+ * We split this up into smaller subsets based on initialization and
+ * conditional numeric domains to get an easy parallelization with test_progs'
+ * -j argument.
+ */
+
+/* RANGE x CONST, U64 initial range */
+void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); }
+void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); }
+void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); }
+void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); }
+/* RANGE x CONST, S64 initial range */
+void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); }
+void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); }
+void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); }
+void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); }
+/* RANGE x CONST, U32 initial range */
+void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); }
+void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); }
+void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); }
+void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); }
+/* RANGE x CONST, S32 initial range */
+void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); }
+void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); }
+void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); }
+void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); }
+
+/* RANGE x RANGE, U64 initial range */
+void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); }
+void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); }
+void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); }
+void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); }
+/* RANGE x RANGE, S64 initial range */
+void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); }
+void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); }
+void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); }
+void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); }
+/* RANGE x RANGE, U32 initial range */
+void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); }
+void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); }
+void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); }
+void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); }
+/* RANGE x RANGE, S32 initial range */
+void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); }
+void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); }
+void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); }
+void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); }
+
+#define DEFAULT_RAND_CASE_CNT 100
+
+#define RAND_21BIT_MASK ((1 << 22) - 1)
+
+static u64 rand_u64()
+{
+ /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it
+ * seems to be 1<<31, so we need to call it thrice to get full u64;
+ * we'll use roughly equal split: 22 + 21 + 21 bits
+ *
+ * NOTE(review): RAND_21BIT_MASK is defined as ((1 << 22) - 1), i.e. it
+ * keeps 22 low bits, so the three chunks OR-ed below overlap by one
+ * bit at bit 21 and at bit 42; confirm whether (1 << 21) - 1 was meant.
+ */
+ return ((u64)random() << 42) |
+ (((u64)random() & RAND_21BIT_MASK) << 21) |
+ (random() & RAND_21BIT_MASK);
+}
+
+static u64 rand_const(enum num_t t)
+{
+ return cast_t(t, rand_u64());
+}
+
+static struct range rand_range(enum num_t t)
+{
+ u64 x = rand_const(t), y = rand_const(t);
+
+ return range(t, min_t(t, x, y), max_t(t, x, y));
+}
+
+static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range)
+{
+ struct ctx ctx;
+ struct range range1, range2;
+ int err, i;
+ u64 t;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ err = parse_env_vars(&ctx);
+ if (err) {
+ ASSERT_OK(err, "parse_env_vars");
+ return;
+ }
+
+ if (ctx.rand_case_cnt == 0)
+ ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT;
+ if (ctx.rand_seed == 0)
+ ctx.rand_seed = (unsigned)get_time_ns();
+
+ srandom(ctx.rand_seed);
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "[RANDOM SEED %u] RANGE x %s, %s -> %s",
+ ctx.rand_seed, const_range ? "CONST" : "RANGE",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.rand_case_cnt; i++) {
+ range1 = rand_range(init_t);
+ if (const_range) {
+ t = rand_const(init_t);
+ range2 = range(init_t, t, t);
+ } else {
+ range2 = rand_range(init_t);
+ }
+
+ /* <range1> x <range2> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */))
+ goto cleanup;
+ /* <range2> x <range1> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */))
+ goto cleanup;
+ }
+
+cleanup:
+ /* make sure we report random seed for reproducing */
+ ASSERT_TRUE(true, ctx.progress_ctx);
+ cleanup_ctx(&ctx);
+}
+
+/* [RANDOM] RANGE x CONST, U64 initial range */
+void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S64 initial range */
+void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, U32 initial range */
+void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S32 initial range */
+void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); }
+
+/* [RANDOM] RANGE x RANGE, U64 initial range */
+void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S64 initial range */
+void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, U32 initial range */
+void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S32 initial range */
+void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); }
+
+/* A set of hard-coded "interesting" cases to validate as part of normal
+ * test_progs test runs
+ */
+static struct subtest_case crafted_cases[] = {
+ {U64, U64, {0, 0xffffffff}, {0, 0}},
+ {U64, U64, {0, 0x80000000}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}},
+ {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}},
+
+ /* single point overlap, interesting BPF_EQ and BPF_NE interactions */
+ {U64, U64, {0, 1}, {1, 0x80000000}},
+ {U64, S64, {0, 1}, {1, 0x80000000}},
+ {U64, U32, {0, 1}, {1, 0x80000000}},
+ {U64, S32, {0, 1}, {1, 0x80000000}},
+
+ {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0, 0xffffffffULL}, {1, 1}},
+ {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+
+ {U64, U32, {0, 0x100000000}, {0, 0}},
+ {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+
+ {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
+ /* these are tricky cases where tightened lower 32 bit boundaries
+ * make it possible to tighten the 64 bit boundaries as well
+ */
+ {U64, S32, {0, 0x0ffffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x100000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x100000001ULL}, {0, 0}},
+ {U64, S32, {0, 0x180000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x17fffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x180000001ULL}, {0, 0}},
+
+ /* verifier knows about [-1, 0] range for s32 for this case already */
+ {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+ /* but didn't know about these cases initially */
+ {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */
+ {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */
+
+ /* longer convergence case: learning from u64 -> s64 -> u64 -> u32,
+ * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX])
+ */
+ {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+
+ {U32, U32, {1, U32_MAX}, {0, 0}},
+
+ {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+
+ {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}},
+ {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}},
+ {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+ {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+};
+
+/* Go over crafted hard-coded cases. This is fast, so we do it as part of
+ * normal test_progs run.
+ */
+void test_reg_bounds_crafted(void)
+{
+ struct ctx ctx;
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) {
+ struct subtest_case *c = &crafted_cases[i];
+
+ verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y);
+ verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x);
+ }
+
+ cleanup_ctx(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index f29c08d..18d451b 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
+ "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)"
- " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n"
+ " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)"
- " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n"
+ " R0=map_value(id=2,ks=4,vs=8)"
+ " R1_w=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index 51883cc..196abf2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void)
const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
LIBBPF_OPTS(bpf_prog_load_opts, opts);
const size_t log_buf_sz = 256;
- char *log_buf;
+ char log_buf[log_buf_sz];
int fd = -1;
- log_buf = malloc(log_buf_sz);
- if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
- return fd;
opts.log_buf = log_buf;
opts.log_size = log_buf_sz;
@@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void)
prog_insns, prog_insn_cnt, &opts);
ASSERT_STREQ(log_buf, "", "log_0");
ASSERT_GE(fd, 0, "prog_fd");
- free(log_buf);
return fd;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
index 72310cf..6fb2217 100644
--- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -16,27 +16,27 @@ static void nsleep()
void test_vmlinux(void)
{
- int duration = 0, err;
+ int err;
struct test_vmlinux* skel;
struct test_vmlinux__bss *bss;
skel = test_vmlinux__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load"))
return;
bss = skel->bss;
err = test_vmlinux__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_vmlinux__attach"))
goto cleanup;
/* trigger everything */
nsleep();
- CHECK(!bss->tp_called, "tp", "not called\n");
- CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
- CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
- CHECK(!bss->kprobe_called, "kprobe", "not called\n");
- CHECK(!bss->fentry_called, "fentry", "not called\n");
+ ASSERT_TRUE(bss->tp_called, "tp");
+ ASSERT_TRUE(bss->raw_tp_called, "raw_tp");
+ ASSERT_TRUE(bss->tp_btf_called, "tp_btf");
+ ASSERT_TRUE(bss->kprobe_called, "kprobe");
+ ASSERT_TRUE(bss->fentry_called, "fentry");
cleanup:
test_vmlinux__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index f2b8167..442f4ca 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+int num_user_stacks = 0;
+
SEC("iter/task")
int get_task_user_stacks(struct bpf_iter__task *ctx)
{
@@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx)
if (res <= 0)
return 0;
+ /* Only one task, the current one, should succeed */
+ ++num_user_stacks;
+
buf_sz += res;
/* If the verifier doesn't refine bpf_get_task_stack res, and instead
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index e1e5c54..49efaed 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,48 +18,48 @@
return *(u64 *)num; \
}
-__msg(": R0_w=-2147483648 R10=fp0")
+__msg(": R0_w=0xffffffff80000000 R10=fp0")
check_assert(s64, eq, int_min, INT_MIN);
-__msg(": R0_w=2147483647 R10=fp0")
+__msg(": R0_w=0x7fffffff R10=fp0")
check_assert(s64, eq, int_max, INT_MAX);
__msg(": R0_w=0 R10=fp0")
check_assert(s64, eq, zero, 0);
-__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0")
check_assert(s64, eq, llong_min, LLONG_MIN);
-__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0")
check_assert(s64, eq, llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0")
check_assert(s64, lt, pos, INT_MAX);
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, lt, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, lt, neg, INT_MIN);
-__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0")
check_assert(s64, le, pos, INT_MAX);
__msg(": R0_w=scalar(smax=0) R10=fp0")
check_assert(s64, le, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, le, neg, INT_MIN);
-__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, gt, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, gt, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0")
check_assert(s64, gt, neg, INT_MIN);
-__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, ge, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
check_assert(s64, ge, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0")
check_assert(s64, ge, neg, INT_MIN);
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
int check_assert_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
int check_assert_range_u64(struct __sk_buff *ctx)
{
u64 num = ctx->len;
@@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
int check_assert_single_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R1=ctx() R2=4096 R10=fp0")
int check_assert_single_range_u64(struct __sk_buff *ctx)
{
u64 num = ctx->len;
@@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0")
+__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
int check_assert_generic(struct __sk_buff *ctx)
{
u8 *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index c20c4e3..b2181f8 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void)
);
}
+struct {
+ int data[32];
+ int n;
+} loop_data;
+
+SEC("raw_tp")
+__success
+int iter_arr_with_actual_elem_count(const void *ctx)
+{
+ int i, n = loop_data.n, sum = 0;
+
+ if (n > ARRAY_SIZE(loop_data.data))
+ return 0;
+
+ bpf_for(i, 0, n) {
+ /* no rechecking of i against ARRAY_SIZE(loop_data.data) */
+ sum += loop_data.data[i];
+ }
+
+ return sum;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b567a66..1769fdf 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -14,6 +14,24 @@ struct node_data {
struct bpf_rb_node node;
};
+struct refcounted_node {
+ long data;
+ struct bpf_rb_node rb_node;
+ struct bpf_refcount refcount;
+};
+
+struct stash {
+ struct bpf_spin_lock l;
+ struct refcounted_node __kptr *stashed;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct stash);
+ __uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
struct plain_local {
long key;
long data;
@@ -38,6 +56,7 @@ struct map_value {
* Had to do the same w/ bpf_kfunc_call_test_release below
*/
struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx)
return 0;
}
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int ret = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+ if (!s)
+ return 1;
+
+ if (!s->stashed)
+ /* refcount_acquire failure is expected when no refcounted_node
+ * has been stashed before this program executes
+ */
+ return 2;
+
+ p = bpf_refcount_acquire(s->stashed);
+ if (!p)
+ return 3;
+
+ ret = s->stashed ? s->stashed->data : -1;
+ bpf_obj_drop(p);
+ return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int key = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+ if (!s)
+ return 1;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 2;
+ p->data = 42;
+
+ p = bpf_kptr_xchg(&s->stashed, p);
+ if (p) {
+ bpf_obj_drop(p);
+ return 3;
+ }
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
index c39f559..42c4a8b 100644
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -1,4 +1,26 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 180
+
+/* llvm upstream commit at clang18
+ * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e
+ * changed inlining behavior and caused compilation failure as some branch
+ * target distance exceeded 16bit representation which is the maximum for
+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18
+ * to specify which cpu version is used for compilation. So a smaller
+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which
+ * reduced some branch target distances and resolved the compilation failure.
+ *
+ * To capture the case where a developer/ci uses clang18 but the corresponding
+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count
+ * will be set as well to prevent potential compilation failures.
+ */
+#ifdef __BPF_CPU_VERSION__
+#if __BPF_CPU_VERSION__ < 4
+#define UNROLL_COUNT 90
+#endif
+#elif __clang_major__ == 18
+#define UNROLL_COUNT 90
+#endif
+
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1ef07f6..1553b9c 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
}
SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ /* Intentionally not testing !n
+ * it's MAYBE_NULL for refcount_acquire
+ */
+ m = bpf_refcount_acquire(n);
+ if (m)
+ bpf_obj_drop(m);
+ if (n)
+ bpf_obj_drop(n);
+
+ return 0;
+}
+
+SEC("?tc")
__failure __msg("Unreleased reference id=3 alloc_insn=9")
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
new file mode 100644
index 0000000..4462886
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+__u32 target_ancestor_level;
+__u64 target_ancestor_cgid;
+int target_pid, target_hid;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static int bpf_link_create_verify(int cmd)
+{
+ struct cgroup *cgrp, *ancestor;
+ struct task_struct *task;
+ int ret = 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+
+ /* Then it can run in parallel with others */
+ if (task->pid != target_pid)
+ return 0;
+
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ /* Refuse it if its cgid or its ancestor's cgid is the target cgid */
+ if (cgrp->kn->id == target_ancestor_cgid)
+ ret = -1;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level);
+ if (!ancestor)
+ goto out;
+
+ if (ancestor->kn->id == target_ancestor_cgid)
+ ret = -1;
+ bpf_cgroup_release(ancestor);
+
+out:
+ bpf_cgroup_release(cgrp);
+ return ret;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("fentry")
+int BPF_PROG(fentry_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index c5588a1..ec430b7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -965,6 +965,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
__naked void crossing_32_bit_signed_boundary_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 37ffa57..a350ecd 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name)
return parse_int(str, val, name);
}
+static void update_flags(int *flags, int flag, bool clear)
+{
+ if (clear)
+ *flags &= ~flag;
+ else
+ *flags |= flag;
+}
+
/* Uses btf_decl_tag attributes to describe the expected test
* behavior, see bpf_misc.h for detailed description of each attribute
* and attribute combinations.
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester,
memset(spec, 0, sizeof(*spec));
spec->prog_name = bpf_program__name(prog);
+ spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */
btf = bpf_object__btf(obj);
if (!btf) {
@@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester,
for (i = 1; i < btf__type_cnt(btf); i++) {
const char *s, *val, *msg;
const struct btf_type *t;
- int tmp;
+ bool clear;
+ int flags;
t = btf__type_by_id(btf, i);
if (!btf_is_decl_tag(t))
@@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
} else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+
+ clear = val[0] == '!';
+ if (clear)
+ val++;
+
if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
- spec->prog_flags |= BPF_F_STRICT_ALIGNMENT;
+ update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear);
} else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
- spec->prog_flags |= BPF_F_ANY_ALIGNMENT;
+ update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear);
} else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
- spec->prog_flags |= BPF_F_TEST_RND_HI32;
+ update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear);
} else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
- spec->prog_flags |= BPF_F_TEST_STATE_FREQ;
+ update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear);
} else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
- spec->prog_flags |= BPF_F_SLEEPABLE;
+ update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear);
} else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
- spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+ update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear);
+ } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear);
} else /* assume numeric value */ {
- err = parse_int(val, &tmp, "test prog flags");
+ err = parse_int(val, &flags, "test prog flags");
if (err)
goto cleanup;
- spec->prog_flags |= tmp;
+ update_flags(&spec->prog_flags, flags, clear);
}
}
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 7fc00e4..767e069 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,13 +1396,18 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static int map_update_retriable(int map_fd, const void *key, const void *value,
- int flags, int attempts)
+static bool retry_for_again_or_busy(int err)
+{
+ return (err == EAGAIN || err == EBUSY);
+}
+
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry)
{
int delay = rand() % MIN_DELAY_RANGE_US;
while (bpf_map_update_elem(map_fd, key, value, flags)) {
- if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ if (!attempts || !need_retry(errno))
return -errno;
if (delay <= MAX_DELAY_US / 2)
@@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data)
key = value = i;
if (do_update) {
- err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
- err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index f6fbca76..e4ac704 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -16,4 +17,8 @@
extern int skips;
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry);
+
#endif
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2c89674..b0068a9 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
err = bpf_object__load(obj);
if (err) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 98107e0..f36e414 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32;
+ pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 8d99488..d2458c1 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
bpf_program__set_type(prog, type);
- flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
+ flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
bpf_program__set_flags(prog, flags);
err = bpf_object__load(obj);
@@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.kern_version = kern_version,
- .prog_flags = BPF_F_TEST_RND_HI32,
+ .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS,
.log_level = extra_prog_load_log_flags,
.log_buf = log_buf,
.log_size = log_buf_sz,
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 6550958..1d418d6 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -18,6 +18,7 @@
#include <libelf.h>
#include <gelf.h>
#include <float.h>
+#include <math.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -99,6 +100,7 @@ struct stat_specs {
enum stat_id ids[ALL_STATS_CNT];
enum stat_variant variants[ALL_STATS_CNT];
bool asc[ALL_STATS_CNT];
+ bool abs[ALL_STATS_CNT];
int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
@@ -133,6 +135,7 @@ struct filter {
int stat_id;
enum stat_variant stat_var;
long value;
+ bool abs;
};
static struct env {
@@ -142,10 +145,12 @@ static struct env {
bool debug;
bool quiet;
bool force_checkpoints;
+ bool force_reg_invariants;
enum resfmt out_fmt;
bool show_version;
bool comparison_mode;
bool replay_mode;
+ int top_n;
int log_level;
int log_size;
@@ -210,8 +215,7 @@ static const struct argp_option opts[] = {
{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
- { "test-states", 't', NULL, 0,
- "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "top-n", 'n', "N", 0, "Emit only up to first N results." },
{ "quiet", 'q', NULL, 0, "Quiet mode" },
{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -219,6 +223,10 @@ static const struct argp_option opts[] = {
{ "compare", 'C', NULL, 0, "Comparison mode" },
{ "replay", 'R', NULL, 0, "Replay mode" },
{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
+ { "test-states", 't', NULL, 0,
+ "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "test-reg-invariants", 'r', NULL, 0,
+ "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{},
};
@@ -290,6 +298,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 't':
env.force_checkpoints = true;
break;
+ case 'r':
+ env.force_reg_invariants = true;
+ break;
+ case 'n':
+ errno = 0;
+ env.top_n = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top N specifier: %s\n", arg);
+ argp_usage(state);
+ }
+ break; /* do not fall through into 'C' and enable comparison mode */
case 'C':
env.comparison_mode = true;
break;
@@ -455,7 +473,8 @@ static struct {
{ OP_EQ, "=" },
};
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs);
static int append_filter(struct filter **filters, int *cnt, const char *str)
{
@@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
long val;
const char *end = str;
const char *op_str;
+ bool is_abs;
op_str = operators[i].op_str;
p = strstr(str, op_str);
if (!p)
continue;
- if (!parse_stat_id_var(str, p - str, &id, &var)) {
+ if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
return -EINVAL;
}
@@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
f->stat_id = id;
f->stat_var = var;
f->op = operators[i].op_kind;
+ f->abs = is_abs; /* only |stat| expressions filter on the absolute value */
f->value = val;
*cnt += 1;
@@ -657,7 +678,8 @@ static struct stat_def {
[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs)
{
static const char *var_sfxs[] = {
[VARIANT_A] = "_a",
@@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v
};
int i, j, k;
+ /* |<stat>| means we take absolute value of given stat */
+ *is_abs = false;
+ if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
+ *is_abs = true;
+ name += 1;
+ len -= 2;
+ }
+
for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
struct stat_def *def = &stat_defs[i];
size_t alias_len, sfx_len;
@@ -722,7 +752,7 @@ static bool is_desc_sym(char c)
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
int id;
- bool has_order = false, is_asc = false;
+ bool has_order = false, is_asc = false, is_abs = false;
size_t len = strlen(stat_name);
enum stat_variant var;
@@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
len -= 1;
}
- if (!parse_stat_id_var(stat_name, len, &id, &var)) {
+ if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
return -ESRCH;
}
@@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
specs->ids[specs->spec_cnt] = id;
specs->variants[specs->spec_cnt] = var;
specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+ specs->abs[specs->spec_cnt] = is_abs;
specs->spec_cnt++;
return 0;
@@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_checkpoints)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+ if (env.force_reg_invariants)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
err = bpf_object__load(obj);
env.progs_processed++;
@@ -1103,7 +1136,7 @@ static int process_obj(const char *filename)
}
static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
- enum stat_id id, bool asc)
+ enum stat_id id, bool asc, bool abs)
{
int cmp = 0;
@@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
long v1 = s1->stats[id];
long v2 = s2->stats[id];
+ if (abs) {
+ v1 = v1 < 0 ? -v1 : v1;
+ v2 = v2 < 0 ? -v2 : v2;
+ }
+
if (v1 != v2)
cmp = v1 < v2 ? -1 : 1;
break;
@@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2)
int i, cmp;
for (i = 0; i < env.sort_spec.spec_cnt; i++) {
- cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+ cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+ env.sort_spec.asc[i], env.sort_spec.abs[i]);
if (cmp != 0)
return cmp;
}
@@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s,
static int cmp_join_stat(const struct verif_stats_join *s1,
const struct verif_stats_join *s2,
- enum stat_id id, enum stat_variant var, bool asc)
+ enum stat_id id, enum stat_variant var,
+ bool asc, bool abs)
{
const char *str1 = NULL, *str2 = NULL;
double v1, v2;
@@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
fetch_join_stat_value(s1, id, var, &str1, &v1);
fetch_join_stat_value(s2, id, var, &str2, &v2);
+ if (abs) {
+ v1 = fabs(v1);
+ v2 = fabs(v2);
+ }
+
if (str1)
cmp = strcmp(str1, str2);
else if (v1 != v2)
@@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void *v1, const void *v2)
cmp = cmp_join_stat(s1, s2,
env.sort_spec.ids[i],
env.sort_spec.variants[i],
- env.sort_spec.asc[i]);
+ env.sort_spec.asc[i],
+ env.sort_spec.abs[i]);
if (cmp != 0)
return cmp;
}
@@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta
fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
+ if (f->abs)
+ value = fabs(value);
+
switch (f->op) {
case OP_EQ: return value > f->value - eps && value < f->value + eps;
case OP_NEQ: return value < f->value - eps || value > f->value + eps;
@@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void)
struct stat_specs base_specs = {}, comp_specs = {};
struct stat_specs tmp_sort_spec;
enum resfmt cur_fmt;
- int err, i, j, last_idx;
+ int err, i, j, last_idx, cnt;
if (env.filename_cnt != 2) {
fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
@@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void)
env.join_stat_cnt += 1;
}
- /* now sort joined results accorsing to sort spec */
+ /* now sort joined results according to sort spec */
qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
/* for human-readable table output we need to do extra pass to
@@ -1896,16 +1945,22 @@ static int handle_comparison_mode(void)
output_comp_headers(cur_fmt);
last_idx = -1;
+ cnt = 0;
for (i = 0; i < env.join_stat_cnt; i++) {
const struct verif_stats_join *join = &env.join_stats[i];
if (!should_output_join_stats(join))
continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
+
if (cur_fmt == RESFMT_TABLE_CALCLEN)
last_idx = i;
output_comp_stats(join, cur_fmt, i == last_idx);
+
+ cnt++;
}
if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s
{
long value = stats->stats[f->stat_id];
+ if (f->abs)
+ value = value < 0 ? -value : value;
+
switch (f->op) {
case OP_EQ: return value == f->value;
case OP_NEQ: return value != f->value;
@@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats)
static void output_prog_stats(void)
{
const struct verif_stats *stats;
- int i, last_stat_idx = 0;
+ int i, last_stat_idx = 0, cnt = 0;
if (env.out_fmt == RESFMT_TABLE) {
/* calculate column widths */
@@ -1984,7 +2042,10 @@ static void output_prog_stats(void)
stats = &env.prog_stats[i];
if (!should_output_stats(stats))
continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
output_stats(stats, env.out_fmt, i == last_stat_idx);
+ cnt++;
}
}
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 6850345..65d14f3 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -36,7 +36,9 @@
MOUNT_DIR="mnt"
ROOTFS_IMAGE="root.img"
OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+ "tools/testing/selftests/bpf/config.vm"
+ "tools/testing/selftests/bpf/config.${ARCH}")
INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 0000000..fe0343b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname "$0")/../../../net/forwarding
+
+ALL_TESTS="
+	pci_reset_test
+"
+NUM_NETIFS=1
+source "$lib_dir/lib.sh"
+source "$lib_dir/devlink_lib.sh"
+
+# Reset the devlink device through sysfs and check that the port netdev was
+# re-created (i.e. came back with a different ifindex).
+pci_reset_test()
+{
+	RET=0
+
+	local bus=$(echo "$DEVLINK_DEV" | cut -d '/' -f 1)
+	local bdf=$(echo "$DEVLINK_DEV" | cut -d '/' -f 2)
+
+	# Expansions are quoted so that an empty or unexpected DEVLINK_DEV fails
+	# the checks below instead of producing a malformed test expression.
+	if [ "$bus" != "pci" ]; then
+		check_err 1 "devlink device is not a PCI device"
+		log_test "pci reset"
+		return
+	fi
+
+	if [ ! -f "/sys/bus/pci/devices/$bdf/reset_method" ]; then
+		check_err 1 "reset is not supported"
+		log_test "pci reset"
+		return
+	fi
+
+	[[ $(cat "/sys/bus/pci/devices/$bdf/reset_method") == "bus" ]]
+	check_err $? "only \"bus\" reset method should be supported"
+
+	local ifindex_pre=$(ip -j link show dev "$swp1" | jq '.[]["ifindex"]')
+
+	echo 1 > "/sys/bus/pci/devices/$bdf/reset"
+	check_err $? "reset failed"
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	local ifindex_post=$(ip -j link show dev "$swp1" | jq '.[]["ifindex"]')
+
+	# An empty value means the port could not be looked up at all; without
+	# this check it would compare as "different" and pass the test below.
+	[[ -n "$ifindex_post" ]]
+	check_err $? "port not found after reset"
+
+	[[ "$ifindex_pre" != "$ifindex_post" ]]
+	check_err $? "reset not performed"
+
+	log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5b2aca4..9274edf 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -91,6 +91,7 @@
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b1..8d75753 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
const char *host;
const char *service;
unsigned int size;
+ unsigned int num_pkt;
struct {
unsigned int mark;
unsigned int dontfrag;
unsigned int tclass;
unsigned int hlimit;
+ unsigned int priority;
} sockopt;
struct {
unsigned int family;
@@ -72,6 +74,7 @@ struct options {
} v6;
} opt = {
.size = 13,
+ .num_pkt = 1,
.sock = {
.family = AF_UNSPEC,
.type = SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
cs_usage(argv[0]);
}
break;
-
+ case 'P':
+ opt.sockopt.priority = atoi(optarg);
+ break;
case 'm':
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
+ case 'n':
+ opt.num_pkt = atoi(optarg);
+ break;
case 'd':
opt.txtime.ena = true;
opt.txtime.delay = atoi(optarg);
@@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
&opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ if (opt.sockopt.priority &&
+ setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+ &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
}
int main(int argc, char *argv[])
@@ -421,6 +433,7 @@ int main(int argc, char *argv[])
char *buf;
int err;
int fd;
+ int i;
cs_parse_args(argc, argv);
@@ -480,24 +493,27 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
- err = sendmsg(fd, &msg, 0);
- if (err < 0) {
- if (!opt.silent_send)
- fprintf(stderr, "send failed: %s\n", strerror(errno));
- err = ERN_SEND;
- goto err_out;
- } else if (err != (int)opt.size) {
- fprintf(stderr, "short send\n");
- err = ERN_SEND_SHORT;
- goto err_out;
- } else {
- err = ERN_SUCCESS;
+ for (i = 0; i < opt.num_pkt; i++) {
+ err = sendmsg(fd, &msg, 0);
+ if (err < 0) {
+ if (!opt.silent_send)
+ fprintf(stderr, "send failed: %s\n", strerror(errno));
+ err = ERN_SEND;
+ goto err_out;
+ } else if (err != (int)opt.size) {
+ fprintf(stderr, "short send\n");
+ err = ERN_SEND_SHORT;
+ goto err_out;
+ }
}
+ err = ERN_SUCCESS;
- /* Make sure all timestamps have time to loop back */
- usleep(opt.txtime.delay);
+ if (opt.ts.ena) {
+ /* Make sure all timestamps have time to loop back */
+ usleep(opt.txtime.delay);
- cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ }
err_out:
close(fd);
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 0000000..24b77bd
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a 100ms delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh "$0" __subprocess
+	exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for the delay period (100ms) and see that
+# twenty are now sent.
+sleep 0.1
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values.
+# The counters are matched together with their delimiters (leading space,
+# trailing comma) so that e.g. "0 pkt" cannot be satisfied by "20 pkt".
+echo "${OUT1}" | grep -q ' 0 pkt (dropped 10,' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q ' 0 pkt (dropped 30,' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q ' 0 pkt (dropped 40,' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q ' 20 pkt (dropped 40,' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100755
index 0000000..4fe0bef
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+# Poll until a socket bound to the given local port shows up in procfs.
+#
+# $1 - network namespace to look in
+# $2 - local port number (decimal)
+# $3 - protocol; every file matching /proc/net/$3* is scanned
+#
+# Gives up silently after 10 attempts spaced 100ms apart; the return value
+# does not tell whether the port was found.
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local pattern
+	local i
+
+	# Match the port only as the ":PORT" suffix of the local_address column;
+	# a bare hex string could also hit a remote port, an address or any other
+	# column of the table.
+	pattern=":$(printf "%04X" "${port}") "
+
+	# for tcp additionally require the LISTEN (0A) socket state
+	[ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c74375..af5dc57 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro functional tests.
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -51,8 +53,7 @@
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
@@ -97,7 +98,7 @@
echo "ok" || \
echo "failed"&
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
kill -INT $pid
@@ -118,11 +119,9 @@
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
- sleep 0.1
- # first UDP GSO socket should be closed at this point
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 8949728..cb664679 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -40,8 +42,7 @@
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359b..dd47fa9 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -45,8 +47,7 @@
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index b62429b..65c8f3f 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -9,43 +9,13 @@
from tdc_config import *
-def prepare_suite(obj, test):
- original = obj.args.NAMES
-
- if 'skip' in test and test['skip'] == 'yes':
- return
-
- if 'nsPlugin' not in test['plugins']:
- return
-
- shadow = {}
- shadow['IP'] = original['IP']
- shadow['TC'] = original['TC']
- shadow['NS'] = '{}-{}'.format(original['NS'], test['random'])
- shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id'])
- shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id'])
- shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id'])
- shadow['DEV2'] = original['DEV2']
- obj.args.NAMES = shadow
-
- if obj.args.namespace:
- obj._ns_create()
- else:
- obj._ports_create()
-
- # Make sure the netns is visible in the fs
- while True:
- obj._proc_check()
- try:
- ns = obj.args.NAMES['NS']
- f = open('/run/netns/{}'.format(ns))
- f.close()
- break
- except:
- time.sleep(0.1)
- continue
-
- obj.args.NAMES = original
+try:
+ from pyroute2 import netns
+ from pyroute2 import IPRoute
+ netlink = True
+except ImportError:
+ netlink = False
+ print("!!! Consider installing pyroute2 !!!")
class SubPlugin(TdcPlugin):
def __init__(self):
@@ -57,60 +27,69 @@
super().pre_suite(testcount, testlist)
- print("Setting up namespaces and devices...")
+ def prepare_test(self, test):
+ if 'skip' in test and test['skip'] == 'yes':
+ return
- with Pool(self.args.mp) as p:
- it = zip(cycle([self]), testlist)
- p.starmap(prepare_suite, it)
+ if 'nsPlugin' not in test['plugins']:
+ return
- def pre_case(self, caseinfo, test_skip):
+ if netlink == True:
+ self._nl_ns_create()
+ else:
+ self._ns_create()
+
+ # Make sure the netns is visible in the fs
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ self._proc_check()
+ try:
+ ns = self.args.NAMES['NS']
+ f = open('/run/netns/{}'.format(ns))
+ f.close()
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+
+ def pre_case(self, test, test_skip):
if self.args.verbose:
print('{}.pre_case'.format(self.sub_class))
if test_skip:
return
+ self.prepare_test(test)
def post_case(self):
if self.args.verbose:
print('{}.post_case'.format(self.sub_class))
- if self.args.namespace:
- self._ns_destroy()
+ if netlink == True:
+ self._nl_ns_destroy()
else:
- self._ports_destroy()
+ self._ns_destroy()
def post_suite(self, index):
if self.args.verbose:
print('{}.post_suite'.format(self.sub_class))
# Make sure we don't leak resources
- for f in os.listdir('/run/netns/'):
- cmd = self._replace_keywords("$IP netns del {}".format(f))
+ cmd = self._replace_keywords("$IP -a netns del")  # $IP is a tdc keyword, not a shell variable
- if self.args.verbose > 3:
- print('_exec_cmd: command "{}"'.format(cmd))
+ if self.args.verbose > 3:
+ print('_exec_cmd: command "{}"'.format(cmd))
- subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
- def add_args(self, parser):
- super().add_args(parser)
- self.argparser_group = self.argparser.add_argument_group(
- 'netns',
- 'options for nsPlugin(run commands in net namespace)')
- self.argparser_group.add_argument(
- '-N', '--no-namespace', action='store_false', default=True,
- dest='namespace', help='Don\'t run commands in namespace')
- return self.argparser
+ subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def adjust_command(self, stage, command):
super().adjust_command(stage, command)
cmdform = 'list'
cmdlist = list()
- if not self.args.namespace:
- return command
-
if self.args.verbose:
print('{}.adjust_command'.format(self.sub_class))
@@ -138,41 +117,77 @@
print('adjust_command: return command [{}]'.format(command))
return command
- def _ports_create_cmds(self):
- cmds = []
+    def _nl_ns_create(self):
+        '''
+        Create the test namespace and its devices through netlink (pyroute2):
+        a veth pair with $DEV1 inside the namespace and $DEV0 left in the
+        namespace of PID 1, plus a dummy device inside the namespace.
+        Raises TimeoutError if a device cannot be brought up within 20 retries.
+        '''
+        ns = self.args.NAMES["NS"]
+        dev0 = self.args.NAMES["DEV0"]
+        dev1 = self.args.NAMES["DEV1"]
+        dummy = self.args.NAMES["DUMMY"]
- cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1'))
- cmds.append(self._replace_keywords('link set $DEV0 up'))
- cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
- if not self.args.namespace:
- cmds.append(self._replace_keywords('link set $DEV1 up'))
+        if self.args.verbose:
+            print('{}._nl_ns_create'.format(self.sub_class))
- return cmds
+        netns.create(ns)
+        netns.pushns(newns=ns)
+        # Always switch back, even if the retry loop below times out;
+        # otherwise the caller would keep running inside the test namespace.
+        try:
+            with IPRoute() as ip:
+                ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+                ip.link('add', ifname=dummy, kind='dummy')
+                ticks = 20
+                while True:
+                    if ticks == 0:
+                        raise TimeoutError
+                    try:
+                        dev1_idx = ip.link_lookup(ifname=dev1)[0]
+                        dummy_idx = ip.link_lookup(ifname=dummy)[0]
+                        ip.link('set', index=dev1_idx, state='up')
+                        ip.link('set', index=dummy_idx, state='up')
+                        break
+                    except Exception:
+                        time.sleep(0.1)
+                        ticks -= 1
+                        continue
+        finally:
+            netns.popns()
- def _ports_create(self):
- self._exec_cmd_batched('pre', self._ports_create_cmds())
-
- def _ports_destroy_cmd(self):
- return self._replace_keywords('link del $DEV0')
-
- def _ports_destroy(self):
- self._exec_cmd('post', self._ports_destroy_cmd())
+        with IPRoute() as ip:
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev0_idx = ip.link_lookup(ifname=dev0)[0]
+                    ip.link('set', index=dev0_idx, state='up')
+                    break
+                except Exception:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue
def _ns_create_cmds(self):
cmds = []
- if self.args.namespace:
- ns = self.args.NAMES['NS']
+        ns = self.args.NAMES['NS']
- cmds.append(self._replace_keywords('netns add {}'.format(ns)))
- cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
- cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+        cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
+        cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DEV0 up'))
- if self.args.device:
- cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+        if self.args.device:
+            cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
return cmds
@@ -181,9 +186,12 @@
Create the network namespace in which the tests will be run and set up
the required network devices for it.
'''
- self._ports_create()
self._exec_cmd_batched('pre', self._ns_create_cmds())
+ def _nl_ns_destroy(self):
+ ns = self.args.NAMES['NS']
+ netns.remove(ns)
+
def _ns_destroy_cmd(self):
return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
@@ -192,9 +200,7 @@
Destroy the network namespace for testing (and any associated network
devices as well)
'''
- if self.args.namespace:
- self._exec_cmd('post', self._ns_destroy_cmd())
- self._ports_destroy()
+ self._exec_cmd('post', self._ns_destroy_cmd())
@cached_property
def _proc(self):
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index ddc7c35..24bd0c2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -272,5 +272,62 @@
"teardown": [
"$TC qdisc del dev $DEV1 parent root drr"
]
+ },
+ {
+ "id": "bd32",
+ "name": "Try to delete hashtable referenced by another u32 filter",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "4585",
+ "name": "Delete small tree of u32 hashtables and filters",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a671819..669ec89 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -616,7 +616,7 @@
batches.insert(0, serial)
print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
- print("Using {} batches".format(len(batches)))
+ print("Using {} batches and {} workers".format(len(batches), args.mp))
# We can't pickle these objects so workaround them
global mp_pm
@@ -1017,6 +1017,7 @@
parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
args.NAMES = NAMES
+ args.mp = min(args.mp, 4)
pm.set_args(args)
check_default_settings(args, remaining, pm)
if args.verbose > 2:
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index eb357bd..4dbe50b 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,7 +1,68 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-modprobe netdevsim
-modprobe sch_teql
-./tdc.py -c actions --nobuildebpf
-./tdc.py -c qdisc
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+ modprobe -q -R "$1"
+ if [ $? -ne 0 ]; then
+ echo "Module $1 not found... skipping."
+ else
+ modprobe "$1"
+ fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_ipt
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc` -c actions --nobuildebpf
+./tdc.py -J`nproc` -c qdisc