Merge branch '10GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue
Tony Nguyen says:
====================
10GbE Intel Wired LAN Driver Updates 2021-12-28
Alexander Lobakin says:
napi_build_skb() I introduced earlier this year ([0]) aims
to decrease MM pressure and the overhead from in-place
kmem_cache_alloc() on each Rx entry processing by decaching
skbuff_heads from NAPI per-cpu cache filled prior to that by
napi_consume_skb() (so it is sort of a direct shortcut for
free -> mm -> alloc cycle).
Currently, no in-tree drivers use it. Switch all Intel Ethernet
drivers to it to get slight-to-medium perf boosts depending on
the frame size.
ice driver, 50 Gbps link, pktgen + XDP_PASS (local in) sample:
frame_size/nthreads 64/42 128/20 256/8 512/4 1024/2 1532/1
net-next (Kpps) 46062 34654 18248 9830 5343 2714
series 47438 34708 18330 9875 5435 2777
increase 2.9% 0.15% 0.45% 0.46% 1.72% 2.32%
Additionally, e1000's been switched to napi_consume_skb() as it's
safe and works fine there, and there's no point in napi_build_skb()
without paired NAPI cache feeding point.
[0] https://lore.kernel.org/all/20210213141021.87840-1-alobakin@pm.me
* '10GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
ixgbevf: switch to napi_build_skb()
ixgbe: switch to napi_build_skb()
igc: switch to napi_build_skb()
igb: switch to napi_build_skb()
ice: switch to napi_build_skb()
iavf: switch to napi_build_skb()
i40e: switch to napi_build_skb()
e1000: switch to napi_build_skb()
e1000: switch to napi_consume_skb()
====================
Link: https://lore.kernel.org/r/20211228175815.281449-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 14059e1..1f6808b 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -218,6 +218,7 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
free_irq(ch->dma.irq, priv);
if (IS_RX(ch->idx)) {
int desc;
+
for (desc = 0; desc < LTQ_DESC_NUM; desc++)
dev_kfree_skb_any(ch->skb[ch->dma.desc]);
}
@@ -498,6 +499,7 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
spin_lock_irqsave(&priv->lock, flags);
desc->addr = ((unsigned int) dma_map_single(&priv->pdev->dev, skb->data, len,
DMA_TO_DEVICE)) - byte_offset;
+ /* Make sure the address is written before we give it to HW */
wmb();
desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
LTQ_DMA_TX_OFFSET(byte_offset) | (len & LTQ_DMA_SIZE_MASK);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 3d68433..7161a5b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -615,6 +615,7 @@ struct rtl8169_private {
struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
u16 cp_cmd;
u32 irq_mask;
+ int irq;
struct clk *clk;
struct {
@@ -4712,7 +4713,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- free_irq(pci_irq_vector(pdev, 0), tp);
+ free_irq(tp->irq, tp);
phy_disconnect(tp->phydev);
@@ -4733,7 +4734,7 @@ static void rtl8169_netpoll(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), tp);
+ rtl8169_interrupt(tp->irq, tp);
}
#endif
@@ -4767,8 +4768,7 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
- retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
- irqflags, dev->name, tp);
+ retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
if (retval < 0)
goto err_release_fw_2;
@@ -4785,7 +4785,7 @@ static int rtl_open(struct net_device *dev)
return retval;
err_free_irq:
- free_irq(pci_irq_vector(pdev, 0), tp);
+ free_irq(tp->irq, tp);
err_release_fw_2:
rtl_release_firmware(tp);
rtl8169_rx_clear(tp);
@@ -5360,6 +5360,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(&pdev->dev, "Can't allocate interrupt\n");
return rc;
}
+ tp->irq = pci_irq_vector(pdev, 0);
INIT_WORK(&tp->wk.work, rtl_task);
@@ -5435,8 +5436,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return rc;
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
- rtl_chip_infos[chipset].name, dev->dev_addr, xid,
- pci_irq_vector(pdev, 0));
+ rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq);
if (jumbo_max)
netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",