// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

#include "e1000_hw.h"
#include "igb.h"

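/* Switch the ring's buffer bookkeeping between copy mode (rx_buffer_info)
 * and zero-copy mode (rx_buffer_info_zc). Only one of the two arrays is in
 * use at a time, so the stale array is freed once its replacement has been
 * allocated; on allocation failure the current array is left untouched.
 */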
static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
{
	int size = pool_present ?
		sizeof(*ring->rx_buffer_info_zc) * ring->count :
		sizeof(*ring->rx_buffer_info) * ring->count;
	void *buff_info = vmalloc(size);

	if (!buff_info)
		return -ENOMEM;

	if (pool_present) {
		vfree(ring->rx_buffer_info);
		ring->rx_buffer_info = NULL;
		ring->rx_buffer_info_zc = buff_info;
	} else {
		vfree(ring->rx_buffer_info_zc);
		ring->rx_buffer_info_zc = NULL;
		ring->rx_buffer_info = buff_info;
	}

	return 0;
}

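/* Quiesce a Tx/Rx queue pair before the attached XSK pool changes: stop the
 * hardware queues, wait for in-flight datapath users, disable the shared
 * NAPI context, and drop the buffers and statistics still held by the rings.
 */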
static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
{
	struct igb_ring *tx_ring = adapter->tx_ring[qid];
	struct igb_ring *rx_ring = adapter->rx_ring[qid];
	struct e1000_hw *hw = &adapter->hw;

	set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);

	wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
	wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);

	synchronize_net();

	/* Rx/Tx share the same napi context. */
	napi_disable(&rx_ring->q_vector->napi);

	igb_clean_tx_ring(tx_ring);
	igb_clean_rx_ring(rx_ring);

	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
}

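/* Bring a Tx/Rx queue pair back up after an XSK pool change: reprogram the
 * descriptor rings, refill Rx with whichever allocator matches the current
 * mode (zero-copy or copy), and re-enable the shared NAPI context.
 */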
static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
{
	struct igb_ring *tx_ring = adapter->tx_ring[qid];
	struct igb_ring *rx_ring = adapter->rx_ring[qid];

	igb_configure_tx_ring(adapter, tx_ring);
	igb_configure_rx_ring(adapter, rx_ring);

	synchronize_net();

	clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean
	 */
	if (rx_ring->xsk_pool)
		igb_alloc_rx_buffers_zc(rx_ring, rx_ring->xsk_pool,
					igb_desc_unused(rx_ring));
	else
		igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));

	/* Rx/Tx share the same napi context. */
	napi_enable(&rx_ring->q_vector->napi);
}

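/* Return the XSK pool for this ring, or NULL when zero-copy must not be
 * used. The pool->dev test filters out pools that are registered for this
 * queue but have not yet been DMA mapped for the device.
 */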
struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
				   struct igb_ring *ring)
{
	int qid = ring->queue_index;
	struct xsk_buff_pool *pool;

	pool = xsk_get_pool_from_qid(adapter->netdev, qid);

	if (!igb_xdp_is_enabled(adapter))
		return NULL;

	return (pool && pool->dev) ? pool : NULL;
}

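/* Attach an XSK pool to the Rx/Tx queue pair @qid: DMA map the UMEM and, if
 * the interface is up with XDP enabled, restart the queue pair with
 * zero-copy buffer bookkeeping and kick NAPI so Rx processing resumes.
 */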
static int igb_xsk_pool_enable(struct igb_adapter *adapter,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct net_device *netdev = adapter->netdev;
	struct igb_ring *rx_ring;
	bool if_running;
	int err;

	if (qid >= adapter->num_rx_queues)
		return -EINVAL;

	if (qid >= netdev->real_num_rx_queues ||
	    qid >= netdev->real_num_tx_queues)
		return -EINVAL;

	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
	if (err)
		return err;

	rx_ring = adapter->rx_ring[qid];
	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
	if (if_running)
		igb_txrx_ring_disable(adapter, qid);

	if (if_running) {
		err = igb_realloc_rx_buffer_info(rx_ring, true);
		if (!err) {
			igb_txrx_ring_enable(adapter, qid);
			/* Kick start the NAPI context so that receiving will start */
			err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
		}

		if (err) {
			xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
			return err;
		}
	}

	return 0;
}

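/* Detach the XSK pool from queue pair @qid and undo its DMA mapping; when
 * the interface is running, the queue pair is restarted in copy mode.
 */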
static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
{
	struct xsk_buff_pool *pool;
	struct igb_ring *rx_ring;
	bool if_running;
	int err;

	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
	if (!pool)
		return -EINVAL;

	rx_ring = adapter->rx_ring[qid];
	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
	if (if_running)
		igb_txrx_ring_disable(adapter, qid);

	xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);

	if (if_running) {
		err = igb_realloc_rx_buffer_info(rx_ring, false);
		if (err)
			return err;

		igb_txrx_ring_enable(adapter, qid);
	}

	return 0;
}

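/* Entry point for XSK pool setup and teardown, presumably reached through
 * the driver's ndo_bpf handler (XDP_SETUP_XSK_POOL): a non-NULL @pool
 * attaches the pool to queue @qid, a NULL @pool detaches it.
 */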
int igb_xsk_pool_setup(struct igb_adapter *adapter,
		       struct xsk_buff_pool *pool,
		       u16 qid)
{
	return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
		igb_xsk_pool_disable(adapter, qid);
}

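/* Grab up to @count xdp_buffs from the pool in one batch and write their DMA
 * addresses into the descriptor ring starting at @rx_desc. The write-back
 * length is zeroed so the cleanup path can tell when hardware has completed
 * a descriptor. Returns the number of descriptors actually filled.
 */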
static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
			     union e1000_adv_rx_desc *rx_desc, u16 count)
{
	dma_addr_t dma;
	u16 buffs;
	int i;

	/* nothing to do */
	if (!count)
		return 0;

	buffs = xsk_buff_alloc_batch(pool, xdp, count);
	for (i = 0; i < buffs; i++) {
		dma = xsk_buff_xdp_get_dma(*xdp);
		rx_desc->read.pkt_addr = cpu_to_le64(dma);
		rx_desc->wb.upper.length = 0;

		rx_desc++;
		xdp++;
	}

	return buffs;
}

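/* Refill @count Rx descriptors from the XSK pool, splitting the work in two
 * batches when the request wraps past the end of the ring. Returns true only
 * if the full request could be satisfied.
 */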
bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
			     struct xsk_buff_pool *xsk_pool, u16 count)
{
	u32 nb_buffs_extra = 0, nb_buffs = 0;
	union e1000_adv_rx_desc *rx_desc;
	u16 ntu = rx_ring->next_to_use;
	u16 total_count = count;
	struct xdp_buff **xdp;

	rx_desc = IGB_RX_DESC(rx_ring, ntu);
	xdp = &rx_ring->rx_buffer_info_zc[ntu];

	if (ntu + count >= rx_ring->count) {
		nb_buffs_extra = igb_fill_rx_descs(xsk_pool, xdp, rx_desc,
						   rx_ring->count - ntu);
		if (nb_buffs_extra != rx_ring->count - ntu) {
			ntu += nb_buffs_extra;
			goto exit;
		}
		rx_desc = IGB_RX_DESC(rx_ring, 0);
		xdp = rx_ring->rx_buffer_info_zc;
		ntu = 0;
		count -= nb_buffs_extra;
	}

	nb_buffs = igb_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
	ntu += nb_buffs;
	if (ntu == rx_ring->count)
		ntu = 0;

	/* clear the length for the next_to_use descriptor */
	rx_desc = IGB_RX_DESC(rx_ring, ntu);
	rx_desc->wb.upper.length = 0;

exit:
	if (rx_ring->next_to_use != ntu) {
		rx_ring->next_to_use = ntu;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch. (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(ntu, rx_ring->tail);
	}

	return total_count == (nb_buffs + nb_buffs_extra);
}

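/* Return every xdp_buff still sitting between next_to_clean and next_to_use
 * to the XSK pool, e.g. when the ring is being torn down or reconfigured.
 */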
void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
{
	u16 ntc = rx_ring->next_to_clean;
	u16 ntu = rx_ring->next_to_use;

	while (ntc != ntu) {
		struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];

		xsk_buff_free(xdp);
		ntc++;
		if (ntc >= rx_ring->count)
			ntc = 0;
	}
}

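/* Copy path for XDP_PASS in zero-copy mode: the frame (including metadata)
 * is copied out of the UMEM into a freshly allocated skb so the xdp_buff can
 * be handed back to the pool immediately.
 */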
static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
					    struct xdp_buff *xdp,
					    ktime_t timestamp)
{
	unsigned int totalsize = xdp->data_end - xdp->data_meta;
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	net_prefetch(xdp->data_meta);

	/* allocate a skb to store the frags */
	skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
	if (unlikely(!skb))
		return NULL;

	if (timestamp)
		skb_hwtstamps(skb)->hwtstamp = timestamp;

	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
	       ALIGN(totalsize, sizeof(long)));

	if (metasize) {
		skb_metadata_set(skb, metasize);
		__skb_pull(skb, metasize);
	}

	return skb;
}

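/* Run the XDP program on one zero-copy buffer. XDP_REDIRECT is handled first
 * as the common AF_XDP case; a redirect failing with -ENOBUFS on a pool that
 * uses need_wakeup returns IGB_XDP_EXIT so the caller stops polling and lets
 * user space catch up instead of burning the rest of the budget.
 */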
static int igb_run_xdp_zc(struct igb_adapter *adapter, struct igb_ring *rx_ring,
			  struct xdp_buff *xdp, struct xsk_buff_pool *xsk_pool,
			  struct bpf_prog *xdp_prog)
{
	int err, result = IGB_XDP_PASS;
	u32 act;

	prefetchw(xdp->data_hard_start); /* xdp_frame write */

	act = bpf_prog_run_xdp(xdp_prog, xdp);

	if (likely(act == XDP_REDIRECT)) {
		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
		if (!err)
			return IGB_XDP_REDIR;

		if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS)
			result = IGB_XDP_EXIT;
		else
			result = IGB_XDP_CONSUMED;
		goto out_failure;
	}

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		result = igb_xdp_xmit_back(adapter, xdp);
		if (result == IGB_XDP_CONSUMED)
			goto out_failure;
		break;
	default:
		bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
out_failure:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		result = IGB_XDP_CONSUMED;
		break;
	}

	return result;
}

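/* Zero-copy Rx poll loop: for each completed descriptor, size the xdp_buff,
 * strip a leading PTP timestamp header if present, and run the XDP program.
 * Frames that pass are copied into skbs and handed to the stack; consumed
 * buffers go straight back to the pool. Ring refill and need_wakeup handling
 * are done once per poll, after the loop.
 */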
int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
			struct xsk_buff_pool *xsk_pool, const int budget)
{
	struct igb_adapter *adapter = q_vector->adapter;
	unsigned int total_bytes = 0, total_packets = 0;
	struct igb_ring *rx_ring = q_vector->rx.ring;
	u32 ntc = rx_ring->next_to_clean;
	struct bpf_prog *xdp_prog;
	unsigned int xdp_xmit = 0;
	bool failure = false;
	u16 entries_to_alloc;
	struct sk_buff *skb;

	/* xdp_prog cannot be NULL in the ZC path */
	xdp_prog = READ_ONCE(rx_ring->xdp_prog);

	while (likely(total_packets < budget)) {
		union e1000_adv_rx_desc *rx_desc;
		ktime_t timestamp = 0;
		struct xdp_buff *xdp;
		unsigned int size;
		int xdp_res = 0;

		rx_desc = IGB_RX_DESC(rx_ring, ntc);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		xdp = rx_ring->rx_buffer_info_zc[ntc];
		xsk_buff_set_size(xdp, size);
		xsk_buff_dma_sync_for_cpu(xdp);

		/* pull rx packet timestamp if available and valid */
		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
			int ts_hdr_len;

			ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
							 xdp->data,
							 &timestamp);

			xdp->data += ts_hdr_len;
			xdp->data_meta += ts_hdr_len;
			size -= ts_hdr_len;
		}

		xdp_res = igb_run_xdp_zc(adapter, rx_ring, xdp, xsk_pool,
					 xdp_prog);

		if (xdp_res) {
			if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
				xdp_xmit |= xdp_res;
			} else if (xdp_res == IGB_XDP_EXIT) {
				failure = true;
				break;
			} else if (xdp_res == IGB_XDP_CONSUMED) {
				xsk_buff_free(xdp);
			}

			total_packets++;
			total_bytes += size;
			ntc++;
			if (ntc == rx_ring->count)
				ntc = 0;
			continue;
		}

		skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);

		/* exit if we failed to retrieve a buffer */
		if (!skb) {
			rx_ring->rx_stats.alloc_failed++;
			break;
		}

		xsk_buff_free(xdp);
		ntc++;
		if (ntc == rx_ring->count)
			ntc = 0;

		if (eth_skb_pad(skb))
			continue;

		/* probably a little skewed due to removing CRC */
		total_bytes += skb->len;

		/* populate checksum, timestamp, VLAN, and protocol */
		igb_process_skb_fields(rx_ring, rx_desc, skb);

		napi_gro_receive(&q_vector->napi, skb);

		/* update budget accounting */
		total_packets++;
	}

	rx_ring->next_to_clean = ntc;

	if (xdp_xmit)
		igb_finalize_xdp(adapter, xdp_xmit);

	igb_update_rx_stats(q_vector, total_packets, total_bytes);

	entries_to_alloc = igb_desc_unused(rx_ring);
	if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
		failure |= !igb_alloc_rx_buffers_zc(rx_ring, xsk_pool,
						    entries_to_alloc);

	if (xsk_uses_need_wakeup(xsk_pool)) {
		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(xsk_pool);
		else
			xsk_clear_rx_need_wakeup(xsk_pool);

		return (int)total_packets;
	}
	return failure ? budget : (int)total_packets;
}

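/* Transmit frames posted by user space on the XSK Tx ring: descriptors are
 * peeked in one batch and turned into advanced Tx descriptors, and
 * completion is left to the regular Tx cleanup path via next_to_watch.
 */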
bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
{
	unsigned int budget = igb_desc_unused(tx_ring);
	u32 cmd_type, olinfo_status, nb_pkts, i = 0;
	struct xdp_desc *descs = xsk_pool->tx_descs;
	union e1000_adv_tx_desc *tx_desc = NULL;
	struct igb_tx_buffer *tx_buffer_info;
	unsigned int total_bytes = 0;
	dma_addr_t dma;

	if (!netif_carrier_ok(tx_ring->netdev))
		return true;

	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
		return true;

	nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
	if (!nb_pkts)
		return true;

	while (nb_pkts-- > 0) {
		dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
		xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);

		tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
		tx_buffer_info->bytecount = descs[i].len;
		tx_buffer_info->type = IGB_TYPE_XSK;
		tx_buffer_info->xdpf = NULL;
		tx_buffer_info->gso_segs = 1;
		tx_buffer_info->time_stamp = jiffies;

		tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		/* put descriptor type bits */
		cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
			   E1000_ADVTXD_DCMD_IFCS;
		olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;

		/* FIXME: This sets the Report Status (RS) bit for every
		 * descriptor. A nice-to-have optimization would be to set it
		 * only for the last descriptor in the whole batch; see the
		 * Intel ice driver for an example of how to do it.
		 */
		cmd_type |= descs[i].len | IGB_TXD_DCMD;
		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);

		total_bytes += descs[i].len;

		i++;
		tx_ring->next_to_use++;
		tx_buffer_info->next_to_watch = tx_desc;
		if (tx_ring->next_to_use == tx_ring->count)
			tx_ring->next_to_use = 0;
	}

	netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
	igb_xdp_ring_update_tail(tx_ring);

	return nb_pkts < budget;
}

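/* ndo_xsk_wakeup implementation: if the queue's NAPI context is not already
 * scheduled (or marked missed), trigger a software interrupt so the
 * interrupt handler schedules it and the zero-copy Tx/Rx work is processed.
 */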
int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
	struct igb_adapter *adapter = netdev_priv(dev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_ring *ring;
	u32 eics = 0;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return -ENETDOWN;

	if (!igb_xdp_is_enabled(adapter))
		return -EINVAL;

	if (qid >= adapter->num_tx_queues)
		return -EINVAL;

	ring = adapter->tx_ring[qid];

	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
		return -ENETDOWN;

	if (!READ_ONCE(ring->xsk_pool))
		return -EINVAL;

	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
		/* Cause software interrupt */
		if (adapter->flags & IGB_FLAG_HAS_MSIX) {
			eics |= ring->q_vector->eims_value;
			wr32(E1000_EICS, eics);
		} else {
			wr32(E1000_ICS, E1000_ICS_RXDMT0);
		}
	}

	return 0;
}