| // SPDX-License-Identifier: GPL-2.0-only |
| /**************************************************************************** |
| * Driver for Solarflare network controllers and boards |
| * Copyright 2018 Solarflare Communications Inc. |
| * Copyright 2019-2020 Xilinx Inc. |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 as published |
| * by the Free Software Foundation, incorporated herein by reference. |
| */ |
| |
| #include <net/ip6_checksum.h> |
| |
| #include "net_driver.h" |
| #include "tx_common.h" |
| #include "nic_common.h" |
| #include "mcdi_functions.h" |
| #include "ef100_regs.h" |
| #include "io.h" |
| #include "ef100_tx.h" |
| #include "ef100_nic.h" |
| |
| int ef100_tx_probe(struct efx_tx_queue *tx_queue) |
| { |
| /* Allocate an extra descriptor for the QMDA status completion entry */ |
| return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd, |
| (tx_queue->ptr_mask + 2) * |
| sizeof(efx_oword_t), |
| GFP_KERNEL); |
| } |
| |
| void ef100_tx_init(struct efx_tx_queue *tx_queue) |
| { |
| /* must be the inverse of lookup in efx_get_tx_channel */ |
| tx_queue->core_txq = |
| netdev_get_tx_queue(tx_queue->efx->net_dev, |
| tx_queue->channel->channel - |
| tx_queue->efx->tx_channel_offset); |
| |
| /* This value is purely documentational; as EF100 never passes through |
| * the switch statement in tx.c:__efx_enqueue_skb(), that switch does |
| * not handle case 3. EF100's TSOv3 descriptors are generated by |
| * ef100_make_tso_desc(). |
| * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2. |
| */ |
| tx_queue->tso_version = 3; |
| if (efx_mcdi_tx_init(tx_queue)) |
| netdev_WARN(tx_queue->efx->net_dev, |
| "failed to initialise TXQ %d\n", tx_queue->queue); |
| } |
| |
| static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) |
| { |
| struct efx_nic *efx = tx_queue->efx; |
| struct ef100_nic_data *nic_data; |
| struct efx_tx_buffer *buffer; |
| size_t header_len; |
| u32 mss; |
| |
| nic_data = efx->nic_data; |
| |
| if (!skb_is_gso_tcp(skb)) |
| return false; |
| if (!(efx->net_dev->features & NETIF_F_TSO)) |
| return false; |
| |
| mss = skb_shinfo(skb)->gso_size; |
| if (unlikely(mss < 4)) { |
| WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss); |
| return false; |
| } |
| |
| header_len = efx_tx_tso_header_length(skb); |
| if (header_len > nic_data->tso_max_hdr_len) |
| return false; |
| |
| if (skb_shinfo(skb)->gso_segs > nic_data->tso_max_payload_num_segs) { |
| /* net_dev->gso_max_segs should've caught this */ |
| WARN_ON_ONCE(1); |
| return false; |
| } |
| |
| if (skb->data_len / mss > nic_data->tso_max_frames) |
| return false; |
| |
| /* net_dev->gso_max_size should've caught this */ |
| if (WARN_ON_ONCE(skb->data_len > nic_data->tso_max_payload_len)) |
| return false; |
| |
| /* Reserve an empty buffer for the TSO V3 descriptor. |
| * Convey the length of the header since we already know it. |
| */ |
| buffer = efx_tx_queue_get_insert_buffer(tx_queue); |
| buffer->flags = EFX_TX_BUF_TSO_V3 | EFX_TX_BUF_CONT; |
| buffer->len = header_len; |
| buffer->unmap_len = 0; |
| buffer->skb = skb; |
| ++tx_queue->insert_count; |
| return true; |
| } |
| |
| static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) |
| { |
| if (likely(tx_queue->txd.addr)) |
| return ((efx_oword_t *)tx_queue->txd.addr) + index; |
| else |
| return NULL; |
| } |
| |
| static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue) |
| { |
| unsigned int write_ptr; |
| efx_dword_t reg; |
| |
| tx_queue->xmit_pending = false; |
| |
| if (unlikely(tx_queue->notify_count == tx_queue->write_count)) |
| return; |
| |
| write_ptr = tx_queue->write_count & tx_queue->ptr_mask; |
| /* The write pointer goes into the high word */ |
| EFX_POPULATE_DWORD_1(reg, ERF_GZ_TX_RING_PIDX, write_ptr); |
| efx_writed_page(tx_queue->efx, ®, |
| ER_GZ_TX_RING_DOORBELL, tx_queue->queue); |
| tx_queue->notify_count = tx_queue->write_count; |
| } |
| |
| static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue) |
| { |
| ef100_notify_tx_desc(tx_queue); |
| ++tx_queue->pushes; |
| } |
| |
| static void ef100_set_tx_csum_partial(const struct sk_buff *skb, |
| struct efx_tx_buffer *buffer, efx_oword_t *txd) |
| { |
| efx_oword_t csum; |
| int csum_start; |
| |
| if (!skb || skb->ip_summed != CHECKSUM_PARTIAL) |
| return; |
| |
| /* skb->csum_start has the offset from head, but we need the offset |
| * from data. |
| */ |
| csum_start = skb_checksum_start_offset(skb); |
| EFX_POPULATE_OWORD_3(csum, |
| ESF_GZ_TX_SEND_CSO_PARTIAL_EN, 1, |
| ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, |
| csum_start >> 1, |
| ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W, |
| skb->csum_offset >> 1); |
| EFX_OR_OWORD(*txd, *txd, csum); |
| } |
| |
| static void ef100_set_tx_hw_vlan(const struct sk_buff *skb, efx_oword_t *txd) |
| { |
| u16 vlan_tci = skb_vlan_tag_get(skb); |
| efx_oword_t vlan; |
| |
| EFX_POPULATE_OWORD_2(vlan, |
| ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1, |
| ESF_GZ_TX_SEND_VLAN_INSERT_TCI, vlan_tci); |
| EFX_OR_OWORD(*txd, *txd, vlan); |
| } |
| |
| static void ef100_make_send_desc(struct efx_nic *efx, |
| const struct sk_buff *skb, |
| struct efx_tx_buffer *buffer, efx_oword_t *txd, |
| unsigned int segment_count) |
| { |
| /* TX send descriptor */ |
| EFX_POPULATE_OWORD_3(*txd, |
| ESF_GZ_TX_SEND_NUM_SEGS, segment_count, |
| ESF_GZ_TX_SEND_LEN, buffer->len, |
| ESF_GZ_TX_SEND_ADDR, buffer->dma_addr); |
| |
| if (likely(efx->net_dev->features & NETIF_F_HW_CSUM)) |
| ef100_set_tx_csum_partial(skb, buffer, txd); |
| if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX && |
| skb && skb_vlan_tag_present(skb)) |
| ef100_set_tx_hw_vlan(skb, txd); |
| } |
| |
| static void ef100_make_tso_desc(struct efx_nic *efx, |
| const struct sk_buff *skb, |
| struct efx_tx_buffer *buffer, efx_oword_t *txd, |
| unsigned int segment_count) |
| { |
| bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; |
| unsigned int len, ip_offset, tcp_offset, payload_segs; |
| u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; |
| unsigned int outer_ip_offset, outer_l4_offset; |
| u16 vlan_tci = skb_vlan_tag_get(skb); |
| u32 mss = skb_shinfo(skb)->gso_size; |
| bool encap = skb->encapsulation; |
| bool udp_encap = false; |
| u16 vlan_enable = 0; |
| struct tcphdr *tcp; |
| bool outer_csum; |
| u32 paylen; |
| |
| if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) |
| mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; |
| if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) |
| vlan_enable = skb_vlan_tag_present(skb); |
| |
| len = skb->len - buffer->len; |
| /* We use 1 for the TSO descriptor and 1 for the header */ |
| payload_segs = segment_count - 2; |
| if (encap) { |
| outer_ip_offset = skb_network_offset(skb); |
| outer_l4_offset = skb_transport_offset(skb); |
| ip_offset = skb_inner_network_offset(skb); |
| tcp_offset = skb_inner_transport_offset(skb); |
| if (skb_shinfo(skb)->gso_type & |
| (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) |
| udp_encap = true; |
| } else { |
| ip_offset = skb_network_offset(skb); |
| tcp_offset = skb_transport_offset(skb); |
| outer_ip_offset = outer_l4_offset = 0; |
| } |
| outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM; |
| |
| /* subtract TCP payload length from inner checksum */ |
| tcp = (void *)skb->data + tcp_offset; |
| paylen = skb->len - tcp_offset; |
| csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); |
| |
| EFX_POPULATE_OWORD_19(*txd, |
| ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, |
| ESF_GZ_TX_TSO_MSS, mss, |
| ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, |
| ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs, |
| ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1, |
| ESF_GZ_TX_TSO_PAYLOAD_LEN, len, |
| ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum, |
| ESF_GZ_TX_TSO_CSO_INNER_L4, 1, |
| ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, |
| ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, |
| ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, |
| ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, |
| ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, |
| ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, |
| ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, |
| ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, |
| ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : |
| ESE_GZ_TX_DESC_IP4_ID_NO_OP, |
| ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, |
| ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci |
| ); |
| } |
| |
| static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, |
| const struct sk_buff *skb, |
| unsigned int segment_count, |
| struct efx_rep *efv) |
| { |
| unsigned int old_write_count = tx_queue->write_count; |
| unsigned int new_write_count = old_write_count; |
| struct efx_tx_buffer *buffer; |
| unsigned int next_desc_type; |
| unsigned int write_ptr; |
| efx_oword_t *txd; |
| unsigned int nr_descs = tx_queue->insert_count - old_write_count; |
| |
| if (unlikely(nr_descs == 0)) |
| return; |
| |
| if (segment_count) |
| next_desc_type = ESE_GZ_TX_DESC_TYPE_TSO; |
| else |
| next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND; |
| |
| if (unlikely(efv)) { |
| /* Create TX override descriptor */ |
| write_ptr = new_write_count & tx_queue->ptr_mask; |
| txd = ef100_tx_desc(tx_queue, write_ptr); |
| ++new_write_count; |
| |
| tx_queue->packet_write_count = new_write_count; |
| EFX_POPULATE_OWORD_3(*txd, |
| ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX, |
| ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport, |
| ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1); |
| nr_descs--; |
| } |
| |
| /* if it's a raw write (such as XDP) then always SEND single frames */ |
| if (!skb) |
| nr_descs = 1; |
| |
| do { |
| write_ptr = new_write_count & tx_queue->ptr_mask; |
| buffer = &tx_queue->buffer[write_ptr]; |
| txd = ef100_tx_desc(tx_queue, write_ptr); |
| ++new_write_count; |
| |
| /* Create TX descriptor ring entry */ |
| tx_queue->packet_write_count = new_write_count; |
| |
| switch (next_desc_type) { |
| case ESE_GZ_TX_DESC_TYPE_SEND: |
| ef100_make_send_desc(tx_queue->efx, skb, |
| buffer, txd, nr_descs); |
| break; |
| case ESE_GZ_TX_DESC_TYPE_TSO: |
| /* TX TSO descriptor */ |
| WARN_ON_ONCE(!(buffer->flags & EFX_TX_BUF_TSO_V3)); |
| ef100_make_tso_desc(tx_queue->efx, skb, |
| buffer, txd, nr_descs); |
| break; |
| default: |
| /* TX segment descriptor */ |
| EFX_POPULATE_OWORD_3(*txd, |
| ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG, |
| ESF_GZ_TX_SEG_LEN, buffer->len, |
| ESF_GZ_TX_SEG_ADDR, buffer->dma_addr); |
| } |
| /* if it's a raw write (such as XDP) then always SEND */ |
| next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG : |
| ESE_GZ_TX_DESC_TYPE_SEND; |
| /* mark as an EFV buffer if applicable */ |
| if (unlikely(efv)) |
| buffer->flags |= EFX_TX_BUF_EFV; |
| |
| } while (new_write_count != tx_queue->insert_count); |
| |
| wmb(); /* Ensure descriptors are written before they are fetched */ |
| |
| tx_queue->write_count = new_write_count; |
| |
| /* The write_count above must be updated before reading |
| * channel->holdoff_doorbell to avoid a race with the |
| * completion path, so ensure these operations are not |
| * re-ordered. This also flushes the update of write_count |
| * back into the cache. |
| */ |
| smp_mb(); |
| } |
| |
| void ef100_tx_write(struct efx_tx_queue *tx_queue) |
| { |
| ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL); |
| ef100_tx_push_buffers(tx_queue); |
| } |
| |
| int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) |
| { |
| unsigned int tx_done = |
| EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC); |
| unsigned int qlabel = |
| EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_Q_LABEL); |
| struct efx_tx_queue *tx_queue = |
| efx_channel_get_tx_queue(channel, qlabel); |
| unsigned int tx_index = (tx_queue->read_count + tx_done - 1) & |
| tx_queue->ptr_mask; |
| |
| return efx_xmit_done(tx_queue, tx_index); |
| } |
| |
| /* Add a socket buffer to a TX queue |
| * |
| * You must hold netif_tx_lock() to call this function. |
| * |
| * Returns 0 on success, error code otherwise. In case of an error this |
| * function will free the SKB. |
| */ |
| netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, |
| struct sk_buff *skb) |
| { |
| return __ef100_enqueue_skb(tx_queue, skb, NULL); |
| } |
| |
| int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, |
| struct efx_rep *efv) |
| { |
| unsigned int old_insert_count = tx_queue->insert_count; |
| struct efx_nic *efx = tx_queue->efx; |
| bool xmit_more = netdev_xmit_more(); |
| unsigned int fill_level; |
| unsigned int segments; |
| int rc; |
| |
| if (!tx_queue->buffer || !tx_queue->ptr_mask) { |
| netif_stop_queue(efx->net_dev); |
| dev_kfree_skb_any(skb); |
| return -ENODEV; |
| } |
| |
| segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; |
| if (segments == 1) |
| segments = 0; /* Don't use TSO/GSO for a single segment. */ |
| if (segments && !ef100_tx_can_tso(tx_queue, skb)) { |
| rc = efx_tx_tso_fallback(tx_queue, skb); |
| tx_queue->tso_fallbacks++; |
| if (rc) |
| goto err; |
| else |
| return 0; |
| } |
| |
| if (unlikely(efv)) { |
| struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue); |
| |
| /* Drop representor packets if the queue is stopped. |
| * We currently don't assert backoff to representors so this is |
| * to make sure representor traffic can't starve the main |
| * net device. |
| * And, of course, if there are no TX descriptors left. |
| */ |
| if (netif_tx_queue_stopped(tx_queue->core_txq) || |
| unlikely(efx_tx_buffer_in_use(buffer))) { |
| atomic64_inc(&efv->stats.tx_errors); |
| rc = -ENOSPC; |
| goto err; |
| } |
| |
| /* Also drop representor traffic if it could cause us to |
| * stop the queue. If we assert backoff and we haven't |
| * received traffic on the main net device recently then the |
| * TX watchdog can go off erroneously. |
| */ |
| fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); |
| fill_level += efx_tx_max_skb_descs(efx); |
| if (fill_level > efx->txq_stop_thresh) { |
| struct efx_tx_queue *txq2; |
| |
| /* Refresh cached fill level and re-check */ |
| efx_for_each_channel_tx_queue(txq2, tx_queue->channel) |
| txq2->old_read_count = READ_ONCE(txq2->read_count); |
| |
| fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); |
| fill_level += efx_tx_max_skb_descs(efx); |
| if (fill_level > efx->txq_stop_thresh) { |
| atomic64_inc(&efv->stats.tx_errors); |
| rc = -ENOSPC; |
| goto err; |
| } |
| } |
| |
| buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV; |
| tx_queue->insert_count++; |
| } |
| |
| /* Map for DMA and create descriptors */ |
| rc = efx_tx_map_data(tx_queue, skb, segments); |
| if (rc) |
| goto err; |
| ef100_tx_make_descriptors(tx_queue, skb, segments, efv); |
| |
| fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); |
| if (fill_level > efx->txq_stop_thresh) { |
| struct efx_tx_queue *txq2; |
| |
| /* Because of checks above, representor traffic should |
| * not be able to stop the queue. |
| */ |
| WARN_ON(efv); |
| |
| netif_tx_stop_queue(tx_queue->core_txq); |
| /* Re-read after a memory barrier in case we've raced with |
| * the completion path. Otherwise there's a danger we'll never |
| * restart the queue if all completions have just happened. |
| */ |
| smp_mb(); |
| efx_for_each_channel_tx_queue(txq2, tx_queue->channel) |
| txq2->old_read_count = READ_ONCE(txq2->read_count); |
| fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); |
| if (fill_level < efx->txq_stop_thresh) |
| netif_tx_start_queue(tx_queue->core_txq); |
| } |
| |
| tx_queue->xmit_pending = true; |
| |
| /* If xmit_more then we don't need to push the doorbell, unless there |
| * are 256 descriptors already queued in which case we have to push to |
| * ensure we never push more than 256 at once. |
| * |
| * Always push for representor traffic, and don't account it to parent |
| * PF netdevice's BQL. |
| */ |
| if (unlikely(efv) || |
| __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || |
| tx_queue->write_count - tx_queue->notify_count > 255) |
| ef100_tx_push_buffers(tx_queue); |
| |
| if (segments) { |
| tx_queue->tso_bursts++; |
| tx_queue->tso_packets += segments; |
| tx_queue->tx_packets += segments; |
| } else { |
| tx_queue->tx_packets++; |
| } |
| return 0; |
| |
| err: |
| efx_enqueue_unwind(tx_queue, old_insert_count); |
| if (!IS_ERR_OR_NULL(skb)) |
| dev_kfree_skb_any(skb); |
| |
| /* If we're not expecting another transmit and we had something to push |
| * on this queue then we need to push here to get the previous packets |
| * out. We only enter this branch from before the xmit_more handling |
| * above, so xmit_pending still refers to the old state. |
| */ |
| if (tx_queue->xmit_pending && !xmit_more) |
| ef100_tx_push_buffers(tx_queue); |
| return rc; |
| } |