Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2020-10-15
The main changes are:
1) Fix register equivalence tracking in verifier, from Alexei Starovoitov.
2) Fix sockmap error path to not call bpf_prog_put() with NULL, from Alex Dewar.
3) Fix sockmap to add locking annotations to iterator, from Lorenz Bauer.
4) Fix tcp_hdr_options test to use loopback address, from Martin KaFai Lau.
====================
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst
index 8f0347b..3d435ca 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -465,9 +465,9 @@
-----------------
XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
-default for SMP). The functionality remains disabled until explicitly
-configured. To enable XPS, the bitmap of CPUs/receive-queues that may
-use a transmit queue is configured using the sysfs file entry:
+default for SMP). If compiled in, it is driver dependent whether, and
+how, XPS is configured at device init. The mapping of CPUs/receive-queues
+to transmit queue can be inspected and configured using sysfs:
For selection based on CPUs map::
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index bb092b6..b02f981 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -137,6 +137,4 @@
int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev);
int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
int err);
-int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
-void chcr_add_xfrmops(const struct cxgb4_lld_info *lld);
#endif /* __CHCR_CORE_H__ */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index 014351e..9f71b9d 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -97,41 +97,9 @@
struct rtnl_link_stats64 *storage)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
- u64 rx_packets = 0ull;
- u64 rx_bytes = 0ull;
- u64 tx_packets = 0ull;
- u64 tx_bytes = 0ull;
- int i;
netdev_stats_to_stats64(storage, &dev->stats);
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *stats;
- unsigned int start;
- u64 trx_packets;
- u64 trx_bytes;
- u64 ttx_packets;
- u64 ttx_bytes;
-
- stats = per_cpu_ptr(priv->netstats, i);
- do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- trx_packets = stats->rx_packets;
- trx_bytes = stats->rx_bytes;
- ttx_packets = stats->tx_packets;
- ttx_bytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-
- rx_packets += trx_packets;
- rx_bytes += trx_bytes;
- tx_packets += ttx_packets;
- tx_bytes += ttx_bytes;
- }
-
- storage->rx_packets += rx_packets;
- storage->rx_bytes += rx_bytes;
- storage->tx_packets += tx_packets;
- storage->tx_bytes += tx_bytes;
+ dev_fetch_sw_netstats(storage, priv->netstats);
}
static const struct net_device_ops hfi1_ipoib_netdev_ops = {
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index 38ea5e6..e6d0cb9 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -144,8 +144,6 @@
struct net_device *ndev = dev_get_drvdata(dev);
struct m_can_classdev *mcan_class = netdev_priv(ndev);
- m_can_class_suspend(dev);
-
clk_disable_unprepare(mcan_class->cclk);
clk_disable_unprepare(mcan_class->hclk);
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 01f5784f..0ef8549 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -103,14 +103,8 @@
INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
- /* Read MIB counters every 30 seconds to avoid overflow. */
- dev->mib_read_interval = msecs_to_jiffies(30000);
-
for (i = 0; i < dev->mib_port_cnt; i++)
dev->dev_ops->port_init_cnt(dev, i);
-
- /* Start the timer 2 seconds later. */
- schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
}
EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
@@ -143,7 +137,9 @@
/* Read all MIB counters when the link is going down. */
p->read = true;
- schedule_delayed_work(&dev->mib_read, 0);
+ /* timer started */
+ if (dev->mib_read_interval)
+ schedule_delayed_work(&dev->mib_read, 0);
}
EXPORT_SYMBOL_GPL(ksz_mac_link_down);
@@ -451,6 +447,12 @@
return ret;
}
+ /* Read MIB counters every 30 seconds to avoid overflow. */
+ dev->mib_read_interval = msecs_to_jiffies(30000);
+
+ /* Start the MIB timer. */
+ schedule_delayed_work(&dev->mib_read, 0);
+
return 0;
}
EXPORT_SYMBOL(ksz_switch_register);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 3fd5c6c..5de47f6 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -366,6 +366,8 @@
#define MACB_ISR_RLE_SIZE 1
#define MACB_TXERR_OFFSET 6 /* EN TX frame corrupt from error interrupt */
#define MACB_TXERR_SIZE 1
+#define MACB_RM9200_TBRE_OFFSET 6 /* EN may send new frame interrupt (RM9200) */
+#define MACB_RM9200_TBRE_SIZE 1
#define MACB_TCOMP_OFFSET 7 /* Enable transmit complete interrupt */
#define MACB_TCOMP_SIZE 1
#define MACB_ISR_LINK_OFFSET 9 /* Enable link change interrupt */
@@ -1205,10 +1207,10 @@
phy_interface_t phy_interface;
- /* AT91RM9200 transmit */
- struct sk_buff *skb; /* holds skb until xmit interrupt completes */
- dma_addr_t skb_physaddr; /* phys addr from pci_map_single */
- int skb_length; /* saved skb length for pci_unmap_single */
+ /* AT91RM9200 transmit queue (1 on wire + 1 queued) */
+ struct macb_tx_skb rm9200_txq[2];
+ unsigned int rm9200_tx_tail;
+ unsigned int rm9200_tx_len;
unsigned int max_tx_length;
u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES];
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 4b42b2d..883e47c 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3908,6 +3908,7 @@
MACB_BIT(ISR_TUND) |
MACB_BIT(ISR_RLE) |
MACB_BIT(TCOMP) |
+ MACB_BIT(RM9200_TBRE) |
MACB_BIT(ISR_ROVR) |
MACB_BIT(HRESP));
@@ -3924,6 +3925,7 @@
MACB_BIT(ISR_TUND) |
MACB_BIT(ISR_RLE) |
MACB_BIT(TCOMP) |
+ MACB_BIT(RM9200_TBRE) |
MACB_BIT(ISR_ROVR) |
MACB_BIT(HRESP));
@@ -3993,24 +3995,34 @@
struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
+ unsigned long flags;
- if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
- netif_stop_queue(dev);
+ if (lp->rm9200_tx_len < 2) {
+ int desc = lp->rm9200_tx_tail;
/* Store packet information (to free when Tx completed) */
- lp->skb = skb;
- lp->skb_length = skb->len;
- lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data,
- skb->len, DMA_TO_DEVICE);
- if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) {
+ lp->rm9200_txq[desc].skb = skb;
+ lp->rm9200_txq[desc].size = skb->len;
+ lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
dev_kfree_skb_any(skb);
dev->stats.tx_dropped++;
netdev_err(dev, "%s: DMA mapping error\n", __func__);
return NETDEV_TX_OK;
}
+ spin_lock_irqsave(&lp->lock, flags);
+
+ lp->rm9200_tx_tail = (desc + 1) & 1;
+ lp->rm9200_tx_len++;
+ if (lp->rm9200_tx_len > 1)
+ netif_stop_queue(dev);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
/* Set address of the data in the Transmit Address register */
- macb_writel(lp, TAR, lp->skb_physaddr);
+ macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping);
/* Set length of the packet in the Transmit Control register */
macb_writel(lp, TCR, skb->len);
@@ -4073,6 +4085,9 @@
struct net_device *dev = dev_id;
struct macb *lp = netdev_priv(dev);
u32 intstatus, ctl;
+ unsigned int desc;
+ unsigned int qlen;
+ u32 tsr;
/* MAC Interrupt Status register indicates what interrupts are pending.
* It is automatically cleared once read.
@@ -4084,20 +4099,39 @@
at91ether_rx(dev);
/* Transmit complete */
- if (intstatus & MACB_BIT(TCOMP)) {
+ if (intstatus & (MACB_BIT(TCOMP) | MACB_BIT(RM9200_TBRE))) {
/* The TCOM bit is set even if the transmission failed */
if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE)))
dev->stats.tx_errors++;
- if (lp->skb) {
- dev_consume_skb_irq(lp->skb);
- lp->skb = NULL;
- dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr,
- lp->skb_length, DMA_TO_DEVICE);
+ spin_lock(&lp->lock);
+
+ tsr = macb_readl(lp, TSR);
+
+ /* we have three possibilities here:
+ * - all pending packets transmitted (TGO, implies BNQ)
+ * - only first packet transmitted (!TGO && BNQ)
+ * - two frames pending (!TGO && !BNQ)
+ * Note that TGO ("transmit go") is called "IDLE" on RM9200.
+ */
+ qlen = (tsr & MACB_BIT(TGO)) ? 0 :
+ (tsr & MACB_BIT(RM9200_BNQ)) ? 1 : 2;
+
+ while (lp->rm9200_tx_len > qlen) {
+ desc = (lp->rm9200_tx_tail - lp->rm9200_tx_len) & 1;
+ dev_consume_skb_irq(lp->rm9200_txq[desc].skb);
+ lp->rm9200_txq[desc].skb = NULL;
+ dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
+ lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
dev->stats.tx_packets++;
- dev->stats.tx_bytes += lp->skb_length;
+ dev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+ lp->rm9200_tx_len--;
}
- netif_wake_queue(dev);
+
+ if (lp->rm9200_tx_len < 2 && netif_queue_stopped(dev))
+ netif_wake_queue(dev);
+
+ spin_unlock(&lp->lock);
}
/* Work-around for EMAC Errata section 41.3.1 */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index f642c1b..1b88bd1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -60,6 +60,89 @@
PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
};
+static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = {
+ /* Default supported NAT modes */
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_NONE,
+ .natmode = NAT_MODE_NONE,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP,
+ .natmode = NAT_MODE_DIP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT,
+ .natmode = NAT_MODE_DIP_DP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_DIP_DP_SIP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_DP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
+ CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_ALL,
+ },
+ /* T6+ can ignore L4 ports when they're disabled. */
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_DP_SP,
+ },
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_ALL,
+ },
+};
+
+static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs,
+ u8 natmode_flags)
+{
+ u8 i = 0;
+
+ /* Translate the enabled NAT 4-tuple fields to one of the
+ * hardware supported NAT mode configurations. This ensures
+ * that we pick a valid combination, where the disabled fields
+ * do not get overwritten to 0.
+ */
+ for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
+ if (cxgb4_natmode_config_array[i].flags == natmode_flags) {
+ fs->nat_mode = cxgb4_natmode_config_array[i].natmode;
+ return;
+ }
+ }
+}
+
static struct ch_tc_flower_entry *allocate_flower_entry(void)
{
struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
@@ -289,7 +372,8 @@
}
static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
- u32 mask, u32 offset, u8 htype)
+ u32 mask, u32 offset, u8 htype,
+ u8 *natmode_flags)
{
switch (htype) {
case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
@@ -314,67 +398,102 @@
switch (offset) {
case PEDIT_IP4_SRC:
offload_pedit(fs, val, mask, IP4_SRC);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP4_DST:
offload_pedit(fs, val, mask, IP4_DST);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
switch (offset) {
case PEDIT_IP6_SRC_31_0:
offload_pedit(fs, val, mask, IP6_SRC_31_0);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_63_32:
offload_pedit(fs, val, mask, IP6_SRC_63_32);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_95_64:
offload_pedit(fs, val, mask, IP6_SRC_95_64);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_127_96:
offload_pedit(fs, val, mask, IP6_SRC_127_96);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_DST_31_0:
offload_pedit(fs, val, mask, IP6_DST_31_0);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_63_32:
offload_pedit(fs, val, mask, IP6_DST_63_32);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_95_64:
offload_pedit(fs, val, mask, IP6_DST_95_64);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_127_96:
offload_pedit(fs, val, mask, IP6_DST_127_96);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
switch (offset) {
case PEDIT_TCP_SPORT_DPORT:
- if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
- else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ } else {
fs->nat_lport = val >> 16;
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
+ }
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
switch (offset) {
case PEDIT_UDP_SPORT_DPORT:
- if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
- else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ } else {
fs->nat_lport = val >> 16;
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
+ }
}
- fs->nat_mode = NAT_MODE_ALL;
+ break;
}
}
+static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags,
+ struct netlink_ext_ack *extack)
+{
+ u8 i = 0;
+
+ /* Extract the NAT mode to enable based on what 4-tuple fields
+ * are enabled to be overwritten. This ensures that the
+ * disabled fields don't get overwritten to 0.
+ */
+ for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
+ const struct cxgb4_natmode_config *c;
+
+ c = &cxgb4_natmode_config_array[i];
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) >= c->chip &&
+ natmode_flags == c->flags)
+ return 0;
+ }
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination");
+ return -EOPNOTSUPP;
+}
+
void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions,
struct ch_filter_specification *fs)
{
struct flow_action_entry *act;
+ u8 natmode_flags = 0;
int i;
flow_action_for_each(i, act, actions) {
@@ -426,7 +545,8 @@
val = act->mangle.val;
offset = act->mangle.offset;
- process_pedit_field(fs, val, mask, offset, htype);
+ process_pedit_field(fs, val, mask, offset, htype,
+ &natmode_flags);
}
break;
case FLOW_ACTION_QUEUE:
@@ -438,6 +558,9 @@
break;
}
}
+ if (natmode_flags)
+ cxgb4_action_natmode_tweak(fs, natmode_flags);
+
}
static bool valid_l4_mask(u32 mask)
@@ -454,7 +577,8 @@
}
static bool valid_pedit_action(struct net_device *dev,
- const struct flow_action_entry *act)
+ const struct flow_action_entry *act,
+ u8 *natmode_flags)
{
u32 mask, offset;
u8 htype;
@@ -479,7 +603,10 @@
case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
switch (offset) {
case PEDIT_IP4_SRC:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
+ break;
case PEDIT_IP4_DST:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -493,10 +620,13 @@
case PEDIT_IP6_SRC_63_32:
case PEDIT_IP6_SRC_95_64:
case PEDIT_IP6_SRC_127_96:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
+ break;
case PEDIT_IP6_DST_31_0:
case PEDIT_IP6_DST_63_32:
case PEDIT_IP6_DST_95_64:
case PEDIT_IP6_DST_127_96:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -512,6 +642,10 @@
__func__);
return false;
}
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -527,6 +661,10 @@
__func__);
return false;
}
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -546,10 +684,12 @@
struct netlink_ext_ack *extack,
u8 matchall_filter)
{
+ struct adapter *adap = netdev2adap(dev);
struct flow_action_entry *act;
bool act_redir = false;
bool act_pedit = false;
bool act_vlan = false;
+ u8 natmode_flags = 0;
int i;
if (!flow_action_basic_hw_stats_check(actions, extack))
@@ -563,7 +703,6 @@
break;
case FLOW_ACTION_MIRRED:
case FLOW_ACTION_REDIRECT: {
- struct adapter *adap = netdev2adap(dev);
struct net_device *n_dev, *target_dev;
bool found = false;
unsigned int i;
@@ -620,7 +759,8 @@
}
break;
case FLOW_ACTION_MANGLE: {
- bool pedit_valid = valid_pedit_action(dev, act);
+ bool pedit_valid = valid_pedit_action(dev, act,
+ &natmode_flags);
if (!pedit_valid)
return -EOPNOTSUPP;
@@ -642,6 +782,15 @@
return -EINVAL;
}
+ if (act_pedit) {
+ int ret;
+
+ ret = cxgb4_action_natmode_validate(adap, natmode_flags,
+ extack);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
index 6296e1d..3a2fa00 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -108,6 +108,21 @@
#define PEDIT_TCP_SPORT_DPORT 0x0
#define PEDIT_UDP_SPORT_DPORT 0x0
+enum cxgb4_action_natmode_flags {
+ CXGB4_ACTION_NATMODE_NONE = 0,
+ CXGB4_ACTION_NATMODE_DIP = (1 << 0),
+ CXGB4_ACTION_NATMODE_SIP = (1 << 1),
+ CXGB4_ACTION_NATMODE_DPORT = (1 << 2),
+ CXGB4_ACTION_NATMODE_SPORT = (1 << 3),
+};
+
+/* TC PEDIT action to NATMODE translation entry */
+struct cxgb4_natmode_config {
+ enum chip_type chip;
+ u8 flags;
+ u8 natmode;
+};
+
void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions,
struct ch_filter_specification *fs);
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
index 0e7d251..072299b 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c
@@ -35,7 +35,7 @@
* Atul Gupta (atul.gupta@chelsio.com)
*/
-#define pr_fmt(fmt) "chcr:" fmt
+#define pr_fmt(fmt) "ch_ipsec: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
@@ -72,20 +72,21 @@
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
-static int chcr_xfrm_add_state(struct xfrm_state *x);
-static void chcr_xfrm_del_state(struct xfrm_state *x);
-static void chcr_xfrm_free_state(struct xfrm_state *x);
-static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
-static void chcr_advance_esn_state(struct xfrm_state *x);
+static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state);
+static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop);
+static void ch_ipsec_advance_esn_state(struct xfrm_state *x);
+static void ch_ipsec_xfrm_free_state(struct xfrm_state *x);
+static void ch_ipsec_xfrm_del_state(struct xfrm_state *x);
+static int ch_ipsec_xfrm_add_state(struct xfrm_state *x);
-static const struct xfrmdev_ops chcr_xfrmdev_ops = {
- .xdo_dev_state_add = chcr_xfrm_add_state,
- .xdo_dev_state_delete = chcr_xfrm_del_state,
- .xdo_dev_state_free = chcr_xfrm_free_state,
- .xdo_dev_offload_ok = chcr_ipsec_offload_ok,
- .xdo_dev_state_advance_esn = chcr_advance_esn_state,
+static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = ch_ipsec_xfrm_add_state,
+ .xdo_dev_state_delete = ch_ipsec_xfrm_del_state,
+ .xdo_dev_state_free = ch_ipsec_xfrm_free_state,
+ .xdo_dev_offload_ok = ch_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = ch_ipsec_advance_esn_state,
};
static struct cxgb4_uld_info ch_ipsec_uld_info = {
@@ -95,8 +96,8 @@
.rxq_size = 1024,
.add = ch_ipsec_uld_add,
.state_change = ch_ipsec_uld_state_change,
- .tx_handler = chcr_ipsec_xmit,
- .xfrmdev_ops = &chcr_xfrmdev_ops,
+ .tx_handler = ch_ipsec_xmit,
+ .xfrmdev_ops = &ch_ipsec_xfrmdev_ops,
};
static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop)
@@ -119,7 +120,7 @@
{
struct ipsec_uld_ctx *u_ctx = handle;
- pr_info("new_state %u\n", new_state);
+ pr_debug("new_state %u\n", new_state);
switch (new_state) {
case CXGB4_STATE_UP:
pr_info("%s: Up\n", pci_name(u_ctx->lldi.pdev));
@@ -140,8 +141,8 @@
return 0;
}
-static inline int chcr_ipsec_setauthsize(struct xfrm_state *x,
- struct ipsec_sa_entry *sa_entry)
+static int ch_ipsec_setauthsize(struct xfrm_state *x,
+ struct ipsec_sa_entry *sa_entry)
{
int hmac_ctrl;
int authsize = x->aead->alg_icv_len / 8;
@@ -164,8 +165,8 @@
return hmac_ctrl;
}
-static inline int chcr_ipsec_setkey(struct xfrm_state *x,
- struct ipsec_sa_entry *sa_entry)
+static int ch_ipsec_setkey(struct xfrm_state *x,
+ struct ipsec_sa_entry *sa_entry)
{
int keylen = (x->aead->alg_key_len + 7) / 8;
unsigned char *key = x->aead->alg_key;
@@ -223,65 +224,65 @@
}
/*
- * chcr_xfrm_add_state
+ * ch_ipsec_xfrm_add_state
* returns 0 on success, negative error if failed to send message to FPGA
* positive error if FPGA returned a bad response
*/
-static int chcr_xfrm_add_state(struct xfrm_state *x)
+static int ch_ipsec_xfrm_add_state(struct xfrm_state *x)
{
struct ipsec_sa_entry *sa_entry;
int res = 0;
if (x->props.aalgo != SADB_AALG_NONE) {
- pr_debug("CHCR: Cannot offload authenticated xfrm states\n");
+ pr_debug("Cannot offload authenticated xfrm states\n");
return -EINVAL;
}
if (x->props.calgo != SADB_X_CALG_NONE) {
- pr_debug("CHCR: Cannot offload compressed xfrm states\n");
+ pr_debug("Cannot offload compressed xfrm states\n");
return -EINVAL;
}
if (x->props.family != AF_INET &&
x->props.family != AF_INET6) {
- pr_debug("CHCR: Only IPv4/6 xfrm state offloaded\n");
+ pr_debug("Only IPv4/6 xfrm state offloaded\n");
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_TRANSPORT &&
x->props.mode != XFRM_MODE_TUNNEL) {
- pr_debug("CHCR: Only transport and tunnel xfrm offload\n");
+ pr_debug("Only transport and tunnel xfrm offload\n");
return -EINVAL;
}
if (x->id.proto != IPPROTO_ESP) {
- pr_debug("CHCR: Only ESP xfrm state offloaded\n");
+ pr_debug("Only ESP xfrm state offloaded\n");
return -EINVAL;
}
if (x->encap) {
- pr_debug("CHCR: Encapsulated xfrm state not offloaded\n");
+ pr_debug("Encapsulated xfrm state not offloaded\n");
return -EINVAL;
}
if (!x->aead) {
- pr_debug("CHCR: Cannot offload xfrm states without aead\n");
+ pr_debug("Cannot offload xfrm states without aead\n");
return -EINVAL;
}
if (x->aead->alg_icv_len != 128 &&
x->aead->alg_icv_len != 96) {
- pr_debug("CHCR: Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n");
+ pr_debug("Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n");
return -EINVAL;
}
if ((x->aead->alg_key_len != 128 + 32) &&
(x->aead->alg_key_len != 256 + 32)) {
- pr_debug("CHCR: Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ pr_debug("cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
return -EINVAL;
}
if (x->tfcpad) {
- pr_debug("CHCR: Cannot offload xfrm states with tfc padding\n");
+ pr_debug("Cannot offload xfrm states with tfc padding\n");
return -EINVAL;
}
if (!x->geniv) {
- pr_debug("CHCR: Cannot offload xfrm states without geniv\n");
+ pr_debug("Cannot offload xfrm states without geniv\n");
return -EINVAL;
}
if (strcmp(x->geniv, "seqiv")) {
- pr_debug("CHCR: Cannot offload xfrm states with geniv other than seqiv\n");
+ pr_debug("Cannot offload xfrm states with geniv other than seqiv\n");
return -EINVAL;
}
@@ -291,24 +292,24 @@
goto out;
}
- sa_entry->hmac_ctrl = chcr_ipsec_setauthsize(x, sa_entry);
+ sa_entry->hmac_ctrl = ch_ipsec_setauthsize(x, sa_entry);
if (x->props.flags & XFRM_STATE_ESN)
sa_entry->esn = 1;
- chcr_ipsec_setkey(x, sa_entry);
+ ch_ipsec_setkey(x, sa_entry);
x->xso.offload_handle = (unsigned long)sa_entry;
try_module_get(THIS_MODULE);
out:
return res;
}
-static void chcr_xfrm_del_state(struct xfrm_state *x)
+static void ch_ipsec_xfrm_del_state(struct xfrm_state *x)
{
/* do nothing */
if (!x->xso.offload_handle)
return;
}
-static void chcr_xfrm_free_state(struct xfrm_state *x)
+static void ch_ipsec_xfrm_free_state(struct xfrm_state *x)
{
struct ipsec_sa_entry *sa_entry;
@@ -320,7 +321,7 @@
module_put(THIS_MODULE);
}
-static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
if (x->props.family == AF_INET) {
/* Offload with IP options is not supported yet */
@@ -334,15 +335,15 @@
return true;
}
-static void chcr_advance_esn_state(struct xfrm_state *x)
+static void ch_ipsec_advance_esn_state(struct xfrm_state *x)
{
/* do nothing */
if (!x->xso.offload_handle)
return;
}
-static inline int is_eth_imm(const struct sk_buff *skb,
- struct ipsec_sa_entry *sa_entry)
+static int is_eth_imm(const struct sk_buff *skb,
+ struct ipsec_sa_entry *sa_entry)
{
unsigned int kctx_len;
int hdrlen;
@@ -360,9 +361,9 @@
return 0;
}
-static inline unsigned int calc_tx_sec_flits(const struct sk_buff *skb,
- struct ipsec_sa_entry *sa_entry,
- bool *immediate)
+static unsigned int calc_tx_sec_flits(const struct sk_buff *skb,
+ struct ipsec_sa_entry *sa_entry,
+ bool *immediate)
{
unsigned int kctx_len;
unsigned int flits;
@@ -403,7 +404,7 @@
return flits;
}
-inline void *copy_esn_pktxt(struct sk_buff *skb,
+static void *copy_esn_pktxt(struct sk_buff *skb,
struct net_device *dev,
void *pos,
struct ipsec_sa_entry *sa_entry)
@@ -457,7 +458,7 @@
return pos;
}
-inline void *copy_cpltx_pktxt(struct sk_buff *skb,
+static void *copy_cpltx_pktxt(struct sk_buff *skb,
struct net_device *dev,
void *pos,
struct ipsec_sa_entry *sa_entry)
@@ -501,10 +502,10 @@
return pos;
}
-inline void *copy_key_cpltx_pktxt(struct sk_buff *skb,
- struct net_device *dev,
- void *pos,
- struct ipsec_sa_entry *sa_entry)
+static void *copy_key_cpltx_pktxt(struct sk_buff *skb,
+ struct net_device *dev,
+ void *pos,
+ struct ipsec_sa_entry *sa_entry)
{
struct _key_ctx *key_ctx;
int left, eoq, key_len;
@@ -549,11 +550,11 @@
return pos;
}
-inline void *chcr_crypto_wreq(struct sk_buff *skb,
- struct net_device *dev,
- void *pos,
- int credits,
- struct ipsec_sa_entry *sa_entry)
+static void *ch_ipsec_crypto_wreq(struct sk_buff *skb,
+ struct net_device *dev,
+ void *pos,
+ int credits,
+ struct ipsec_sa_entry *sa_entry)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
@@ -674,13 +675,13 @@
* Returns the number of Tx descriptors needed for the supplied number
* of flits.
*/
-static inline unsigned int flits_to_desc(unsigned int n)
+static unsigned int flits_to_desc(unsigned int n)
{
WARN_ON(n > SGE_MAX_WR_LEN / 8);
return DIV_ROUND_UP(n, 8);
}
-static inline unsigned int txq_avail(const struct sge_txq *q)
+static unsigned int txq_avail(const struct sge_txq *q)
{
return q->size - 1 - q->in_use;
}
@@ -691,7 +692,7 @@
q->q.stops++;
}
-static inline void txq_advance(struct sge_txq *q, unsigned int n)
+static void txq_advance(struct sge_txq *q, unsigned int n)
{
q->in_use += n;
q->pidx += n;
@@ -700,9 +701,9 @@
}
/*
- * chcr_ipsec_xmit called from ULD Tx handler
+ * ch_ipsec_xmit called from ULD Tx handler
*/
-int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
+int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xfrm_state *x = xfrm_input_state(skb);
unsigned int last_desc, ndesc, flits = 0;
@@ -763,8 +764,8 @@
before = (u64 *)pos;
end = (u64 *)pos + flits;
/* Setup IPSec CPL */
- pos = (void *)chcr_crypto_wreq(skb, dev, (void *)pos,
- credits, sa_entry);
+ pos = (void *)ch_ipsec_crypto_wreq(skb, dev, (void *)pos,
+ credits, sa_entry);
if (before > (u64 *)pos) {
left = (u8 *)end - (u8 *)q->q.stat;
end = (void *)q->q.desc + left;
@@ -791,14 +792,14 @@
return NETDEV_TX_OK;
}
-static int __init chcr_ipsec_init(void)
+static int __init ch_ipsec_init(void)
{
cxgb4_register_uld(CXGB4_ULD_IPSEC, &ch_ipsec_uld_info);
return 0;
}
-static void __exit chcr_ipsec_exit(void)
+static void __exit ch_ipsec_exit(void)
{
struct ipsec_uld_ctx *u_ctx, *tmp;
struct adapter *adap;
@@ -814,8 +815,8 @@
cxgb4_unregister_uld(CXGB4_ULD_IPSEC);
}
-module_init(chcr_ipsec_init);
-module_exit(chcr_ipsec_exit);
+module_init(ch_ipsec_init);
+module_exit(ch_ipsec_exit);
MODULE_DESCRIPTION("Crypto IPSEC for Chelsio Terminator cards.");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c043afb..8f7eca1 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1912,6 +1912,27 @@
return ret;
}
+static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct phy_device *phy_dev = ndev->phydev;
+
+ if (phy_dev) {
+ phy_reset_after_clk_enable(phy_dev);
+ } else if (fep->phy_node) {
+ /*
+ * If the PHY still is not bound to the MAC, but there is
+ * OF PHY node and a matching PHY device instance already,
+ * use the OF PHY node to obtain the PHY device instance,
+ * and then use that PHY device instance when triggering
+ * the PHY reset.
+ */
+ phy_dev = of_phy_find_device(fep->phy_node);
+ phy_reset_after_clk_enable(phy_dev);
+ put_device(&phy_dev->mdio.dev);
+ }
+}
+
static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1938,7 +1959,7 @@
if (ret)
goto failed_clk_ref;
- phy_reset_after_clk_enable(ndev->phydev);
+ fec_enet_phy_reset_after_clk_enable(ndev);
} else {
clk_disable_unprepare(fep->clk_enet_out);
if (fep->clk_ptp) {
@@ -2983,16 +3004,16 @@
/* Init MAC prior to mii bus probe */
fec_restart(ndev);
- /* Probe and connect to PHY when open the interface */
- ret = fec_enet_mii_probe(ndev);
- if (ret)
- goto err_enet_mii_probe;
-
/* Call phy_reset_after_clk_enable() again if it failed during
* phy_reset_after_clk_enable() before because the PHY wasn't probed.
*/
if (reset_again)
- phy_reset_after_clk_enable(ndev->phydev);
+ fec_enet_phy_reset_after_clk_enable(ndev);
+
+ /* Probe and connect to PHY when open the interface */
+ ret = fec_enet_mii_probe(ndev);
+ if (ret)
+ goto err_enet_mii_probe;
if (fep->quirks & FEC_QUIRK_ERR006687)
imx6q_cpuidle_fec_irqs_used();
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index c5c7326..7ef3369 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1349,6 +1349,7 @@
int offset = ibmveth_rxq_frame_offset(adapter);
int csum_good = ibmveth_rxq_csum_good(adapter);
int lrg_pkt = ibmveth_rxq_large_packet(adapter);
+ __sum16 iph_check = 0;
skb = ibmveth_rxq_get_buffer(adapter);
@@ -1385,16 +1386,26 @@
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, netdev);
+ /* PHYP without PLSO support places a -1 in the ip
+ * checksum for large send frames.
+ */
+ if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)skb->data;
+
+ iph_check = iph->check;
+ }
+
+ if ((length > netdev->mtu + ETH_HLEN) ||
+ lrg_pkt || iph_check == 0xffff) {
+ ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
+ adapter->rx_large_packets++;
+ }
+
if (csum_good) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
ibmveth_rx_csum_helper(skb, adapter);
}
- if (length > netdev->mtu + ETH_HLEN) {
- ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
- adapter->rx_large_packets++;
- }
-
napi_gro_receive(napi, skb); /* send it up */
netdev->stats.rx_packets++;
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index fb5af23..4c0c943 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -4401,17 +4401,9 @@
static void e1000_clear_vfta(struct e1000_hw *hw)
{
u32 offset;
- u32 vfta_value = 0;
- u32 vfta_offset = 0;
- u32 vfta_bit_in_reg = 0;
for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
- /* If the offset we want to clear is the same offset of the
- * manageability VLAN ID, then clear all bits except that of the
- * manageability unit
- */
- vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0;
- E1000_WRITE_REG_ARRAY(hw, VFTA, offset, vfta_value);
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
E1000_WRITE_FLUSH();
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index c897a28..593912b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -541,6 +541,12 @@
(aq->api_maj_ver == 1 &&
aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722))
hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+
+ if (aq->api_maj_ver > 1 ||
+ (aq->api_maj_ver == 1 &&
+ aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722))
+ hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE;
+
fallthrough;
default:
break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index c0c8efe..1e960c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -24,6 +24,8 @@
#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009
/* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */
#define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006
+/* API version 1.10 for X722 devices adds ability to request FEC encoding */
+#define I40E_MINOR_VER_FW_REQUEST_FEC_X722 0x000A
struct i40e_aq_desc {
__le16 flags;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index dc15771..26ba1f3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -891,6 +891,7 @@
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseT_Full);
+ i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
break;
case I40E_PHY_TYPE_SGMII:
ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
@@ -1481,12 +1482,16 @@
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
u8 fec_cfg = 0;
- int err = 0;
if (hw->device_id != I40E_DEV_ID_25G_SFP28 &&
- hw->device_id != I40E_DEV_ID_25G_B) {
- err = -EPERM;
- goto done;
+ hw->device_id != I40E_DEV_ID_25G_B &&
+ hw->device_id != I40E_DEV_ID_KX_X722)
+ return -EPERM;
+
+ if (hw->mac.type == I40E_MAC_X722 &&
+ !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) {
+ netdev_err(netdev, "Setting FEC encoding not supported by firmware. Please update the NVM image.\n");
+ return -EOPNOTSUPP;
}
switch (fecparam->fec) {
@@ -1508,14 +1513,10 @@
default:
dev_warn(&pf->pdev->dev, "Unsupported FEC mode: %d",
fecparam->fec);
- err = -EINVAL;
- goto done;
+ return -EINVAL;
}
- err = i40e_set_fec_cfg(netdev, fec_cfg);
-
-done:
- return err;
+ return i40e_set_fec_cfg(netdev, fec_cfg);
}
static int i40e_nway_reset(struct net_device *netdev)
@@ -4951,8 +4952,7 @@
}
}
- if (((changed_flags & I40E_FLAG_RS_FEC) ||
- (changed_flags & I40E_FLAG_BASE_R_FEC)) &&
+ if (changed_flags & I40E_FLAG_RS_FEC &&
pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
pf->hw.device_id != I40E_DEV_ID_25G_B) {
dev_warn(&pf->pdev->dev,
@@ -4960,6 +4960,15 @@
return -EOPNOTSUPP;
}
+ if (changed_flags & I40E_FLAG_BASE_R_FEC &&
+ pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
+ pf->hw.device_id != I40E_DEV_ID_25G_B &&
+ pf->hw.device_id != I40E_DEV_ID_KX_X722) {
+ dev_warn(&pf->pdev->dev,
+ "Device does not support changing FEC configuration\n");
+ return -EOPNOTSUPP;
+ }
+
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
* changed in the code above.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 929c647..4f8a2154 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6625,6 +6625,25 @@
netdev_info(vsi->netdev,
"NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
speed, req_fec, fec, an, fc);
+ } else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) {
+ req_fec = "None";
+ fec = "None";
+ an = "False";
+
+ if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+ an = "True";
+
+ if (pf->hw.phy.link_info.fec_info &
+ I40E_AQ_CONFIG_FEC_KR_ENA)
+ fec = "CL74 FC-FEC/BASE-R";
+
+ if (pf->hw.phy.link_info.req_fec_info &
+ I40E_AQ_REQUEST_FEC_KR)
+ req_fec = "CL74 FC-FEC/BASE-R";
+
+ netdev_info(vsi->netdev,
+ "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
+ speed, req_fec, fec, an, fc);
} else {
netdev_info(vsi->netdev,
"NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 97d29df..c0bdc66 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -595,6 +595,7 @@
#define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5)
#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6)
#define I40E_HW_FLAG_DROP_MODE BIT_ULL(7)
+#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8)
u64 flags;
/* Used in set switch config AQ command */
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 03e0349..af441d6 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1113,7 +1113,7 @@
return rc;
probe_err_register:
- kfree(lp->td_ring);
+ kfree(KSEG0ADDR(lp->td_ring));
probe_err_td_ring:
iounmap(lp->tx_dma_regs);
probe_err_dma_tx:
@@ -1133,6 +1133,7 @@
iounmap(lp->eth_regs);
iounmap(lp->rx_dma_regs);
iounmap(lp->tx_dma_regs);
+ kfree(KSEG0ADDR(lp->td_ring));
unregister_netdev(bif->dev);
free_netdev(bif->dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 99d7737..502d1b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -943,6 +943,9 @@
bool clean_complete = true;
int done;
+ if (!budget)
+ return 0;
+
if (priv->tx_ring_num[TX_XDP]) {
xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
if (xdp_tx_cq->xdp_busy) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 45869b2..3ddb726 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -350,7 +350,7 @@
.dma = tx_info->map0_dma,
};
- if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+ if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
dma_unmap_page(priv->ddev, tx_info->map0_dma,
PAGE_SIZE, priv->dma_dir);
put_page(tx_info->page);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 6a97452..dc74470 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -236,6 +236,7 @@
struct mlx5e_flow_steering {
struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
#ifdef CONFIG_MLX5_EN_RXNFC
struct mlx5e_ethtool_steering ethtool;
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 2ea1cdc..899b98a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -107,6 +107,9 @@
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_accel_tx_tls_state tls;
#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_accel_tx_ipsec_state ipsec;
+#endif
};
static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
@@ -125,22 +128,46 @@
}
#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) {
+ if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec)))
+ return false;
+ }
+#endif
+
return true;
}
+static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ return mlx5e_ipsec_is_tx_flow(&state->ipsec);
+#endif
+
+ return false;
+}
+
+static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
+ struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state))
+ return mlx5e_ipsec_tx_ids_len(&state->ipsec);
+#endif
+
+ return 0;
+}
+
/* Part of the eseg touched by TX offloads */
#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
- struct mlx5e_txqsq *sq,
struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
#ifdef CONFIG_MLX5_EN_IPSEC
- if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
- if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, eseg, skb)))
- return false;
- }
+ if (xfrm_offload(skb))
+ mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
#endif
#if IS_ENABLED(CONFIG_GENEVE)
@@ -153,11 +180,18 @@
static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe,
- struct mlx5e_accel_tx_state *state)
+ struct mlx5e_accel_tx_state *state,
+ struct mlx5_wqe_inline_seg *inlseg)
{
#ifdef CONFIG_MLX5_EN_TLS
mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) &&
+ state->ipsec.xo && state->ipsec.tailen)
+ mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg);
+#endif
}
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index d39989c..3d45341 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -560,6 +560,9 @@
return;
}
+ if (mlx5_is_ipsec_device(mdev))
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
+
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 0fc8b4d..6164c7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -76,6 +76,7 @@
};
struct mlx5e_accel_fs_esp;
+struct mlx5e_ipsec_tx;
struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
@@ -87,6 +88,7 @@
struct mlx5e_ipsec_stats stats;
struct workqueue_struct *wq;
struct mlx5e_accel_fs_esp *rx_fs;
+ struct mlx5e_ipsec_tx *tx_fs;
};
struct mlx5e_ipsec_esn_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index b974f3c..0e45590 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -34,6 +34,12 @@
struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES];
};
+struct mlx5e_ipsec_tx {
+ struct mlx5_flow_table *ft;
+ struct mutex mutex; /* Protect IPsec TX steering */
+ u32 refcnt;
+};
+
/* IPsec RX flow steering */
static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
{
@@ -323,6 +329,77 @@
mutex_unlock(&fs_prot->prot_mutex);
}
+/* IPsec TX flow steering */
+static int tx_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ priv->fs.egress_ns =
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS_KERNEL);
+ if (!priv->fs.egress_ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
+ return err;
+ }
+ ipsec->tx_fs->ft = ft;
+ return 0;
+}
+
+static void tx_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (IS_ERR_OR_NULL(ipsec->tx_fs->ft))
+ return;
+
+ mlx5_destroy_flow_table(ipsec->tx_fs->ft);
+ ipsec->tx_fs->ft = NULL;
+}
+
+static int tx_ft_get(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+ int err = 0;
+
+ mutex_lock(&tx_fs->mutex);
+ if (tx_fs->refcnt++)
+ goto out;
+
+ err = tx_create(priv);
+ if (err) {
+ tx_fs->refcnt--;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&tx_fs->mutex);
+ return err;
+}
+
+static void tx_ft_put(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+
+ mutex_lock(&tx_fs->mutex);
+ if (--tx_fs->refcnt)
+ goto out;
+
+ tx_destroy(priv);
+
+out:
+ mutex_unlock(&tx_fs->mutex);
+}
+
static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
u32 ipsec_obj_id,
struct mlx5_flow_spec *spec,
@@ -457,6 +534,54 @@
return err;
}
+static int tx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 ipsec_obj_id,
+ struct mlx5e_ipsec_rule *ipsec_rule)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ err = tx_ft_get(priv);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+
+ /* Add IPsec indicator in metadata_reg_a */
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
+ rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ attrs->action, err);
+ goto out;
+ }
+
+ ipsec_rule->rule = rule;
+
+out:
+ kvfree(spec);
+ if (err)
+ tx_ft_put(priv);
+ return err;
+}
+
static void rx_del_rule(struct mlx5e_priv *priv,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5e_ipsec_rule *ipsec_rule)
@@ -470,15 +595,27 @@
rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
}
+static void tx_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_rule *ipsec_rule)
+{
+ mlx5_del_flow_rules(ipsec_rule->rule);
+ ipsec_rule->rule = NULL;
+
+ tx_ft_put(priv);
+}
+
int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
struct mlx5_accel_esp_xfrm_attrs *attrs,
u32 ipsec_obj_id,
struct mlx5e_ipsec_rule *ipsec_rule)
{
- if (!priv->ipsec->rx_fs || attrs->action != MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ if (!priv->ipsec->rx_fs)
return -EOPNOTSUPP;
- return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
+ if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
+ else
+ return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
}
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
@@ -488,7 +625,18 @@
if (!priv->ipsec->rx_fs)
return;
- rx_del_rule(priv, attrs, ipsec_rule);
+ if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ rx_del_rule(priv, attrs, ipsec_rule);
+ else
+ tx_del_rule(priv, ipsec_rule);
+}
+
+static void fs_cleanup_tx(struct mlx5e_priv *priv)
+{
+ mutex_destroy(&priv->ipsec->tx_fs->mutex);
+ WARN_ON(priv->ipsec->tx_fs->refcnt);
+ kfree(priv->ipsec->tx_fs);
+ priv->ipsec->tx_fs = NULL;
}
static void fs_cleanup_rx(struct mlx5e_priv *priv)
@@ -507,6 +655,17 @@
priv->ipsec->rx_fs = NULL;
}
+static int fs_init_tx(struct mlx5e_priv *priv)
+{
+ priv->ipsec->tx_fs =
+ kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL);
+ if (!priv->ipsec->tx_fs)
+ return -ENOMEM;
+
+ mutex_init(&priv->ipsec->tx_fs->mutex);
+ return 0;
+}
+
static int fs_init_rx(struct mlx5e_priv *priv)
{
struct mlx5e_accel_fs_esp_prot *fs_prot;
@@ -532,13 +691,24 @@
if (!priv->ipsec->rx_fs)
return;
+ fs_cleanup_tx(priv);
fs_cleanup_rx(priv);
}
int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv)
{
+ int err;
+
if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec)
return -EOPNOTSUPP;
- return fs_init_rx(priv);
+ err = fs_init_tx(priv);
+ if (err)
+ return err;
+
+ err = fs_init_rx(priv);
+ if (err)
+ fs_cleanup_tx(priv);
+
+ return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 93a8d68..11e31a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -34,7 +34,7 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
-
+#include "accel/ipsec_offload.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/ipsec.h"
#include "accel/accel.h"
@@ -233,18 +233,94 @@
ntohs(mdata->content.tx.seq));
}
-bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
- struct mlx5_wqe_eth_seg *eseg,
- struct sk_buff *skb)
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg)
+{
+ inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG);
+ esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto);
+}
+
+static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct xfrm_state *x,
+ struct xfrm_offload *xo,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ unsigned int blksize, clen, alen, plen;
+ struct crypto_aead *aead;
+ unsigned int tailen;
+
+ ipsec_st->x = x;
+ ipsec_st->xo = xo;
+ if (mlx5_is_ipsec_device(priv->mdev)) {
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
+ }
+
+ return 0;
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- struct mlx5e_ipsec_metadata *mdata;
- struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_encap_tmpl *encap;
struct xfrm_state *x;
struct sec_path *sp;
+ u8 l3_proto;
- if (!xo)
- return true;
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1))
+ return;
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x))
+ return;
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))))
+ return;
+
+ mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
+
+ l3_proto = (x->props.family == AF_INET) ?
+ ((struct iphdr *)skb_network_header(skb))->protocol :
+ ((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
+
+ if (mlx5_is_ipsec_device(priv->mdev)) {
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
+ }
+ }
+}
+
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5e_ipsec_metadata *mdata;
+ struct xfrm_state *x;
+ struct sec_path *sp;
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1)) {
@@ -270,15 +346,21 @@
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
goto drop;
}
- mdata = mlx5e_ipsec_add_metadata(skb);
- if (IS_ERR(mdata)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
- goto drop;
+
+ if (MLX5_CAP_GEN(priv->mdev, fpga)) {
+ mdata = mlx5e_ipsec_add_metadata(skb);
+ if (IS_ERR(mdata)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+ goto drop;
+ }
}
- mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
+
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
- mlx5e_ipsec_set_metadata(skb, mdata, xo);
+ if (MLX5_CAP_GEN(priv->mdev, fpga))
+ mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+ mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index f96e786..056dacb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -43,6 +43,13 @@
#define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F)
#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF)
+struct mlx5e_accel_tx_ipsec_state {
+ struct xfrm_offload *xo;
+ struct xfrm_state *x;
+ u32 tailen;
+ u32 plen;
+};
+
#ifdef CONFIG_MLX5_EN_IPSEC
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -55,16 +62,32 @@
struct xfrm_offload *xo);
void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
-bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
- struct mlx5_wqe_eth_seg *eseg,
- struct sk_buff *skb);
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st);
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg);
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe);
+static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->tailen;
+}
+
static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
{
return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata));
}
+
+static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->x;
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
#else
static inline
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a0c35698..e3a968e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4569,7 +4569,6 @@
err_free:
kfree(flow);
kvfree(parse_attr);
- kfree(attr);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 13bd4f2..82b4419 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -144,9 +144,29 @@
memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
}
+/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
+static void
+ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+}
+
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
+ if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
+ ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
+ return;
+ }
+
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
@@ -237,12 +257,14 @@
u16 headlen;
u16 ihs;
__be16 mss;
+ u16 insz;
u8 opcode;
};
struct mlx5e_tx_wqe_attr {
u16 ds_cnt;
u16 ds_cnt_inl;
+ u16 ds_cnt_ids;
u8 num_wqebbs;
};
@@ -299,6 +321,7 @@
stats->packets++;
}
+ attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
stats->bytes += attr->num_bytes;
}
@@ -307,9 +330,13 @@
{
u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
u16 ds_cnt_inl = 0;
+ u16 ds_cnt_ids = 0;
- ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+ if (attr->insz)
+ ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
+ MLX5_SEND_WQE_DS);
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
if (attr->ihs) {
u16 inl = attr->ihs - INL_HDR_START_SZ;
@@ -323,6 +350,7 @@
*wqe_attr = (struct mlx5e_tx_wqe_attr) {
.ds_cnt = ds_cnt,
.ds_cnt_inl = ds_cnt_inl,
+ .ds_cnt_ids = ds_cnt_ids,
.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
};
}
@@ -398,11 +426,11 @@
if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
- eseg->inline_hdr.sz = cpu_to_be16(attr->ihs + VLAN_HLEN);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
- eseg->inline_hdr.sz = cpu_to_be16(attr->ihs);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
dseg += wqe_attr->ds_cnt_inl;
@@ -414,6 +442,7 @@
stats->added_vlan_packets++;
}
+ dseg += wqe_attr->ds_cnt_ids;
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
attr->headlen, dseg);
if (unlikely(num_dma < 0))
@@ -430,7 +459,8 @@
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
- return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs;
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
+ !attr->insz;
}
static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
@@ -580,7 +610,7 @@
static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
- if (unlikely(!mlx5e_accel_tx_eseg(priv, sq, skb, eseg)))
+ if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
return false;
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
@@ -625,7 +655,8 @@
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
- mlx5e_accel_tx_finish(sq, wqe, &accel);
+ mlx5e_accel_tx_finish(sq, wqe, &accel,
+ (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index fee1697..babe340 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -776,6 +776,9 @@
table_type = FS_FT_NIC_RX;
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
+#ifdef CONFIG_MLX5_IPSEC
+ case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL:
+#endif
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6141e9e..1609183 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -126,6 +126,10 @@
#define LAG_NUM_PRIOS 1
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+#define KERNEL_TX_IPSEC_NUM_PRIOS 1
+#define KERNEL_TX_IPSEC_NUM_LEVELS 1
+#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+
struct node_caps {
size_t arr_sz;
long *caps;
@@ -180,13 +184,24 @@
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
+#ifdef CONFIG_MLX5_IPSEC
+ .ar_size = 2,
+#else
.ar_size = 1,
+#endif
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
+#ifdef CONFIG_MLX5_IPSEC
+ ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
+ KERNEL_TX_IPSEC_NUM_LEVELS))),
+#endif
}
};
@@ -2165,8 +2180,10 @@
break;
}
- if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ if (type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) {
root_ns = steering->egress_root_ns;
+ prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_BYPASS_PRIO;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index d3c0394..b34da11 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -669,7 +669,8 @@
return priv->dev;
}
-static bool ocelot_port_dev_check(const struct net_device *dev)
+/* Checks if the net_device instance given to us originates from our driver */
+static bool ocelot_netdevice_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &ocelot_port_netdev_ops;
}
@@ -678,7 +679,7 @@
{
struct ocelot_port_private *priv;
- if (!dev || !ocelot_port_dev_check(dev))
+ if (!dev || !ocelot_netdevice_dev_check(dev))
return -EINVAL;
priv = netdev_priv(dev);
@@ -907,12 +908,6 @@
return ret;
}
-/* Checks if the net_device instance given to us originate from our driver. */
-static bool ocelot_netdevice_dev_check(const struct net_device *dev)
-{
- return dev->netdev_ops == &ocelot_port_netdev_ops;
-}
-
static int ocelot_netdevice_port_event(struct net_device *dev,
unsigned long event,
struct netdev_notifier_changeupper_info *info)
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 317fad7..267c080 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -2504,7 +2504,7 @@
}
/* Wait for 50 ms and powerup
- * This is abitrary. It is intended to make sure the
+ * This is arbitrary. It is intended to make sure the
* transceiver settles.
*/
tlan_set_timer(dev, msecs_to_jiffies(50), TLAN_TIMER_PHY_PUP);
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 24d688e..b40b711 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1432,6 +1432,9 @@
void ipa_endpoint_suspend(struct ipa *ipa)
{
+ if (!ipa->setup_complete)
+ return;
+
if (ipa->modem_netdev)
ipa_modem_suspend(ipa->modem_netdev);
@@ -1443,6 +1446,9 @@
void ipa_endpoint_resume(struct ipa *ipa)
{
+ if (!ipa->setup_complete)
+ return;
+
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 2b0c8f0..11ca5fa 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3647,30 +3647,10 @@
static void macsec_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *s)
{
- int cpu;
-
if (!dev->tstats)
return;
- for_each_possible_cpu(cpu) {
- struct pcpu_sw_netstats *stats;
- struct pcpu_sw_netstats tmp;
- int start;
-
- stats = per_cpu_ptr(dev->tstats, cpu);
- do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- tmp.rx_packets = stats->rx_packets;
- tmp.rx_bytes = stats->rx_bytes;
- tmp.tx_packets = stats->tx_packets;
- tmp.tx_bytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-
- s->rx_packets += tmp.rx_packets;
- s->rx_bytes += tmp.rx_bytes;
- s->tx_packets += tmp.tx_packets;
- s->tx_bytes += tmp.tx_bytes;
- }
+ dev_fetch_sw_netstats(s, dev->tstats);
s->rx_dropped = dev->stats.rx_dropped;
s->tx_dropped = dev->stats.tx_dropped;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 5ca1356..a322f51 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -126,31 +126,9 @@
struct rtnl_link_stats64 *stats)
{
struct qmimux_priv *priv = netdev_priv(net);
- unsigned int start;
- int cpu;
netdev_stats_to_stats64(stats, &net->stats);
-
- for_each_possible_cpu(cpu) {
- struct pcpu_sw_netstats *stats64;
- u64 rx_packets, rx_bytes;
- u64 tx_packets, tx_bytes;
-
- stats64 = per_cpu_ptr(priv->stats64, cpu);
-
- do {
- start = u64_stats_fetch_begin_irq(&stats64->syncp);
- rx_packets = stats64->rx_packets;
- rx_bytes = stats64->rx_bytes;
- tx_packets = stats64->tx_packets;
- tx_bytes = stats64->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats64->syncp, start));
-
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
- stats->tx_packets += tx_packets;
- stats->tx_bytes += tx_bytes;
- }
+ dev_fetch_sw_netstats(stats, priv->stats64);
}
static const struct net_device_ops qmimux_netdev_ops = {
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 963d260..6062dc2 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -983,31 +983,9 @@
void usbnet_get_stats64(struct net_device *net, struct rtnl_link_stats64 *stats)
{
struct usbnet *dev = netdev_priv(net);
- unsigned int start;
- int cpu;
netdev_stats_to_stats64(stats, &net->stats);
-
- for_each_possible_cpu(cpu) {
- struct pcpu_sw_netstats *stats64;
- u64 rx_packets, rx_bytes;
- u64 tx_packets, tx_bytes;
-
- stats64 = per_cpu_ptr(dev->stats64, cpu);
-
- do {
- start = u64_stats_fetch_begin_irq(&stats64->syncp);
- rx_packets = stats64->rx_packets;
- rx_bytes = stats64->rx_bytes;
- tx_packets = stats64->tx_packets;
- tx_bytes = stats64->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats64->syncp, start));
-
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
- stats->tx_packets += tx_packets;
- stats->tx_bytes += tx_bytes;
- }
+ dev_fetch_sw_netstats(stats, dev->stats64);
}
EXPORT_SYMBOL_GPL(usbnet_get_stats64);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 21b7114..d2d2c4a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -68,6 +68,8 @@
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_NET_F_GUEST_UFO))
+#define GUEST_OFFLOAD_CSUM_MASK (1ULL << VIRTIO_NET_F_GUEST_CSUM)
+
struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN];
size_t offset;
@@ -2522,29 +2524,48 @@
return 0;
}
+static netdev_features_t virtnet_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ /* If Rx checksum is disabled, LRO should also be disabled. */
+ if (!(features & NETIF_F_RXCSUM))
+ features &= ~NETIF_F_LRO;
+
+ return features;
+}
+
static int virtnet_set_features(struct net_device *dev,
netdev_features_t features)
{
struct virtnet_info *vi = netdev_priv(dev);
- u64 offloads;
+ u64 offloads = vi->guest_offloads;
int err;
+ /* Don't allow configuration while XDP is active. */
+ if (vi->xdp_queue_pairs)
+ return -EBUSY;
+
if ((dev->features ^ features) & NETIF_F_LRO) {
- if (vi->xdp_queue_pairs)
- return -EBUSY;
-
if (features & NETIF_F_LRO)
- offloads = vi->guest_offloads_capable;
+ offloads |= GUEST_OFFLOAD_LRO_MASK &
+ vi->guest_offloads_capable;
else
- offloads = vi->guest_offloads_capable &
- ~GUEST_OFFLOAD_LRO_MASK;
-
- err = virtnet_set_guest_offloads(vi, offloads);
- if (err)
- return err;
- vi->guest_offloads = offloads;
+ offloads &= ~GUEST_OFFLOAD_LRO_MASK;
}
+ if ((dev->features ^ features) & NETIF_F_RXCSUM) {
+ if (features & NETIF_F_RXCSUM)
+ offloads |= GUEST_OFFLOAD_CSUM_MASK &
+ vi->guest_offloads_capable;
+ else
+ offloads &= ~GUEST_OFFLOAD_CSUM_MASK;
+ }
+
+ err = virtnet_set_guest_offloads(vi, offloads);
+ if (err)
+ return err;
+
+ vi->guest_offloads = offloads;
return 0;
}
@@ -2563,6 +2584,7 @@
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features = virtnet_set_features,
+ .ndo_fix_features = virtnet_fix_features,
};
static void virtnet_config_changed_work(struct work_struct *work)
@@ -3013,8 +3035,10 @@
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
dev->features |= NETIF_F_LRO;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+ dev->hw_features |= NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_LRO;
+ }
dev->vlan_features = dev->features;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c
index 374074d..bf6dbeb 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.c
@@ -139,34 +139,13 @@
struct rtnl_link_stats64 *stats)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev);
- unsigned int start;
- int cpu;
netdev_stats_to_stats64(stats, &ndev->stats);
if (!vif->stats64)
return;
- for_each_possible_cpu(cpu) {
- struct pcpu_sw_netstats *stats64;
- u64 rx_packets, rx_bytes;
- u64 tx_packets, tx_bytes;
-
- stats64 = per_cpu_ptr(vif->stats64, cpu);
-
- do {
- start = u64_stats_fetch_begin_irq(&stats64->syncp);
- rx_packets = stats64->rx_packets;
- rx_bytes = stats64->rx_bytes;
- tx_packets = stats64->tx_packets;
- tx_bytes = stats64->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats64->syncp, start));
-
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
- stats->tx_packets += tx_packets;
- stats->tx_bytes += tx_bytes;
- }
+ dev_fetch_sw_netstats(stats, vif->stats64);
}
/* Netdev handler for transmission timeout.
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 92d991d..846d94a 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -76,6 +76,7 @@
MLX5_FLOW_NAMESPACE_SNIFFER_RX,
MLX5_FLOW_NAMESPACE_SNIFFER_TX,
MLX5_FLOW_NAMESPACE_EGRESS,
+ MLX5_FLOW_NAMESPACE_EGRESS_KERNEL,
MLX5_FLOW_NAMESPACE_RDMA_RX,
MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
MLX5_FLOW_NAMESPACE_RDMA_TX,
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 36492a1..d75ef8a 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -245,6 +245,10 @@
MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5,
};
+enum {
+ MLX5_ETH_WQE_FT_META_IPSEC = BIT(0),
+};
+
struct mlx5_wqe_eth_seg {
u8 swp_outer_l4_offset;
u8 swp_outer_l3_offset;
@@ -253,7 +257,7 @@
u8 cs_flags;
u8 swp_flags;
__be16 mss;
- __be32 rsvd2;
+ __be32 flow_table_metadata;
union {
struct {
__be16 sz;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 948d710..964b494 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4499,6 +4499,8 @@
struct rtnl_link_stats64 *storage);
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats);
+void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
+ const struct pcpu_sw_netstats __percpu *netstats);
extern int netdev_max_backlog;
extern int netdev_tstamp_prequeue;
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 0d39208..716db4a 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -108,6 +108,7 @@
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
struct sock *sk);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3965ce1..3f7e56b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -14,6 +14,8 @@
#include <net/netlink.h>
#include <net/flow_offload.h>
+#define NFT_MAX_HOOKS (NF_INET_INGRESS + 1)
+
struct module;
#define NFT_JUMP_STACK_SIZE 16
@@ -979,7 +981,7 @@
int family;
struct module *owner;
unsigned int hook_mask;
- nf_hookfn *hooks[NF_MAX_HOOKS];
+ nf_hookfn *hooks[NFT_MAX_HOOKS];
int (*ops_register)(struct net *net, const struct nf_hook_ops *ops);
void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
};
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index e1057b2..879fe8c 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -56,7 +56,10 @@
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
- struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+ struct tcf_tunnel_key_params *params;
+
+ params = rcu_dereference_protected(t->params,
+ lockdep_is_held(&a->tcfa_lock));
return ¶ms->tcft_enc_metadata->u.tun_info;
#else
diff --git a/include/net/tls.h b/include/net/tls.h
index e5dac7e..baf1e99 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -679,10 +679,6 @@
struct scatterlist *sgout);
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
-struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-
int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info);
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index 6a6179a..ef9a442 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -45,8 +45,8 @@
NF_INET_FORWARD,
NF_INET_LOCAL_OUT,
NF_INET_POST_ROUTING,
- NF_INET_INGRESS,
- NF_INET_NUMHOOKS
+ NF_INET_NUMHOOKS,
+ NF_INET_INGRESS = NF_INET_NUMHOOKS,
};
enum nf_dev_hooks {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 352ee51..98272cb 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -749,10 +749,12 @@
*
* @NFT_PAYLOAD_CSUM_NONE: no checksumming
* @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791)
+ * @NFT_PAYLOAD_CSUM_SCTP: CRC-32c, for use in SCTP header (RFC 3309)
*/
enum nft_payload_csum_types {
NFT_PAYLOAD_CSUM_NONE,
NFT_PAYLOAD_CSUM_INET,
+ NFT_PAYLOAD_CSUM_SCTP,
};
enum nft_payload_csum_flags {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9a2fb4a..6f742fe 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -206,27 +206,8 @@
struct rtnl_link_stats64 *stats)
{
struct net_bridge *br = netdev_priv(dev);
- struct pcpu_sw_netstats tmp, sum = { 0 };
- unsigned int cpu;
- for_each_possible_cpu(cpu) {
- unsigned int start;
- const struct pcpu_sw_netstats *bstats
- = per_cpu_ptr(br->stats, cpu);
- do {
- start = u64_stats_fetch_begin_irq(&bstats->syncp);
- memcpy(&tmp, bstats, sizeof(tmp));
- } while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
- sum.tx_bytes += tmp.tx_bytes;
- sum.tx_packets += tmp.tx_packets;
- sum.rx_bytes += tmp.rx_bytes;
- sum.rx_packets += tmp.rx_packets;
- }
-
- stats->tx_bytes = sum.tx_bytes;
- stats->tx_packets = sum.tx_packets;
- stats->rx_bytes = sum.rx_bytes;
- stats->rx_packets = sum.rx_packets;
+ dev_fetch_sw_netstats(stats, br->stats);
}
static int br_change_mtu(struct net_device *dev, int new_mtu)
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 0cec415..e09d087 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -580,6 +580,7 @@
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
/* reserve CAN header */
skb_reserve(skb, offsetof(struct can_frame, data));
@@ -1487,6 +1488,7 @@
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
skcb = j1939_skb_to_cb(skb);
memcpy(skcb, rel_skcb, sizeof(*skcb));
diff --git a/net/core/dev.c b/net/core/dev.c
index 22babf5b..751e526 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10328,6 +10328,40 @@
}
EXPORT_SYMBOL(dev_get_stats);
+/**
+ * dev_fetch_sw_netstats - get per-cpu network device statistics
+ * @s: place to store stats
+ * @netstats: per-cpu network stats to read from
+ *
+ * Read per-cpu network statistics and populate the related fields in @s.
+ */
+void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
+ const struct pcpu_sw_netstats __percpu *netstats)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct pcpu_sw_netstats *stats;
+ struct pcpu_sw_netstats tmp;
+ unsigned int start;
+
+ stats = per_cpu_ptr(netstats, cpu);
+ do {
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ tmp.rx_packets = stats->rx_packets;
+ tmp.rx_bytes = stats->rx_bytes;
+ tmp.tx_packets = stats->tx_packets;
+ tmp.tx_bytes = stats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ s->rx_packets += tmp.rx_packets;
+ s->rx_bytes += tmp.rx_bytes;
+ s->tx_packets += tmp.tx_packets;
+ s->tx_bytes += tmp.tx_bytes;
+ }
+}
+EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
+
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
struct netdev_queue *queue = dev_ingress_queue(dev);
diff --git a/net/core/sock.c b/net/core/sock.c
index d9a537e..4e87293 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -757,7 +757,6 @@
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
}
}
@@ -994,8 +993,6 @@
__sock_set_timestamps(sk, valbool, true, true);
break;
case SO_TIMESTAMPING_NEW:
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- fallthrough;
case SO_TIMESTAMPING_OLD:
if (val & ~SOF_TIMESTAMPING_MASK) {
ret = -EINVAL;
@@ -1024,16 +1021,14 @@
}
sk->sk_tsflags = val;
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
- else {
- if (optname == SO_TIMESTAMPING_NEW)
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
+ else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
- }
break;
case SO_RCVLOWAT:
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e7c1d62..3bc5ca4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1221,28 +1221,9 @@
struct rtnl_link_stats64 *stats)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct pcpu_sw_netstats *s;
- unsigned int start;
- int i;
netdev_stats_to_stats64(stats, &dev->stats);
- for_each_possible_cpu(i) {
- u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
-
- s = per_cpu_ptr(p->stats64, i);
- do {
- start = u64_stats_fetch_begin_irq(&s->syncp);
- tx_packets = s->tx_packets;
- tx_bytes = s->tx_bytes;
- rx_packets = s->rx_packets;
- rx_bytes = s->rx_bytes;
- } while (u64_stats_fetch_retry_irq(&s->syncp, start));
-
- stats->tx_packets += tx_packets;
- stats->tx_bytes += tx_bytes;
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
- }
+ dev_fetch_sw_netstats(stats, p->stats64);
}
static int dsa_slave_get_rxnfc(struct net_device *dev,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8f2e974..e8d704d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -457,6 +457,23 @@
local_bh_enable();
}
+/*
+ * The device used for looking up which routing table to use for sending an ICMP
+ * error is preferably the source whenever it is set, which should ensure the
+ * icmp error can be sent to the source host, else lookup using the routing
+ * table of the destination device, else use the main routing table (index 0).
+ */
+static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+{
+ struct net_device *route_lookup_dev = NULL;
+
+ if (skb->dev)
+ route_lookup_dev = skb->dev;
+ else if (skb_dst(skb))
+ route_lookup_dev = skb_dst(skb)->dev;
+ return route_lookup_dev;
+}
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -465,6 +482,7 @@
int type, int code,
struct icmp_bxm *param)
{
+ struct net_device *route_lookup_dev;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -479,7 +497,8 @@
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+ route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
+ fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = ip_route_output_key_hash(net, fl4, skb_in);
@@ -503,7 +522,7 @@
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4e31f23..e702917 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -625,9 +625,7 @@
}
if (dev->header_ops) {
- /* Need space for new headers */
- if (skb_cow_head(skb, dev->needed_headroom -
- (tunnel->hlen + sizeof(struct iphdr))))
+ if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
@@ -748,7 +746,11 @@
len = tunnel->tun_hlen - len;
tunnel->hlen = tunnel->hlen + len;
- dev->needed_headroom = dev->needed_headroom + len;
+ if (dev->header_ops)
+ dev->hard_header_len += len;
+ else
+ dev->needed_headroom += len;
+
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
@@ -944,6 +946,7 @@
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+ dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -987,10 +990,14 @@
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
#endif
} else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
return ip_tunnel_init(dev);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index b2ea1a8..25f1caf 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -433,29 +433,8 @@
void ip_tunnel_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
- int i;
-
netdev_stats_to_stats64(tot, &dev->stats);
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_irq(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
+ dev_fetch_sw_netstats(tot, dev->tstats);
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 7a83f88..136030a 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -43,16 +43,31 @@
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int nhoff)
{
- const struct arphdr *ah;
- struct arphdr _arph;
const struct arppayload *ap;
struct arppayload _arpp;
+ const struct arphdr *ah;
+ unsigned int logflags;
+ struct arphdr _arph;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
nf_log_buf_add(m, "TRUNCATED");
return;
}
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ if (logflags & NF_LOG_MACDECODE) {
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
+ }
+
nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 0c72156..d07583f 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -284,8 +284,10 @@
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d15a78b..dc2a399 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2770,10 +2770,12 @@
if (IS_ERR(rt))
return rt;
- if (flp4->flowi4_proto)
+ if (flp4->flowi4_proto) {
+ flp4->flowi4_oif = rt->dst.dev->ifindex;
rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
flowi4_to_flowi(flp4),
sk, 0);
+ }
return rt;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c15ca20..67f10d3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4430,7 +4430,8 @@
sp[i] = sp[i + 1];
continue;
}
- this_sack++, swalk++;
+ this_sack++;
+ swalk++;
}
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 3f51e78..c8003c8 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -293,10 +293,10 @@
const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- info->vegas.tcpv_enabled = ca->doing_vegas_now,
- info->vegas.tcpv_rttcnt = ca->cntRTT,
- info->vegas.tcpv_rtt = ca->baseRTT,
- info->vegas.tcpv_minrtt = ca->minRTT,
+ info->vegas.tcpv_enabled = ca->doing_vegas_now;
+ info->vegas.tcpv_rttcnt = ca->cntRTT;
+ info->vegas.tcpv_rtt = ca->baseRTT;
+ info->vegas.tcpv_minrtt = ca->minRTT;
*attr = INET_DIAG_VEGASINFO;
return sizeof(struct tcpvegas_info);
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 8d3f66c..78f76601 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -761,7 +761,7 @@
calipso[1] = len - 2;
*(__be32 *)(calipso + 2) = htonl(doi_def->doi);
calipso[6] = (len - CALIPSO_HDR_LEN) / 4;
- calipso[7] = secattr->attr.mls.lvl,
+ calipso[7] = secattr->attr.mls.lvl;
crc = ~crc_ccitt(0xffff, calipso, len);
calipso[8] = crc & 0xff;
calipso[9] = (crc >> 8) & 0xff;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a4e4912..91209a2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -501,8 +501,11 @@
if (__ipv6_addr_needs_scope_id(addr_type)) {
iif = icmp6_iif(skb);
} else {
- dst = skb_dst(skb);
- iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
+ /*
+ * The source device is used for looking up which routing table
+ * to use for sending an ICMP error.
+ */
+ iif = l3mdev_master_ifindex(skb->dev);
}
/*
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 141c0a4..605cdd3 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2622,8 +2622,10 @@
iter->skip = *pos;
if (iter->tbl) {
+ loff_t p = 0;
+
ipv6_route_seq_setup_walk(iter, net);
- return ipv6_route_seq_next(seq, NULL, pos);
+ return ipv6_route_seq_next(seq, NULL, &p);
} else {
return NULL;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a2a65e3..5941bfb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -468,8 +468,6 @@
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
- /* Force OUTPUT device used as source address */
- skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index da64550..8210ff3 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -297,9 +297,11 @@
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
return;
default:
break;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bde6d48..7e0ce7a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2745,7 +2745,8 @@
if (confirm_neigh)
dst_confirm_neigh(dst, daddr);
- mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu < IPV6_MIN_MTU)
+ return;
if (mtu >= dst_mtu(dst))
return;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 240862a..1be7759 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -709,28 +709,7 @@
static void
ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats;
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- tstats = per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
-
- stats->rx_packets += rx_packets;
- stats->tx_packets += tx_packets;
- stats->rx_bytes += rx_bytes;
- stats->tx_bytes += tx_bytes;
- }
+ dev_fetch_sw_netstats(stats, dev->tstats);
}
static const struct net_device_ops ieee80211_dataif_ops = {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4d35fa9..092a2d4 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -517,7 +517,7 @@
return ret;
}
- if (subflow->use_64bit_ack) {
+ if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
opts->ext_copy.ack64 = 1;
@@ -657,6 +657,12 @@
if (unlikely(mptcp_check_fallback(sk)))
return false;
+ /* prevent adding of any MPTCP related options on reset packet
+ * until we support MP_TCPRST/MP_FASTCLOSE
+ */
+ if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -711,7 +717,7 @@
return false;
}
-static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
+static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
@@ -728,15 +734,20 @@
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join &&
READ_ONCE(msk->pm.server_side))
- tcp_send_ack(sk);
+ tcp_send_ack(ssk);
goto fully_established;
}
- /* we should process OoO packets before the first subflow is fully
- * established, but not expected for MP_JOIN subflows
+ /* we must process OoO packets before the first subflow is fully
+ * established. OoO packets are instead a protocol violation
+ * for MP_JOIN subflows as the peer must not send any data
+ * before receiving the forth ack - cfr. RFC 8684 section 3.2.
*/
- if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
+ if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
+ if (subflow->mp_join)
+ goto reset;
return subflow->mp_capable;
+ }
if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
@@ -748,9 +759,12 @@
}
/* If the first established packet does not contain MP_CAPABLE + data
- * then fallback to TCP
+ * then fallback to TCP. Fallback scenarios requires a reset for
+ * MP_JOIN subflows.
*/
if (!mp_opt->mp_capable) {
+ if (subflow->mp_join)
+ goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
__mptcp_do_fallback(msk);
@@ -767,12 +781,16 @@
subflow->pm_notified = 1;
if (subflow->mp_join) {
- clear_3rdack_retransmission(sk);
+ clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
mptcp_pm_fully_established(msk);
}
return true;
+
+reset:
+ mptcp_subflow_reset(ssk);
+ return false;
}
static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9816608..185dacb3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1710,6 +1710,20 @@
spin_unlock_bh(&msk->pm.lock);
}
+static void __mptcp_close_subflow(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow, *tmp;
+
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if (inet_sk_state_load(ssk) != TCP_CLOSE)
+ continue;
+
+ __mptcp_close_ssk((struct sock *)msk, ssk, subflow, 0);
+ }
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1727,6 +1741,9 @@
mptcp_clean_una(sk);
mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(msk);
+
__mptcp_move_skbs(msk);
if (msk->pm.status)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aa0ab18..13ab89d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -90,6 +90,7 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
+#define MPTCP_WORK_CLOSE_SUBFLOW 5
struct mptcp_options_received {
u64 sndr_key;
@@ -211,6 +212,7 @@
bool fully_established;
bool rcv_data_fin;
bool snd_data_fin_enable;
+ bool use_64bit_ack; /* Set when we received a 64-bit DSN */
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -310,7 +312,6 @@
mpc_map : 1,
backup : 1,
rx_eof : 1,
- use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
can_ack : 1; /* only after processing the remote a key */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
@@ -369,6 +370,7 @@
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
long timeout);
+void mptcp_subflow_reset(struct sock *ssk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5d91e3a..ac4a1fe 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -271,6 +271,19 @@
return thmac == subflow->thmac;
}
+void mptcp_subflow_reset(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+
+ tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ tcp_done(ssk);
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+ schedule_work(&mptcp_sk(sk)->work))
+ sock_hold(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -343,8 +356,7 @@
return;
do_reset:
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ mptcp_subflow_reset(sk);
}
struct request_sock_ops mptcp_subflow_request_sock_ops;
@@ -770,12 +782,11 @@
if (!mpext->dsn64) {
map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
mpext->data_seq);
- subflow->use_64bit_ack = 0;
pr_debug("expanded seq=%llu", subflow->map_seq);
} else {
map_seq = mpext->data_seq;
- subflow->use_64bit_ack = 1;
}
+ WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
if (subflow->map_valid) {
/* Allow replacing only with an identical map */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 25313c2..5237021 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -441,6 +441,7 @@
config NF_TABLES
select NETFILTER_NETLINK
+ select LIBCRC32C
tristate "Netfilter nf_tables support"
help
nftables is the new packet classification framework that intends to
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b00866d..d2e5a8f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -609,6 +609,8 @@
if (ret == NF_ACCEPT) {
nf_reset_ct(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
}
return ret;
}
@@ -649,6 +651,8 @@
if (!local) {
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -669,6 +673,8 @@
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index ae5628d..fd7c5f0 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -171,6 +171,18 @@
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+ u16 vid;
+
+ if (!skb_vlan_tag_present(skb))
+ return;
+
+ vid = skb_vlan_tag_get(skb);
+ nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
+
/* bridge and netdev logging families share this code. */
void nf_log_l2packet(struct net *net, u_int8_t pf,
__be16 protocol,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f22ad21..9957e0e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1864,7 +1864,7 @@
if (IS_ERR(type))
return PTR_ERR(type);
}
- if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
+ if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
@@ -2138,7 +2138,8 @@
}
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
- u32 flags)
+ u32 flags, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@@ -2154,9 +2155,10 @@
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) {
- if (!nft_is_base_chain(chain))
+ if (!nft_is_base_chain(chain)) {
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
-
+ }
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false);
if (err < 0)
@@ -2165,6 +2167,7 @@
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
@@ -2172,6 +2175,7 @@
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
} else {
@@ -2179,6 +2183,7 @@
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
}
@@ -2191,8 +2196,10 @@
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
- if (!IS_ERR(chain2))
+ if (!IS_ERR(chain2)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST;
+ }
}
if (nla[NFTA_CHAIN_COUNTERS]) {
@@ -2235,6 +2242,7 @@
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name);
goto err;
}
@@ -2357,7 +2365,8 @@
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
- return nf_tables_updchain(&ctx, genmask, policy, flags);
+ return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
+ extack);
}
return nf_tables_addchain(&ctx, family, genmask, policy, flags);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7a2e596..dcd3c7b 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,6 +22,7 @@
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/sctp/checksum.h>
static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
struct vlan_ethhdr *veth)
@@ -484,6 +485,19 @@
return 0;
}
+static int nft_payload_csum_sctp(struct sk_buff *skb, int offset)
+{
+ struct sctphdr *sh;
+
+ if (skb_ensure_writable(skb, offset + sizeof(*sh)))
+ return -1;
+
+ sh = (struct sctphdr *)(skb->data + offset);
+ sh->checksum = sctp_compute_cksum(skb, offset);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ return 0;
+}
+
static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
struct sk_buff *skb,
__wsum fsum, __wsum tsum)
@@ -587,6 +601,13 @@
skb_store_bits(skb, offset, src, priv->len) < 0)
goto err;
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
+ pkt->tprot == IPPROTO_SCTP &&
+ skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (nft_payload_csum_sctp(skb, pkt->xt.thoff))
+ goto err;
+ }
+
return;
err:
regs->verdict.code = NFT_BREAK;
@@ -623,6 +644,13 @@
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
+ case NFT_PAYLOAD_CSUM_SCTP:
+ if (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER)
+ return -EINVAL;
+
+ if (priv->csum_offset != offsetof(struct sctphdr, checksum))
+ return -EINVAL;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index d8fe66e..1e30d8d 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -86,31 +86,13 @@
static void
internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- int i;
-
memset(stats, 0, sizeof(*stats));
stats->rx_errors = dev->stats.rx_errors;
stats->tx_errors = dev->stats.tx_errors;
stats->tx_dropped = dev->stats.tx_dropped;
stats->rx_dropped = dev->stats.rx_dropped;
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *percpu_stats;
- struct pcpu_sw_netstats local_stats;
- unsigned int start;
-
- percpu_stats = per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
- local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
-
- stats->rx_bytes += local_stats.rx_bytes;
- stats->rx_packets += local_stats.rx_packets;
- stats->tx_bytes += local_stats.tx_bytes;
- stats->tx_packets += local_stats.tx_packets;
- }
+ dev_fetch_sw_netstats(stats, dev->tstats);
}
static const struct net_device_ops internal_dev_netdev_ops = {
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index c9287b6..dce4816 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -831,6 +831,7 @@
* conn_event.c
*/
void rxrpc_process_connection(struct work_struct *);
+void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
/*
* conn_object.c
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 78c845a..7e574c7 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -901,11 +901,14 @@
struct rxrpc_bundle *bundle = conn->bundle;
struct rxrpc_local *local = bundle->params.local;
unsigned int bindex;
- bool need_drop = false;
+ bool need_drop = false, need_put = false;
int i;
_enter("C=%x", conn->debug_id);
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, true);
+
spin_lock(&bundle->channel_lock);
bindex = conn->bundle_shift / RXRPC_MAXCALLS;
if (bundle->conns[bindex] == conn) {
@@ -928,10 +931,11 @@
if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) {
_debug("erase bundle");
rb_erase(&bundle->local_node, &local->client_bundles);
+ need_put = true;
}
spin_unlock(&local->client_bundles_lock);
- if (i == ARRAY_SIZE(bundle->conns))
+ if (need_put)
rxrpc_put_bundle(bundle);
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 6b7c6f4..aff1841 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -397,7 +397,7 @@
/*
* Process delayed final ACKs that we haven't subsumed into a subsequent call.
*/
-static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
+void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force)
{
unsigned long j = jiffies, next_j;
unsigned int channel;
@@ -416,7 +416,7 @@
smp_rmb(); /* vs rxrpc_disconnect_client_call */
ack_at = READ_ONCE(chan->final_ack_at);
- if (time_before(j, ack_at)) {
+ if (time_before(j, ack_at) && !force) {
if (time_before(ack_at, next_j)) {
next_j = ack_at;
set = true;
@@ -450,7 +450,7 @@
/* Process delayed ACKs whose time has come. */
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
- rxrpc_process_delayed_final_acks(conn);
+ rxrpc_process_delayed_final_acks(conn, false);
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f1dbb50..d790c43 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1596,7 +1596,7 @@
return rc;
}
-#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
@@ -1615,7 +1615,8 @@
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
- return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
+ return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) :
+ ERR_PTR(-EIO);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2db967f..273eaf1 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -233,8 +233,6 @@
default:
flow->type = SMC_LLC_FLOW_NONE;
}
- if (qentry == lgr->delayed_event)
- lgr->delayed_event = NULL;
smc_llc_flow_qentry_set(flow, qentry);
spin_unlock_bh(&lgr->llc_flow_lock);
return true;
@@ -1209,7 +1207,7 @@
/* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link)
{
- struct smc_llc_msg_add_link add_llc = {0};
+ struct smc_llc_msg_add_link add_llc = {};
add_llc.hd.length = sizeof(add_llc);
add_llc.hd.common.type = SMC_LLC_ADD_LINK;
@@ -1242,7 +1240,7 @@
*/
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
- struct smc_llc_msg_del_link del_llc = {0};
+ struct smc_llc_msg_del_link del_llc = {};
del_llc.hd.length = sizeof(del_llc);
del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
@@ -1314,7 +1312,7 @@
*/
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
- struct smc_llc_msg_del_link delllc = {0};
+ struct smc_llc_msg_del_link delllc = {};
int i;
delllc.hd.common.type = SMC_LLC_DELETE_LINK;
@@ -1603,13 +1601,12 @@
struct smc_llc_qentry *qentry;
if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
- if (smc_link_usable(lgr->delayed_event->link)) {
- smc_llc_event_handler(lgr->delayed_event);
- } else {
- qentry = lgr->delayed_event;
- lgr->delayed_event = NULL;
+ qentry = lgr->delayed_event;
+ lgr->delayed_event = NULL;
+ if (smc_link_usable(qentry->link))
+ smc_llc_event_handler(qentry);
+ else
kfree(qentry);
- }
}
again:
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 11a429f..2a78aa7 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,7 +150,8 @@
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- frag = skb_unshare(frag, GFP_ATOMIC);
+ if (skb_cloned(frag))
+ frag = skb_copy(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 2f9c148..fe4edce 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -327,8 +327,13 @@
struct tipc_msg *hdr;
u16 seqno;
+ spin_lock_bh(&namedq->lock);
skb_queue_walk_safe(namedq, skb, tmp) {
- skb_linearize(skb);
+ if (unlikely(skb_linearize(skb))) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
hdr = buf_msg(skb);
seqno = msg_named_seqno(hdr);
if (msg_is_last_bulk(hdr)) {
@@ -338,12 +343,14 @@
if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
__skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
return skb;
}
if (*open && (*rcv_nxt == seqno)) {
(*rcv_nxt)++;
__skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
return skb;
}
@@ -353,6 +360,7 @@
continue;
}
}
+ spin_unlock_bh(&namedq->lock);
return NULL;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index cf4b239..d269ebe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1496,7 +1496,7 @@
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
- __skb_queue_purge(&n->bc_entry.namedq);
+ skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index b74e274..cec8622 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -418,14 +418,14 @@
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
- int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
int tls_push_record_flags;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
- int copy, rc = 0;
+ bool more = false;
bool done = false;
+ int copy, rc = 0;
long timeo;
if (flags &
@@ -492,9 +492,8 @@
if (!size) {
last_record:
tls_push_record_flags = flags;
- if (more) {
- tls_ctx->pending_open_record_frags =
- !!record->num_frags;
+ if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
+ more = true;
break;
}
@@ -526,6 +525,8 @@
}
} while (!done);
+ tls_ctx->pending_open_record_frags = more;
+
if (orig_size - size > 0)
rc = orig_size - size;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 002b085..8d93cea 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -869,7 +869,7 @@
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
- tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked,
+ tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked;
tls_device_init();
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb82bdc..41c3303 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -613,7 +613,6 @@
int err;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
- struct pid *old_pid = NULL;
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -634,7 +633,6 @@
out_unlock:
unix_state_unlock(sk);
- put_pid(old_pid);
out:
return err;
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 5b12093..aa4cdcf 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -541,27 +541,7 @@
static void xfrmi_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *s)
{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct pcpu_sw_netstats *stats;
- struct pcpu_sw_netstats tmp;
- int start;
-
- stats = per_cpu_ptr(dev->tstats, cpu);
- do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- tmp.rx_packets = stats->rx_packets;
- tmp.rx_bytes = stats->rx_bytes;
- tmp.tx_packets = stats->tx_packets;
- tmp.tx_bytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-
- s->rx_packets += tmp.rx_packets;
- s->rx_bytes += tmp.rx_bytes;
- s->tx_packets += tmp.tx_packets;
- s->tx_bytes += tmp.tx_bytes;
- }
+ dev_fetch_sw_netstats(s, dev->tstats);
s->rx_dropped = dev->stats.rx_dropped;
s->tx_dropped = dev->stats.tx_dropped;
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 4773ce7..ef35247 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -20,6 +20,7 @@
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 0000000..23cf924
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
+# output of the command to check that a ttl expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initialy reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the ttl of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns r1 route add vrf red ${H1_N1} dev blue
+ ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 link add eth1 type veth peer name r2h1
+ ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ ip -netns h2 link add eth1 type veth peer name r2h2
+ ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 link add br0 type bridge
+ ip -netns h1 link set br0 up
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 master br0 up
+ ip -netns h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns h2 link add br0 type bridge
+ ip -netns h2 link set br0 up
+ ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 master br0 up
+ ip -netns h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bc..9e56b9d 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
index d250b84..087f0e6 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -7,8 +7,7 @@
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1; then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
@@ -24,6 +23,8 @@
trap cleanup EXIT
+currentyear=$(date +%G)
+lastyear=$((currentyear-1))
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table inet filter {
counter iifcount {}
@@ -33,6 +34,9 @@
counter infproto4count {}
counter il4protocounter {}
counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
counter oifcount {}
counter oifnamecount {}
@@ -54,6 +58,9 @@
meta nfproto ipv4 counter name "infproto4count"
meta l4proto icmp counter name "il4protocounter"
meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
}
chain output {
@@ -84,11 +91,10 @@
local want="packets $2"
local verbose="$3"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
- if [ $? -ne 0 ];then
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
- ip netns exec "$ns0" nft list counter inet filter $counter
+ ip netns exec "$ns0" nft list counter inet filter $cname
fi
}
@@ -100,8 +106,7 @@
for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
- il4protocounter \
- ol4protocounter \
+ il4protocounter icurrentyearcounter ol4protocounter \
; do
check_one_counter "$counter" "$want" "$verbose"
done
@@ -116,9 +121,22 @@
check_one_counter oskgidcounter "1" true
check_one_counter imarkcounter "1" true
check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448..3d202b9 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,6 +12,7 @@
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
@@ -20,6 +21,7 @@
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +44,8 @@
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +87,7 @@
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -118,7 +122,7 @@
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +179,7 @@
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +188,10 @@
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +218,8 @@
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +254,11 @@
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +274,13 @@
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +289,47 @@
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +371,6 @@
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_requeue
exit $ret