Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next
John W. Linville says:
====================
This is a batch of updates intended for the 3.13 stream...
The biggest item of interest in here is wcn36xx, the new mac80211
driver for Qualcomm WCN3660/WCN3680 hardware.
Regarding the mac80211 bits, Johannes says:
"We have an assortment of cleanups and new features, of which the
biggest one is probably the channel-switch support in IBSS. Nothing
else really stands out much."
On top of that, the ath9k and rt2x00 get a lot of update action from
Felix Fietkau and Gabor Juhos, respectively. There are a handful of
updates to other drivers here and there as well.
Please let me know if there are problems!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index 52377b4..a2e0ed6 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -481,7 +481,7 @@
int diva_os_register_irq(void *context, byte irq, const char *name)
{
int result = request_irq(irq, diva_os_irq_wrapper,
- IRQF_DISABLED | IRQF_SHARED, name, context);
+ IRQF_SHARED, name, context);
return (result);
}
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index ca997bd..92acc81 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -336,7 +336,7 @@
*/
sc_adapter[cinst]->interrupt = irq[b];
if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler,
- IRQF_DISABLED, interface->id,
+ 0, interface->id,
(void *)(unsigned long) cinst))
{
kfree(sc_adapter[cinst]->channel);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ea3e64e..187b1b7 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2344,7 +2344,7 @@
struct slave *slave;
struct port *port;
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave).port);
if (port->aggregator && port->aggregator->is_active) {
aggregator = port->aggregator;
@@ -2369,9 +2369,9 @@
{
int ret;
- read_lock(&bond->lock);
+ rcu_read_lock();
ret = __bond_3ad_get_active_agg_info(bond, ad_info);
- read_unlock(&bond->lock);
+ rcu_read_unlock();
return ret;
}
@@ -2388,7 +2388,6 @@
int res = 1;
int agg_id;
- read_lock(&bond->lock);
if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
dev->name);
@@ -2406,7 +2405,7 @@
slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
first_ok_slave = NULL;
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
agg = SLAVE_AD_INFO(slave).port.aggregator;
if (!agg || agg->aggregator_identifier != agg_id)
continue;
@@ -2436,7 +2435,6 @@
res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
out:
- read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 576ccea..0287240 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -230,7 +230,7 @@
max_gap = LLONG_MIN;
/* Find the slave with the largest gap */
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
if (SLAVE_IS_OK(slave)) {
long long gap = compute_gap(slave);
@@ -412,6 +412,39 @@
return rx_slave;
}
+/* Caller must hold rcu_read_lock() for read */
+static struct slave *__rlb_next_rx_slave(struct bonding *bond)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct slave *before = NULL, *rx_slave = NULL, *slave;
+ struct list_head *iter;
+ bool found = false;
+
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ if (!SLAVE_IS_OK(slave))
+ continue;
+ if (!found) {
+ if (!before || before->speed < slave->speed)
+ before = slave;
+ } else {
+ if (!rx_slave || rx_slave->speed < slave->speed)
+ rx_slave = slave;
+ }
+ if (slave == bond_info->rx_slave)
+ found = true;
+ }
+ /* we didn't find anything after the current or we have something
+ * better before and up to the current slave
+ */
+ if (!rx_slave || (before && rx_slave->speed < before->speed))
+ rx_slave = before;
+
+ if (rx_slave)
+ bond_info->rx_slave = rx_slave;
+
+ return rx_slave;
+}
+
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -628,12 +661,14 @@
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct arp_pkt *arp = arp_pkt(skb);
- struct slave *assigned_slave;
+ struct slave *assigned_slave, *curr_active_slave;
struct rlb_client_info *client_info;
u32 hash_index = 0;
_lock_rx_hashtbl(bond);
+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -658,14 +693,14 @@
* that the new client can be assigned to this entry.
*/
if (bond->curr_active_slave &&
- client_info->slave != bond->curr_active_slave) {
- client_info->slave = bond->curr_active_slave;
+ client_info->slave != curr_active_slave) {
+ client_info->slave = curr_active_slave;
rlb_update_client(client_info);
}
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = __rlb_next_rx_slave(bond);
if (assigned_slave) {
if (!(client_info->assigned &&
@@ -728,7 +763,7 @@
/* Don't modify or load balance ARPs that do not originate locally
* (e.g.,arrive via a bridge).
*/
- if (!bond_slave_has_mac(bond, arp->mac_src))
+ if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
return NULL;
if (arp->op_code == htons(ARPOP_REPLY)) {
@@ -1343,11 +1378,6 @@
skb_reset_mac_header(skb);
eth_data = eth_hdr(skb);
- /* make sure that the curr_active_slave do not change during tx
- */
- read_lock(&bond->lock);
- read_lock(&bond->curr_slave_lock);
-
switch (ntohs(skb->protocol)) {
case ETH_P_IP: {
const struct iphdr *iph = ip_hdr(skb);
@@ -1429,12 +1459,12 @@
if (!tx_slave) {
/* unbalanced or unassigned, send through primary */
- tx_slave = bond->curr_active_slave;
+ tx_slave = rcu_dereference(bond->curr_active_slave);
bond_info->unbalanced_load += skb->len;
}
if (tx_slave && SLAVE_IS_OK(tx_slave)) {
- if (tx_slave != bond->curr_active_slave) {
+ if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
memcpy(eth_data->h_source,
tx_slave->dev->dev_addr,
ETH_ALEN);
@@ -1449,8 +1479,6 @@
}
}
- read_unlock(&bond->curr_slave_lock);
- read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e924952..03bed0c 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -179,7 +179,9 @@
struct slave *slave;
int res = 0;
- read_lock(&bond->lock);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
@@ -190,7 +192,9 @@
}
res += sprintf(buf + res, "%s ", slave->dev->name);
}
- read_unlock(&bond->lock);
+
+ rtnl_unlock();
+
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
@@ -626,6 +630,9 @@
unsigned long *targets_rx;
int ind, i, j, ret = -EINVAL;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
targets = bond->params.arp_targets;
newtarget = in_aton(buf + 1);
/* look for adds */
@@ -699,6 +706,7 @@
ret = count;
out:
+ rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
@@ -1467,7 +1475,6 @@
if (!rtnl_trylock())
return restart_syscall();
- read_lock(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
/* not enough space for another interface_name:queue_id pair */
@@ -1479,9 +1486,9 @@
res += sprintf(buf + res, "%s:%d ",
slave->dev->name, slave->queue_id);
}
- read_unlock(&bond->lock);
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
+
rtnl_unlock();
return res;
@@ -1530,8 +1537,6 @@
if (!sdev)
goto err_no_cmd;
- read_lock(&bond->lock);
-
/* Search for thes slave and check for duplicate qids */
update_slave = NULL;
bond_for_each_slave(bond, slave, iter) {
@@ -1542,23 +1547,20 @@
*/
update_slave = slave;
else if (qid && qid == slave->queue_id) {
- goto err_no_cmd_unlock;
+ goto err_no_cmd;
}
}
if (!update_slave)
- goto err_no_cmd_unlock;
+ goto err_no_cmd;
/* Actually set the qids for the slave */
update_slave->queue_id = qid;
- read_unlock(&bond->lock);
out:
rtnl_unlock();
return ret;
-err_no_cmd_unlock:
- read_unlock(&bond->lock);
err_no_cmd:
pr_info("invalid input for queue_id set for %s.\n",
bond->dev->name);
@@ -1591,6 +1593,9 @@
struct list_head *iter;
struct slave *slave;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no all_slaves_active value specified.\n",
bond->dev->name);
@@ -1610,7 +1615,6 @@
goto out;
}
- read_lock(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
if (!bond_is_active_slave(slave)) {
if (new_value)
@@ -1619,8 +1623,8 @@
slave->inactive = 1;
}
}
- read_unlock(&bond->lock);
out:
+ rtnl_unlock();
return ret;
}
static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 0bd04fb..bb5c731e 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -464,6 +464,20 @@
return NULL;
}
+/* Caller must hold rcu_read_lock() for read */
+static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
+ const u8 *mac)
+{
+ struct list_head *iter;
+ struct slave *tmp;
+
+ bond_for_each_slave_rcu(bond, tmp, iter)
+ if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+ return tmp;
+
+ return NULL;
+}
+
/* Check if the ip is present in arp ip list, or first free slot if ip == 0
* Returns -1 if not found, index if found
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ea20182..735765c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2253,7 +2253,6 @@
int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- struct mlx4_vport_oper_state *vf_oper;
struct mlx4_vport_state *vf_admin;
int slave;
@@ -2269,7 +2268,6 @@
return -EINVAL;
vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
- vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
if ((0 == vlan) && (0 == qos))
vf_admin->default_vlan = MLX4_VGT;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fa37b7a..85d9166 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1733,7 +1733,7 @@
/* Unregister Mac address for the port */
mlx4_en_put_qp(priv);
- if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN))
mdev->mac_removed[priv->port] = 1;
/* Free RX Rings */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 0d63daa..c151e7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -652,7 +652,7 @@
QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
if (field & 1<<6)
- dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
MLX4_GET(dev_cap->max_icm_sz, outbox,
QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
@@ -1713,7 +1713,6 @@
u32 *outbox;
u32 modifier;
u16 token;
- u16 type_m;
u16 type;
int err;
u32 num_qps;
@@ -1746,7 +1745,6 @@
MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET);
MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET);
MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET);
- type_m = type >> 12;
type &= 0xfff;
switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 60c9f4f..179d267 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -42,6 +42,7 @@
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
+#include <linux/kmod.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@@ -650,6 +651,27 @@
return err;
}
+static void mlx4_request_modules(struct mlx4_dev *dev)
+{
+ int port;
+ int has_ib_port = false;
+ int has_eth_port = false;
+#define EN_DRV_NAME "mlx4_en"
+#define IB_DRV_NAME "mlx4_ib"
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+ has_ib_port = true;
+ else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ has_eth_port = true;
+ }
+
+ if (has_ib_port)
+ request_module_nowait(IB_DRV_NAME);
+ if (has_eth_port)
+ request_module_nowait(EN_DRV_NAME);
+}
+
/*
* Change the port configuration of the device.
* Every user of this function must hold the port mutex.
@@ -681,6 +703,11 @@
}
mlx4_set_port_mask(dev);
err = mlx4_register_device(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to register device\n");
+ goto out;
+ }
+ mlx4_request_modules(dev);
}
out:
@@ -2305,6 +2332,8 @@
if (err)
goto err_port;
+ mlx4_request_modules(dev);
+
mlx4_sense_init(dev);
mlx4_start_sense(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 55f6245..70f0213 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -645,7 +645,7 @@
int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
enum mlx4_net_trans_promisc_mode flow_type)
{
- if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) {
+ if (flow_type >= MLX4_FS_MODE_NUM) {
mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type);
return -EINVAL;
}
@@ -681,7 +681,7 @@
int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
enum mlx4_net_trans_rule_id id)
{
- if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+ if (id >= MLX4_NET_TRANS_RULE_NUM) {
mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
return -EINVAL;
}
@@ -706,7 +706,7 @@
int mlx4_hw_rule_sz(struct mlx4_dev *dev,
enum mlx4_net_trans_rule_id id)
{
- if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+ if (id >= MLX4_NET_TRANS_RULE_NUM) {
mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 79fd269..9e08e35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -34,6 +34,7 @@
#include <linux/init.h>
#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/srq.h>
#include <linux/export.h>
#include <linux/gfp.h>
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5715318..55b8dec 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -87,9 +87,13 @@
struct xenvif_rx_meta {
int id;
int size;
+ int gso_type;
int gso_size;
};
+#define GSO_BIT(type) \
+ (1 << XEN_NETIF_GSO_TYPE_ ## type)
+
/* Discriminate from any valid pending_idx value. */
#define INVALID_PENDING_IDX 0xFFFF
@@ -150,10 +154,12 @@
u8 fe_dev_addr[6];
/* Frontend feature information. */
+ int gso_mask;
+ int gso_prefix_mask;
+
u8 can_sg:1;
- u8 gso:1;
- u8 gso_prefix:1;
- u8 csum:1;
+ u8 ip_csum:1;
+ u8 ipv6_csum:1;
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 01bb854..e4aa267 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -214,10 +214,14 @@
if (!vif->can_sg)
features &= ~NETIF_F_SG;
- if (!vif->gso && !vif->gso_prefix)
+ if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
features &= ~NETIF_F_TSO;
- if (!vif->csum)
+ if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
+ features &= ~NETIF_F_TSO6;
+ if (!vif->ip_csum)
features &= ~NETIF_F_IP_CSUM;
+ if (!vif->ipv6_csum)
+ features &= ~NETIF_F_IPV6_CSUM;
return features;
}
@@ -306,7 +310,7 @@
vif->domid = domid;
vif->handle = handle;
vif->can_sg = 1;
- vif->csum = 1;
+ vif->ip_csum = 1;
vif->dev = dev;
vif->credit_bytes = vif->remaining_credit = ~0UL;
@@ -316,8 +320,10 @@
vif->credit_timeout.expires = jiffies;
dev->netdev_ops = &xenvif_netdev_ops;
- dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
- dev->features = dev->hw_features;
+ dev->hw_features = NETIF_F_SG |
+ NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO | NETIF_F_TSO6;
+ dev->features = dev->hw_features | NETIF_F_RXCSUM;
SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c..828fdab 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -109,15 +109,12 @@
return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
+/* This is a miniumum size for the linear area to avoid lots of
+ * calls to __pskb_pull_tail() as we set up checksum offsets. The
+ * value 128 was chosen as it covers all IPv4 and most likely
+ * IPv6 headers.
*/
-#define PKT_PROT_LEN (ETH_HLEN + \
- VLAN_HLEN + \
- sizeof(struct iphdr) + MAX_IPOPTLEN + \
- sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+#define PKT_PROT_LEN 128
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
@@ -145,7 +142,7 @@
int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
- if (vif->can_sg || vif->gso || vif->gso_prefix)
+ if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
return max;
@@ -317,6 +314,7 @@
req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
+ meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
meta->gso_size = 0;
meta->size = 0;
meta->id = req->id;
@@ -339,6 +337,7 @@
struct gnttab_copy *copy_gop;
struct xenvif_rx_meta *meta;
unsigned long bytes;
+ int gso_type;
/* Data must not cross a page boundary. */
BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
@@ -397,7 +396,14 @@
}
/* Leave a gap for the GSO descriptor. */
- if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+ gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+ else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+ gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+ else
+ gso_type = XEN_NETIF_GSO_TYPE_NONE;
+
+ if (*head && ((1 << gso_type) & vif->gso_mask))
vif->rx.req_cons++;
*head = 0; /* There must be something in this buffer now. */
@@ -428,14 +434,28 @@
unsigned char *data;
int head = 1;
int old_meta_prod;
+ int gso_type;
+ int gso_size;
old_meta_prod = npo->meta_prod;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+ gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso_size = skb_shinfo(skb)->gso_size;
+ } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+ gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+ gso_size = skb_shinfo(skb)->gso_size;
+ } else {
+ gso_type = XEN_NETIF_GSO_TYPE_NONE;
+ gso_size = 0;
+ }
+
/* Set up a GSO prefix descriptor, if necessary */
- if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+ if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
- meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->gso_type = gso_type;
+ meta->gso_size = gso_size;
meta->size = 0;
meta->id = req->id;
}
@@ -443,10 +463,13 @@
req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
- if (!vif->gso_prefix)
- meta->gso_size = skb_shinfo(skb)->gso_size;
- else
+ if ((1 << gso_type) & vif->gso_mask) {
+ meta->gso_type = gso_type;
+ meta->gso_size = gso_size;
+ } else {
+ meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
meta->gso_size = 0;
+ }
meta->size = 0;
meta->id = req->id;
@@ -592,7 +615,8 @@
vif = netdev_priv(skb->dev);
- if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+ if ((1 << vif->meta[npo.meta_cons].gso_type) &
+ vif->gso_prefix_mask) {
resp = RING_GET_RESPONSE(&vif->rx,
vif->rx.rsp_prod_pvt++);
@@ -629,7 +653,8 @@
vif->meta[npo.meta_cons].size,
flags);
- if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+ if ((1 << vif->meta[npo.meta_cons].gso_type) &
+ vif->gso_mask) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&vif->rx,
@@ -637,8 +662,8 @@
resp->flags |= XEN_NETRXF_extra_info;
+ gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
- gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
@@ -1101,15 +1126,20 @@
return -EINVAL;
}
- /* Currently only TCPv4 S.O. is supported. */
- if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ switch (gso->u.gso.type) {
+ case XEN_NETIF_GSO_TYPE_TCPV4:
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ break;
+ case XEN_NETIF_GSO_TYPE_TCPV6:
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ break;
+ default:
netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
xenvif_fatal_tx_err(vif);
return -EINVAL;
}
skb_shinfo(skb)->gso_size = gso->u.gso.size;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -1118,14 +1148,214 @@
return 0;
}
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+{
+ if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+ /* If we need to pullup then pullup to the max, so we
+ * won't need to do it again.
+ */
+ int target = min_t(int, skb->len, MAX_TCP_HEADER);
+ __pskb_pull_tail(skb, target - skb_headlen(skb));
+ }
+}
+
+static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
+ int recalculate_partial_csum)
+{
+ struct iphdr *iph = (void *)skb->data;
+ unsigned int header_size;
+ unsigned int off;
+ int err = -EPROTO;
+
+ off = sizeof(struct iphdr);
+
+ header_size = skb->network_header + off + MAX_IPOPTLEN;
+ maybe_pull_tail(skb, header_size);
+
+ off = iph->ihl * 4;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (!skb_partial_csum_set(skb, off,
+ offsetof(struct tcphdr, check)))
+ goto out;
+
+ if (recalculate_partial_csum) {
+ struct tcphdr *tcph = tcp_hdr(skb);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct tcphdr);
+ maybe_pull_tail(skb, header_size);
+
+ tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - off,
+ IPPROTO_TCP, 0);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (!skb_partial_csum_set(skb, off,
+ offsetof(struct udphdr, check)))
+ goto out;
+
+ if (recalculate_partial_csum) {
+ struct udphdr *udph = udp_hdr(skb);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct udphdr);
+ maybe_pull_tail(skb, header_size);
+
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - off,
+ IPPROTO_UDP, 0);
+ }
+ break;
+ default:
+ if (net_ratelimit())
+ netdev_err(vif->dev,
+ "Attempting to checksum a non-TCP/UDP packet, "
+ "dropping a protocol %d packet\n",
+ iph->protocol);
+ goto out;
+ }
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
+ int recalculate_partial_csum)
+{
+ int err = -EPROTO;
+ struct ipv6hdr *ipv6h = (void *)skb->data;
+ u8 nexthdr;
+ unsigned int header_size;
+ unsigned int off;
+ bool fragment;
+ bool done;
+
+ done = false;
+
+ off = sizeof(struct ipv6hdr);
+
+ header_size = skb->network_header + off;
+ maybe_pull_tail(skb, header_size);
+
+ nexthdr = ipv6h->nexthdr;
+
+ while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+ !done) {
+ switch (nexthdr) {
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING: {
+ struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct ipv6_opt_hdr);
+ maybe_pull_tail(skb, header_size);
+
+ nexthdr = hp->nexthdr;
+ off += ipv6_optlen(hp);
+ break;
+ }
+ case IPPROTO_AH: {
+ struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct ip_auth_hdr);
+ maybe_pull_tail(skb, header_size);
+
+ nexthdr = hp->nexthdr;
+ off += (hp->hdrlen+2)<<2;
+ break;
+ }
+ case IPPROTO_FRAGMENT:
+ fragment = true;
+ /* fall through */
+ default:
+ done = true;
+ break;
+ }
+ }
+
+ if (!done) {
+ if (net_ratelimit())
+ netdev_err(vif->dev, "Failed to parse packet header\n");
+ goto out;
+ }
+
+ if (fragment) {
+ if (net_ratelimit())
+ netdev_err(vif->dev, "Packet is a fragment!\n");
+ goto out;
+ }
+
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ if (!skb_partial_csum_set(skb, off,
+ offsetof(struct tcphdr, check)))
+ goto out;
+
+ if (recalculate_partial_csum) {
+ struct tcphdr *tcph = tcp_hdr(skb);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct tcphdr);
+ maybe_pull_tail(skb, header_size);
+
+ tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+ &ipv6h->daddr,
+ skb->len - off,
+ IPPROTO_TCP, 0);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (!skb_partial_csum_set(skb, off,
+ offsetof(struct udphdr, check)))
+ goto out;
+
+ if (recalculate_partial_csum) {
+ struct udphdr *udph = udp_hdr(skb);
+
+ header_size = skb->network_header +
+ off +
+ sizeof(struct udphdr);
+ maybe_pull_tail(skb, header_size);
+
+ udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+ &ipv6h->daddr,
+ skb->len - off,
+ IPPROTO_UDP, 0);
+ }
+ break;
+ default:
+ if (net_ratelimit())
+ netdev_err(vif->dev,
+ "Attempting to checksum a non-TCP/UDP packet, "
+ "dropping a protocol %d packet\n",
+ nexthdr);
+ goto out;
+ }
+
+ err = 0;
+
+out:
+ return err;
+}
+
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
- struct iphdr *iph;
int err = -EPROTO;
int recalculate_partial_csum = 0;
- /*
- * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+ /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
* peers can fail to set NETRXF_csum_blank when sending a GSO
* frame. In this case force the SKB to CHECKSUM_PARTIAL and
* recalculate the partial checksum.
@@ -1140,46 +1370,11 @@
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
+ if (skb->protocol == htons(ETH_P_IP))
+ err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
- iph = (void *)skb->data;
- switch (iph->protocol) {
- case IPPROTO_TCP:
- if (!skb_partial_csum_set(skb, 4 * iph->ihl,
- offsetof(struct tcphdr, check)))
- goto out;
-
- if (recalculate_partial_csum) {
- struct tcphdr *tcph = tcp_hdr(skb);
- tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - iph->ihl*4,
- IPPROTO_TCP, 0);
- }
- break;
- case IPPROTO_UDP:
- if (!skb_partial_csum_set(skb, 4 * iph->ihl,
- offsetof(struct udphdr, check)))
- goto out;
-
- if (recalculate_partial_csum) {
- struct udphdr *udph = udp_hdr(skb);
- udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - iph->ihl*4,
- IPPROTO_UDP, 0);
- }
- break;
- default:
- if (net_ratelimit())
- netdev_err(vif->dev,
- "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
- iph->protocol);
- goto out;
- }
-
- err = 0;
-
-out:
return err;
}
@@ -1428,12 +1623,7 @@
xenvif_fill_frags(vif, skb);
- /*
- * If the initial fragment was < PKT_PROT_LEN then
- * pull through some bytes from the other fragments to
- * increase the linear region to PKT_PROT_LEN bytes.
- */
- if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+ if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
int target = min_t(int, skb->len, PKT_PROT_LEN);
__pskb_pull_tail(skb, target - skb_headlen(skb));
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 1b08d87..f035899 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -105,6 +105,22 @@
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
+ "%d", sg);
+ if (err) {
+ message = "writing feature-gso-tcpv6";
+ goto abort_transaction;
+ }
+
+ /* We support partial checksum setup for IPv6 packets */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-ipv6-csum-offload",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-ipv6-csum-offload";
+ goto abort_transaction;
+ }
+
/* We support rx-copy path. */
err = xenbus_printf(xbt, dev->nodename,
"feature-rx-copy", "%d", 1);
@@ -561,20 +577,50 @@
val = 0;
vif->can_sg = !!val;
+ vif->gso_mask = 0;
+ vif->gso_prefix_mask = 0;
+
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
"%d", &val) < 0)
val = 0;
- vif->gso = !!val;
+ if (val)
+ vif->gso_mask |= GSO_BIT(TCPV4);
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
"%d", &val) < 0)
val = 0;
- vif->gso_prefix = !!val;
+ if (val)
+ vif->gso_prefix_mask |= GSO_BIT(TCPV4);
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
+ "%d", &val) < 0)
+ val = 0;
+ if (val)
+ vif->gso_mask |= GSO_BIT(TCPV6);
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix",
+ "%d", &val) < 0)
+ val = 0;
+ if (val)
+ vif->gso_prefix_mask |= GSO_BIT(TCPV6);
+
+ if (vif->gso_mask & vif->gso_prefix_mask) {
+ xenbus_dev_fatal(dev, err,
+ "%s: gso and gso prefix flags are not "
+ "mutually exclusive",
+ dev->otherend);
+ return -EOPNOTSUPP;
+ }
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
"%d", &val) < 0)
val = 0;
- vif->csum = !val;
+ vif->ip_csum = !val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload",
+ "%d", &val) < 0)
+ val = 0;
+ vif->ipv6_csum = !!val;
/* Map the shared frame, irq etc. */
err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index cd1fdf7..8df61bc 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -154,10 +154,6 @@
MLX4_CMD_QUERY_IF_STAT = 0X54,
MLX4_CMD_SET_IF_STAT = 0X55,
- /* set port opcode modifiers */
- MLX4_SET_PORT_PRIO2TC = 0x8,
- MLX4_SET_PORT_SCHEDULER = 0x9,
-
/* register/delete flow steering network rules */
MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
MLX4_QP_FLOW_STEERING_DETACH = 0x66,
@@ -182,6 +178,8 @@
MLX4_SET_PORT_VLAN_TABLE = 0x3,
MLX4_SET_PORT_PRIO_MAP = 0x4,
MLX4_SET_PORT_GID_TABLE = 0x5,
+ MLX4_SET_PORT_PRIO2TC = 0x8,
+ MLX4_SET_PORT_SCHEDULER = 0x9,
};
enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 24ce6bd..9ad0c18 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -155,7 +155,7 @@
MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3,
- MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4,
+ MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 4,
MLX4_DEV_CAP_FLAG2_TS = 1LL << 5,
MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6,
MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7,
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 61223c5..2077489 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -42,7 +42,8 @@
struct sk_buff;
-typedef unsigned int nf_hookfn(unsigned int hooknum,
+struct nf_hook_ops;
+typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -52,12 +53,13 @@
struct list_head list;
/* User fills in from here down. */
- nf_hookfn *hook;
- struct module *owner;
- u_int8_t pf;
- unsigned int hooknum;
+ nf_hookfn *hook;
+ struct module *owner;
+ void *priv;
+ u_int8_t pf;
+ unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
- int priority;
+ int priority;
};
struct nf_sockopt_ops {
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4f68cd7..28c7436 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,6 +14,9 @@
int (*call_rcu)(struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[]);
+ int (*call_batch)(struct sock *nl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[]);
const struct nla_policy *policy; /* netlink attribute policy */
const u_int16_t attr_count; /* number of nlattr's */
};
@@ -23,6 +26,8 @@
__u8 subsys_id; /* nfnetlink subsystem ID */
__u8 cb_count; /* number of callbacks */
const struct nfnl_callback *cb; /* callback for individual types */
+ int (*commit)(struct sk_buff *skb);
+ int (*abort)(struct sk_buff *skb);
};
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index b647c62..71c6e26 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -135,7 +135,7 @@
tw_transparent : 1,
tw_pad : 6, /* 6 bits hole */
tw_tos : 8,
- tw_pad2 : 16 /* 16 bits hole */
+ tw_pad2 : 16; /* 16 bits hole */
kmemcheck_bitfield_end(flags);
u32 tw_ttd;
struct inet_bind_bucket *tw_tb;
diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h
index 94c852d..1141747 100644
--- a/include/net/irda/irda_device.h
+++ b/include/net/irda/irda_device.h
@@ -162,7 +162,7 @@
int irq, irq2; /* Interrupts used */
int dma, dma2; /* DMA channel(s) used */
int fifo_size; /* FIFO size */
- int irqflags; /* interrupt flags (ie, IRQF_SHARED|IRQF_DISABLED) */
+ int irqflags; /* interrupt flags (ie, IRQF_SHARED) */
int direction; /* Link direction, used by some FIR drivers */
int enabled; /* Powered on? */
int suspended; /* Suspended by APM */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bcc4a8e..da68c9a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -22,6 +22,7 @@
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
+#include <net/netns/nftables.h>
#include <net/netns/xfrm.h>
struct user_namespace;
@@ -101,6 +102,9 @@
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
+#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
+ struct netns_nftables nft;
+#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag nf_frag;
#endif
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c29b4e5..07eaaf6 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -45,6 +45,9 @@
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype);
+extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
+ unsigned int hooknum);
+
/* Is this tuple already taken? (not by us)*/
int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
new file mode 100644
index 0000000..54c4a5c
--- /dev/null
+++ b/include/net/netfilter/nf_tables.h
@@ -0,0 +1,522 @@
+#ifndef _NET_NF_TABLES_H
+#define _NET_NF_TABLES_H
+
+#include <linux/list.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netlink.h>
+
+#define NFT_JUMP_STACK_SIZE 16
+
+struct nft_pktinfo {
+ struct sk_buff *skb;
+ const struct net_device *in;
+ const struct net_device *out;
+ u8 hooknum;
+ u8 nhoff;
+ u8 thoff;
+ /* for x_tables compatibility */
+ struct xt_action_param xt;
+};
+
+static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out)
+{
+ pkt->skb = skb;
+ pkt->in = pkt->xt.in = in;
+ pkt->out = pkt->xt.out = out;
+ pkt->hooknum = pkt->xt.hooknum = ops->hooknum;
+ pkt->xt.family = ops->pf;
+}
+
+struct nft_data {
+ union {
+ u32 data[4];
+ struct {
+ u32 verdict;
+ struct nft_chain *chain;
+ };
+ };
+} __attribute__((aligned(__alignof__(u64))));
+
+static inline int nft_data_cmp(const struct nft_data *d1,
+ const struct nft_data *d2,
+ unsigned int len)
+{
+ return memcmp(d1->data, d2->data, len);
+}
+
+static inline void nft_data_copy(struct nft_data *dst,
+ const struct nft_data *src)
+{
+ BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
+ *(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
+ *(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
+}
+
+static inline void nft_data_debug(const struct nft_data *data)
+{
+ pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
+ data->data[0], data->data[1],
+ data->data[2], data->data[3]);
+}
+
+/**
+ * struct nft_ctx - nf_tables rule/set context
+ *
+ * @net: net namespace
+ * @skb: netlink skb
+ * @nlh: netlink message header
+ * @afi: address family info
+ * @table: the table the chain is contained in
+ * @chain: the chain the rule is contained in
+ * @nla: netlink attributes
+ */
+struct nft_ctx {
+ struct net *net;
+ const struct sk_buff *skb;
+ const struct nlmsghdr *nlh;
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ const struct nlattr * const *nla;
+};
+
+struct nft_data_desc {
+ enum nft_data_types type;
+ unsigned int len;
+};
+
+extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla);
+extern void nft_data_uninit(const struct nft_data *data,
+ enum nft_data_types type);
+extern int nft_data_dump(struct sk_buff *skb, int attr,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len);
+
+static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
+{
+ return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
+static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
+{
+ return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
+}
+
+extern int nft_validate_input_register(enum nft_registers reg);
+extern int nft_validate_output_register(enum nft_registers reg);
+extern int nft_validate_data_load(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type);
+
+/**
+ * struct nft_set_elem - generic representation of set elements
+ *
+ * @cookie: implementation specific element cookie
+ * @key: element key
+ * @data: element data (maps only)
+ * @flags: element flags (end of interval)
+ *
+ * The cookie can be used to store a handle to the element for subsequent
+ * removal.
+ */
+struct nft_set_elem {
+ void *cookie;
+ struct nft_data key;
+ struct nft_data data;
+ u32 flags;
+};
+
+struct nft_set;
+struct nft_set_iter {
+ unsigned int count;
+ unsigned int skip;
+ int err;
+ int (*fn)(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_iter *iter,
+ const struct nft_set_elem *elem);
+};
+
+/**
+ * struct nft_set_ops - nf_tables set operations
+ *
+ * @lookup: look up an element within the set
+ * @insert: insert new element into set
+ * @remove: remove element from set
+ * @walk: iterate over all set elemeennts
+ * @privsize: function to return size of set private data
+ * @init: initialize private data of new set instance
+ * @destroy: destroy private data of set instance
+ * @list: nf_tables_set_ops list node
+ * @owner: module reference
+ * @features: features supported by the implementation
+ */
+struct nft_set_ops {
+ bool (*lookup)(const struct nft_set *set,
+ const struct nft_data *key,
+ struct nft_data *data);
+ int (*get)(const struct nft_set *set,
+ struct nft_set_elem *elem);
+ int (*insert)(const struct nft_set *set,
+ const struct nft_set_elem *elem);
+ void (*remove)(const struct nft_set *set,
+ const struct nft_set_elem *elem);
+ void (*walk)(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ struct nft_set_iter *iter);
+
+ unsigned int (*privsize)(const struct nlattr * const nla[]);
+ int (*init)(const struct nft_set *set,
+ const struct nlattr * const nla[]);
+ void (*destroy)(const struct nft_set *set);
+
+ struct list_head list;
+ struct module *owner;
+ u32 features;
+};
+
+extern int nft_register_set(struct nft_set_ops *ops);
+extern void nft_unregister_set(struct nft_set_ops *ops);
+
+/**
+ * struct nft_set - nf_tables set instance
+ *
+ * @list: table set list node
+ * @bindings: list of set bindings
+ * @name: name of the set
+ * @ktype: key type (numeric type defined by userspace, not used in the kernel)
+ * @dtype: data type (verdict or numeric type defined by userspace)
+ * @ops: set ops
+ * @flags: set flags
+ * @klen: key length
+ * @dlen: data length
+ * @data: private set data
+ */
+struct nft_set {
+ struct list_head list;
+ struct list_head bindings;
+ char name[IFNAMSIZ];
+ u32 ktype;
+ u32 dtype;
+ /* runtime data below here */
+ const struct nft_set_ops *ops ____cacheline_aligned;
+ u16 flags;
+ u8 klen;
+ u8 dlen;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(u64))));
+};
+
+static inline void *nft_set_priv(const struct nft_set *set)
+{
+ return (void *)set->data;
+}
+
+extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla);
+
+/**
+ * struct nft_set_binding - nf_tables set binding
+ *
+ * @list: set bindings list node
+ * @chain: chain containing the rule bound to the set
+ *
+ * A set binding contains all information necessary for validation
+ * of new elements added to a bound set.
+ */
+struct nft_set_binding {
+ struct list_head list;
+ const struct nft_chain *chain;
+};
+
+extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding);
+extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding);
+
+
+/**
+ * struct nft_expr_type - nf_tables expression type
+ *
+ * @select_ops: function to select nft_expr_ops
+ * @ops: default ops, used when no select_ops functions is present
+ * @list: used internally
+ * @name: Identifier
+ * @owner: module reference
+ * @policy: netlink attribute policy
+ * @maxattr: highest netlink attribute number
+ */
+struct nft_expr_type {
+ const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *,
+ const struct nlattr * const tb[]);
+ const struct nft_expr_ops *ops;
+ struct list_head list;
+ const char *name;
+ struct module *owner;
+ const struct nla_policy *policy;
+ unsigned int maxattr;
+};
+
+/**
+ * struct nft_expr_ops - nf_tables expression operations
+ *
+ * @eval: Expression evaluation function
+ * @size: full expression size, including private data size
+ * @init: initialization function
+ * @destroy: destruction function
+ * @dump: function to dump parameters
+ * @type: expression type
+ * @validate: validate expression, called during loop detection
+ * @data: extra data to attach to this expression operation
+ */
+struct nft_expr;
+struct nft_expr_ops {
+ void (*eval)(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt);
+ unsigned int size;
+
+ int (*init)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+ void (*destroy)(const struct nft_expr *expr);
+ int (*dump)(struct sk_buff *skb,
+ const struct nft_expr *expr);
+ int (*validate)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data);
+ const struct nft_expr_type *type;
+ void *data;
+};
+
+#define NFT_EXPR_MAXATTR 16
+#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \
+ ALIGN(size, __alignof__(struct nft_expr)))
+
+/**
+ * struct nft_expr - nf_tables expression
+ *
+ * @ops: expression ops
+ * @data: expression private data
+ */
+struct nft_expr {
+ const struct nft_expr_ops *ops;
+ unsigned char data[];
+};
+
+static inline void *nft_expr_priv(const struct nft_expr *expr)
+{
+ return (void *)expr->data;
+}
+
+/**
+ * struct nft_rule - nf_tables rule
+ *
+ * @list: used internally
+ * @rcu_head: used internally for rcu
+ * @handle: rule handle
+ * @genmask: generation mask
+ * @dlen: length of expression data
+ * @data: expression data
+ */
+struct nft_rule {
+ struct list_head list;
+ struct rcu_head rcu_head;
+ u64 handle:46,
+ genmask:2,
+ dlen:16;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+/**
+ * struct nft_rule_trans - nf_tables rule update in transaction
+ *
+ * @list: used internally
+ * @rule: rule that needs to be updated
+ * @chain: chain that this rule belongs to
+ * @table: table for which this chain applies
+ * @nlh: netlink header of the message that contain this update
+ * @family: family expressesed as AF_*
+ */
+struct nft_rule_trans {
+ struct list_head list;
+ struct nft_rule *rule;
+ const struct nft_chain *chain;
+ const struct nft_table *table;
+ const struct nlmsghdr *nlh;
+ u8 family;
+};
+
+static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
+{
+ return (struct nft_expr *)&rule->data[0];
+}
+
+static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr)
+{
+ return ((void *)expr) + expr->ops->size;
+}
+
+static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
+{
+ return (struct nft_expr *)&rule->data[rule->dlen];
+}
+
+/*
+ * The last pointer isn't really necessary, but the compiler isn't able to
+ * determine that the result of nft_expr_last() is always the same since it
+ * can't assume that the dlen value wasn't changed within calls in the loop.
+ */
+#define nft_rule_for_each_expr(expr, last, rule) \
+ for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \
+ (expr) != (last); \
+ (expr) = nft_expr_next(expr))
+
+enum nft_chain_flags {
+ NFT_BASE_CHAIN = 0x1,
+};
+
+/**
+ * struct nft_chain - nf_tables chain
+ *
+ * @rules: list of rules in the chain
+ * @list: used internally
+ * @rcu_head: used internally
+ * @net: net namespace that this chain belongs to
+ * @table: table that this chain belongs to
+ * @handle: chain handle
+ * @flags: bitmask of enum nft_chain_flags
+ * @use: number of jump references to this chain
+ * @level: length of longest path to this chain
+ * @name: name of the chain
+ */
+struct nft_chain {
+ struct list_head rules;
+ struct list_head list;
+ struct rcu_head rcu_head;
+ struct net *net;
+ struct nft_table *table;
+ u64 handle;
+ u8 flags;
+ u16 use;
+ u16 level;
+ char name[NFT_CHAIN_MAXNAMELEN];
+};
+
+enum nft_chain_type {
+ NFT_CHAIN_T_DEFAULT = 0,
+ NFT_CHAIN_T_ROUTE,
+ NFT_CHAIN_T_NAT,
+ NFT_CHAIN_T_MAX
+};
+
+struct nft_stats {
+ u64 bytes;
+ u64 pkts;
+};
+
+/**
+ * struct nft_base_chain - nf_tables base chain
+ *
+ * @ops: netfilter hook ops
+ * @type: chain type
+ * @policy: default policy
+ * @stats: per-cpu chain stats
+ * @chain: the chain
+ */
+struct nft_base_chain {
+ struct nf_hook_ops ops;
+ enum nft_chain_type type;
+ u8 policy;
+ struct nft_stats __percpu *stats;
+ struct nft_chain chain;
+};
+
+static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
+{
+ return container_of(chain, struct nft_base_chain, chain);
+}
+
+extern unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops);
+
+/**
+ * struct nft_table - nf_tables table
+ *
+ * @list: used internally
+ * @chains: chains in the table
+ * @sets: sets in the table
+ * @hgenerator: handle generator state
+ * @use: number of chain references to this table
+ * @flags: table flag (see enum nft_table_flags)
+ * @name: name of the table
+ */
+struct nft_table {
+ struct list_head list;
+ struct list_head chains;
+ struct list_head sets;
+ u64 hgenerator;
+ u32 use;
+ u16 flags;
+ char name[];
+};
+
+/**
+ * struct nft_af_info - nf_tables address family info
+ *
+ * @list: used internally
+ * @family: address family
+ * @nhooks: number of hooks in this family
+ * @owner: module owner
+ * @tables: used internally
+ * @hooks: hookfn overrides for packet validation
+ */
+struct nft_af_info {
+ struct list_head list;
+ int family;
+ unsigned int nhooks;
+ struct module *owner;
+ struct list_head tables;
+ nf_hookfn *hooks[NF_MAX_HOOKS];
+};
+
+extern int nft_register_afinfo(struct net *, struct nft_af_info *);
+extern void nft_unregister_afinfo(struct nft_af_info *);
+
+struct nf_chain_type {
+ unsigned int hook_mask;
+ const char *name;
+ enum nft_chain_type type;
+ nf_hookfn *fn[NF_MAX_HOOKS];
+ struct module *me;
+ int family;
+};
+
+extern int nft_register_chain_type(struct nf_chain_type *);
+extern void nft_unregister_chain_type(struct nf_chain_type *);
+
+extern int nft_register_expr(struct nft_expr_type *);
+extern void nft_unregister_expr(struct nft_expr_type *);
+
+#define MODULE_ALIAS_NFT_FAMILY(family) \
+ MODULE_ALIAS("nft-afinfo-" __stringify(family))
+
+#define MODULE_ALIAS_NFT_CHAIN(family, name) \
+ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
+
+#define MODULE_ALIAS_NFT_EXPR(name) \
+ MODULE_ALIAS("nft-expr-" name)
+
+#define MODULE_ALIAS_NFT_SET() \
+ MODULE_ALIAS("nft-set")
+
+#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
new file mode 100644
index 0000000..fe7b162
--- /dev/null
+++ b/include/net/netfilter/nf_tables_core.h
@@ -0,0 +1,42 @@
+#ifndef _NET_NF_TABLES_CORE_H
+#define _NET_NF_TABLES_CORE_H
+
+extern int nf_tables_core_module_init(void);
+extern void nf_tables_core_module_exit(void);
+
+extern int nft_immediate_module_init(void);
+extern void nft_immediate_module_exit(void);
+
+struct nft_cmp_fast_expr {
+ u32 data;
+ enum nft_registers sreg:8;
+ u8 len;
+};
+
+extern const struct nft_expr_ops nft_cmp_fast_ops;
+
+extern int nft_cmp_module_init(void);
+extern void nft_cmp_module_exit(void);
+
+extern int nft_lookup_module_init(void);
+extern void nft_lookup_module_exit(void);
+
+extern int nft_bitwise_module_init(void);
+extern void nft_bitwise_module_exit(void);
+
+extern int nft_byteorder_module_init(void);
+extern void nft_byteorder_module_exit(void);
+
+struct nft_payload {
+ enum nft_payload_bases base:8;
+ u8 offset;
+ u8 len;
+ enum nft_registers dreg:8;
+};
+
+extern const struct nft_expr_ops nft_payload_fast_ops;
+
+extern int nft_payload_module_init(void);
+extern void nft_payload_module_exit(void);
+
+#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
new file mode 100644
index 0000000..1be1c2c
--- /dev/null
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -0,0 +1,23 @@
+#ifndef _NF_TABLES_IPV4_H_
+#define _NF_TABLES_IPV4_H_
+
+#include <net/netfilter/nf_tables.h>
+#include <net/ip.h>
+
+static inline void
+nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out)
+{
+ struct iphdr *ip;
+
+ nft_set_pktinfo(pkt, ops, skb, in, out);
+
+ pkt->xt.thoff = ip_hdrlen(pkt->skb);
+ ip = ip_hdr(pkt->skb);
+ pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+}
+
+#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
new file mode 100644
index 0000000..4a9b88a
--- /dev/null
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -0,0 +1,30 @@
+#ifndef _NF_TABLES_IPV6_H_
+#define _NF_TABLES_IPV6_H_
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/ipv6.h>
+
+static inline int
+nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out)
+{
+ int protohdr, thoff = 0;
+ unsigned short frag_off;
+
+ nft_set_pktinfo(pkt, ops, skb, in, out);
+
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+ /* If malformed, drop it */
+ if (protohdr < 0)
+ return -1;
+
+ pkt->xt.thoff = thoff;
+ pkt->xt.fragoff = frag_off;
+
+ return 0;
+}
+
+#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
new file mode 100644
index 0000000..15d056d
--- /dev/null
+++ b/include/net/netns/nftables.h
@@ -0,0 +1,19 @@
+#ifndef _NETNS_NFTABLES_H_
+#define _NETNS_NFTABLES_H_
+
+#include <linux/list.h>
+
+struct nft_af_info;
+
+struct netns_nftables {
+ struct list_head af_info;
+ struct list_head commit_list;
+ struct nft_af_info *ipv4;
+ struct nft_af_info *ipv6;
+ struct nft_af_info *arp;
+ struct nft_af_info *bridge;
+ u8 gencursor;
+ u8 genctr;
+};
+
+#endif
diff --git a/include/net/route.h b/include/net/route.h
index 0ad8e01..dd4ae00 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -88,22 +88,14 @@
};
struct rt_cache_stat {
- unsigned int in_hit;
unsigned int in_slow_tot;
unsigned int in_slow_mc;
unsigned int in_no_route;
unsigned int in_brd;
unsigned int in_martian_dst;
unsigned int in_martian_src;
- unsigned int out_hit;
unsigned int out_slow_tot;
unsigned int out_slow_mc;
- unsigned int gc_total;
- unsigned int gc_ignored;
- unsigned int gc_goal_miss;
- unsigned int gc_dst_overflow;
- unsigned int in_hlist_search;
- unsigned int out_hlist_search;
};
extern struct ip_rt_acct __percpu *ip_rt_acct;
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 1749154..17c3af2 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -5,6 +5,8 @@
header-y += nf_conntrack_sctp.h
header-y += nf_conntrack_tcp.h
header-y += nf_conntrack_tuple_common.h
+header-y += nf_tables.h
+header-y += nf_tables_compat.h
header-y += nf_nat.h
header-y += nfnetlink.h
header-y += nfnetlink_acct.h
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 8dd8038..319f471 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -25,6 +25,10 @@
IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
};
+#define NF_CT_STATE_INVALID_BIT (1 << 0)
+#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
+#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1))
+
/* Bitset representing status of connection. */
enum ip_conntrack_status {
/* It's an expected connection: bit 0 set. This bit never changed */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
new file mode 100644
index 0000000..fbfd229
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -0,0 +1,718 @@
+#ifndef _LINUX_NF_TABLES_H
+#define _LINUX_NF_TABLES_H
+
+#define NFT_CHAIN_MAXNAMELEN 32
+
+enum nft_registers {
+ NFT_REG_VERDICT,
+ NFT_REG_1,
+ NFT_REG_2,
+ NFT_REG_3,
+ NFT_REG_4,
+ __NFT_REG_MAX
+};
+#define NFT_REG_MAX (__NFT_REG_MAX - 1)
+
+/**
+ * enum nft_verdicts - nf_tables internal verdicts
+ *
+ * @NFT_CONTINUE: continue evaluation of the current rule
+ * @NFT_BREAK: terminate evaluation of the current rule
+ * @NFT_JUMP: push the current chain on the jump stack and jump to a chain
+ * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack
+ * @NFT_RETURN: return to the topmost chain on the jump stack
+ *
+ * The nf_tables verdicts share their numeric space with the netfilter verdicts.
+ */
+enum nft_verdicts {
+ NFT_CONTINUE = -1,
+ NFT_BREAK = -2,
+ NFT_JUMP = -3,
+ NFT_GOTO = -4,
+ NFT_RETURN = -5,
+};
+
+/**
+ * enum nf_tables_msg_types - nf_tables netlink message types
+ *
+ * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes)
+ * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes)
+ * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes)
+ * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes)
+ * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes)
+ * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes)
+ * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
+ * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
+ * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
+ * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes)
+ * @NFT_MSG_GETSET: get a set (enum nft_set_attributes)
+ * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes)
+ * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ */
+enum nf_tables_msg_types {
+ NFT_MSG_NEWTABLE,
+ NFT_MSG_GETTABLE,
+ NFT_MSG_DELTABLE,
+ NFT_MSG_NEWCHAIN,
+ NFT_MSG_GETCHAIN,
+ NFT_MSG_DELCHAIN,
+ NFT_MSG_NEWRULE,
+ NFT_MSG_GETRULE,
+ NFT_MSG_DELRULE,
+ NFT_MSG_NEWSET,
+ NFT_MSG_GETSET,
+ NFT_MSG_DELSET,
+ NFT_MSG_NEWSETELEM,
+ NFT_MSG_GETSETELEM,
+ NFT_MSG_DELSETELEM,
+ NFT_MSG_MAX,
+};
+
+/**
+ * enum nft_list_attributes - nf_tables generic list netlink attributes
+ *
+ * @NFTA_LIST_ELEM: list element (NLA_NESTED)
+ */
+enum nft_list_attributes {
+ NFTA_LIST_UNPEC,
+ NFTA_LIST_ELEM,
+ __NFTA_LIST_MAX
+};
+#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1)
+
+/**
+ * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes
+ *
+ * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
+ * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ */
+enum nft_hook_attributes {
+ NFTA_HOOK_UNSPEC,
+ NFTA_HOOK_HOOKNUM,
+ NFTA_HOOK_PRIORITY,
+ __NFTA_HOOK_MAX
+};
+#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1)
+
+/**
+ * enum nft_table_flags - nf_tables table flags
+ *
+ * @NFT_TABLE_F_DORMANT: this table is not active
+ */
+enum nft_table_flags {
+ NFT_TABLE_F_DORMANT = 0x1,
+};
+
+/**
+ * enum nft_table_attributes - nf_tables table netlink attributes
+ *
+ * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
+ * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
+ */
+enum nft_table_attributes {
+ NFTA_TABLE_UNSPEC,
+ NFTA_TABLE_NAME,
+ NFTA_TABLE_FLAGS,
+ __NFTA_TABLE_MAX
+};
+#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
+
+/**
+ * enum nft_chain_attributes - nf_tables chain netlink attributes
+ *
+ * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING)
+ * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64)
+ * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING)
+ * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes)
+ * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32)
+ * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32)
+ * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING)
+ * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes)
+ */
+enum nft_chain_attributes {
+ NFTA_CHAIN_UNSPEC,
+ NFTA_CHAIN_TABLE,
+ NFTA_CHAIN_HANDLE,
+ NFTA_CHAIN_NAME,
+ NFTA_CHAIN_HOOK,
+ NFTA_CHAIN_POLICY,
+ NFTA_CHAIN_USE,
+ NFTA_CHAIN_TYPE,
+ NFTA_CHAIN_COUNTERS,
+ __NFTA_CHAIN_MAX
+};
+#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1)
+
+/**
+ * enum nft_rule_attributes - nf_tables rule netlink attributes
+ *
+ * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING)
+ * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING)
+ * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64)
+ * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
+ * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
+ */
+enum nft_rule_attributes {
+ NFTA_RULE_UNSPEC,
+ NFTA_RULE_TABLE,
+ NFTA_RULE_CHAIN,
+ NFTA_RULE_HANDLE,
+ NFTA_RULE_EXPRESSIONS,
+ NFTA_RULE_COMPAT,
+ NFTA_RULE_POSITION,
+ __NFTA_RULE_MAX
+};
+#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
+
+/**
+ * enum nft_rule_compat_flags - nf_tables rule compat flags
+ *
+ * @NFT_RULE_COMPAT_F_INV: invert the check result
+ */
+enum nft_rule_compat_flags {
+ NFT_RULE_COMPAT_F_INV = (1 << 1),
+ NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV,
+};
+
+/**
+ * enum nft_rule_compat_attributes - nf_tables rule compat attributes
+ *
+ * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32)
+ * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32)
+ */
+enum nft_rule_compat_attributes {
+ NFTA_RULE_COMPAT_UNSPEC,
+ NFTA_RULE_COMPAT_PROTO,
+ NFTA_RULE_COMPAT_FLAGS,
+ __NFTA_RULE_COMPAT_MAX
+};
+#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1)
+
+/**
+ * enum nft_set_flags - nf_tables set flags
+ *
+ * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink
+ * @NFT_SET_CONSTANT: set contents may not change while bound
+ * @NFT_SET_INTERVAL: set contains intervals
+ * @NFT_SET_MAP: set is used as a dictionary
+ */
+enum nft_set_flags {
+ NFT_SET_ANONYMOUS = 0x1,
+ NFT_SET_CONSTANT = 0x2,
+ NFT_SET_INTERVAL = 0x4,
+ NFT_SET_MAP = 0x8,
+};
+
+/**
+ * enum nft_set_attributes - nf_tables set netlink attributes
+ *
+ * @NFTA_SET_TABLE: table name (NLA_STRING)
+ * @NFTA_SET_NAME: set name (NLA_STRING)
+ * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32)
+ * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32)
+ * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
+ * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
+ * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ */
+enum nft_set_attributes {
+ NFTA_SET_UNSPEC,
+ NFTA_SET_TABLE,
+ NFTA_SET_NAME,
+ NFTA_SET_FLAGS,
+ NFTA_SET_KEY_TYPE,
+ NFTA_SET_KEY_LEN,
+ NFTA_SET_DATA_TYPE,
+ NFTA_SET_DATA_LEN,
+ __NFTA_SET_MAX
+};
+#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
+
+/**
+ * enum nft_set_elem_flags - nf_tables set element flags
+ *
+ * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval
+ */
+enum nft_set_elem_flags {
+ NFT_SET_ELEM_INTERVAL_END = 0x1,
+};
+
+/**
+ * enum nft_set_elem_attributes - nf_tables set element netlink attributes
+ *
+ * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
+ * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
+ * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
+ */
+enum nft_set_elem_attributes {
+ NFTA_SET_ELEM_UNSPEC,
+ NFTA_SET_ELEM_KEY,
+ NFTA_SET_ELEM_DATA,
+ NFTA_SET_ELEM_FLAGS,
+ __NFTA_SET_ELEM_MAX
+};
+#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
+
+/**
+ * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes
+ *
+ * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
+ * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
+ * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ */
+enum nft_set_elem_list_attributes {
+ NFTA_SET_ELEM_LIST_UNSPEC,
+ NFTA_SET_ELEM_LIST_TABLE,
+ NFTA_SET_ELEM_LIST_SET,
+ NFTA_SET_ELEM_LIST_ELEMENTS,
+ __NFTA_SET_ELEM_LIST_MAX
+};
+#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1)
+
+/**
+ * enum nft_data_types - nf_tables data types
+ *
+ * @NFT_DATA_VALUE: generic data
+ * @NFT_DATA_VERDICT: netfilter verdict
+ *
+ * The type of data is usually determined by the kernel directly and is not
+ * explicitly specified by userspace. The only difference are sets, where
+ * userspace specifies the key and mapping data types.
+ *
+ * The values 0xffffff00-0xffffffff are reserved for internally used types.
+ * The remaining range can be freely used by userspace to encode types, all
+ * values are equivalent to NFT_DATA_VALUE.
+ */
+enum nft_data_types {
+ NFT_DATA_VALUE,
+ NFT_DATA_VERDICT = 0xffffff00U,
+};
+
+#define NFT_DATA_RESERVED_MASK 0xffffff00U
+
+/**
+ * enum nft_data_attributes - nf_tables data netlink attributes
+ *
+ * @NFTA_DATA_VALUE: generic data (NLA_BINARY)
+ * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes)
+ */
+enum nft_data_attributes {
+ NFTA_DATA_UNSPEC,
+ NFTA_DATA_VALUE,
+ NFTA_DATA_VERDICT,
+ __NFTA_DATA_MAX
+};
+#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1)
+
+/**
+ * enum nft_verdict_attributes - nf_tables verdict netlink attributes
+ *
+ * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts)
+ * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING)
+ */
+enum nft_verdict_attributes {
+ NFTA_VERDICT_UNSPEC,
+ NFTA_VERDICT_CODE,
+ NFTA_VERDICT_CHAIN,
+ __NFTA_VERDICT_MAX
+};
+#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1)
+
+/**
+ * enum nft_expr_attributes - nf_tables expression netlink attributes
+ *
+ * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING)
+ * @NFTA_EXPR_DATA: type specific data (NLA_NESTED)
+ */
+enum nft_expr_attributes {
+ NFTA_EXPR_UNSPEC,
+ NFTA_EXPR_NAME,
+ NFTA_EXPR_DATA,
+ __NFTA_EXPR_MAX
+};
+#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1)
+
+/**
+ * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes
+ *
+ * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32)
+ * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_immediate_attributes {
+ NFTA_IMMEDIATE_UNSPEC,
+ NFTA_IMMEDIATE_DREG,
+ NFTA_IMMEDIATE_DATA,
+ __NFTA_IMMEDIATE_MAX
+};
+#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1)
+
+/**
+ * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
+ *
+ * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_LEN: length of operands (NLA_U32)
+ * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes)
+ * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes)
+ *
+ * The bitwise expression performs the following operation:
+ *
+ * dreg = (sreg & mask) ^ xor
+ *
+ * which allow to express all bitwise operations:
+ *
+ * mask xor
+ * NOT: 1 1
+ * OR: 0 x
+ * XOR: 1 x
+ * AND: x 0
+ */
+enum nft_bitwise_attributes {
+ NFTA_BITWISE_UNSPEC,
+ NFTA_BITWISE_SREG,
+ NFTA_BITWISE_DREG,
+ NFTA_BITWISE_LEN,
+ NFTA_BITWISE_MASK,
+ NFTA_BITWISE_XOR,
+ __NFTA_BITWISE_MAX
+};
+#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1)
+
+/**
+ * enum nft_byteorder_ops - nf_tables byteorder operators
+ *
+ * @NFT_BYTEORDER_NTOH: network to host operator
+ * @NFT_BYTEORDER_HTON: host to network opertaor
+ */
+enum nft_byteorder_ops {
+ NFT_BYTEORDER_NTOH,
+ NFT_BYTEORDER_HTON,
+};
+
+/**
+ * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes
+ *
+ * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops)
+ * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32)
+ * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4)
+ */
+enum nft_byteorder_attributes {
+ NFTA_BYTEORDER_UNSPEC,
+ NFTA_BYTEORDER_SREG,
+ NFTA_BYTEORDER_DREG,
+ NFTA_BYTEORDER_OP,
+ NFTA_BYTEORDER_LEN,
+ NFTA_BYTEORDER_SIZE,
+ __NFTA_BYTEORDER_MAX
+};
+#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1)
+
+/**
+ * enum nft_cmp_ops - nf_tables relational operator
+ *
+ * @NFT_CMP_EQ: equal
+ * @NFT_CMP_NEQ: not equal
+ * @NFT_CMP_LT: less than
+ * @NFT_CMP_LTE: less than or equal to
+ * @NFT_CMP_GT: greater than
+ * @NFT_CMP_GTE: greater than or equal to
+ */
+enum nft_cmp_ops {
+ NFT_CMP_EQ,
+ NFT_CMP_NEQ,
+ NFT_CMP_LT,
+ NFT_CMP_LTE,
+ NFT_CMP_GT,
+ NFT_CMP_GTE,
+};
+
+/**
+ * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes
+ *
+ * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_cmp_attributes {
+ NFTA_CMP_UNSPEC,
+ NFTA_CMP_SREG,
+ NFTA_CMP_OP,
+ NFTA_CMP_DATA,
+ __NFTA_CMP_MAX
+};
+#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1)
+
+/**
+ * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes
+ *
+ * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
+ * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ */
+enum nft_lookup_attributes {
+ NFTA_LOOKUP_UNSPEC,
+ NFTA_LOOKUP_SET,
+ NFTA_LOOKUP_SREG,
+ NFTA_LOOKUP_DREG,
+ __NFTA_LOOKUP_MAX
+};
+#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
+
+/**
+ * enum nft_payload_bases - nf_tables payload expression offset bases
+ *
+ * @NFT_PAYLOAD_LL_HEADER: link layer header
+ * @NFT_PAYLOAD_NETWORK_HEADER: network header
+ * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
+ */
+enum nft_payload_bases {
+ NFT_PAYLOAD_LL_HEADER,
+ NFT_PAYLOAD_NETWORK_HEADER,
+ NFT_PAYLOAD_TRANSPORT_HEADER,
+};
+
+/**
+ * enum nft_payload_attributes - nf_tables payload expression netlink attributes
+ *
+ * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
+ * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
+ * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
+ * @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
+ */
+enum nft_payload_attributes {
+ NFTA_PAYLOAD_UNSPEC,
+ NFTA_PAYLOAD_DREG,
+ NFTA_PAYLOAD_BASE,
+ NFTA_PAYLOAD_OFFSET,
+ NFTA_PAYLOAD_LEN,
+ __NFTA_PAYLOAD_MAX
+};
+#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
+
+/**
+ * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
+ *
+ * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
+ * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
+ * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ */
+enum nft_exthdr_attributes {
+ NFTA_EXTHDR_UNSPEC,
+ NFTA_EXTHDR_DREG,
+ NFTA_EXTHDR_TYPE,
+ NFTA_EXTHDR_OFFSET,
+ NFTA_EXTHDR_LEN,
+ __NFTA_EXTHDR_MAX
+};
+#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)
+
+/**
+ * enum nft_meta_keys - nf_tables meta expression keys
+ *
+ * @NFT_META_LEN: packet length (skb->len)
+ * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT
+ * @NFT_META_PRIORITY: packet priority (skb->priority)
+ * @NFT_META_MARK: packet mark (skb->mark)
+ * @NFT_META_IIF: packet input interface index (dev->ifindex)
+ * @NFT_META_OIF: packet output interface index (dev->ifindex)
+ * @NFT_META_IIFNAME: packet input interface name (dev->name)
+ * @NFT_META_OIFNAME: packet output interface name (dev->name)
+ * @NFT_META_IIFTYPE: packet input interface type (dev->type)
+ * @NFT_META_OIFTYPE: packet output interface type (dev->type)
+ * @NFT_META_SKUID: originating socket UID (fsuid)
+ * @NFT_META_SKGID: originating socket GID (fsgid)
+ * @NFT_META_NFTRACE: packet nftrace bit
+ * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_META_SECMARK: packet secmark (skb->secmark)
+ */
+enum nft_meta_keys {
+ NFT_META_LEN,
+ NFT_META_PROTOCOL,
+ NFT_META_PRIORITY,
+ NFT_META_MARK,
+ NFT_META_IIF,
+ NFT_META_OIF,
+ NFT_META_IIFNAME,
+ NFT_META_OIFNAME,
+ NFT_META_IIFTYPE,
+ NFT_META_OIFTYPE,
+ NFT_META_SKUID,
+ NFT_META_SKGID,
+ NFT_META_NFTRACE,
+ NFT_META_RTCLASSID,
+ NFT_META_SECMARK,
+};
+
+/**
+ * enum nft_meta_attributes - nf_tables meta expression netlink attributes
+ *
+ * @NFTA_META_DREG: destination register (NLA_U32)
+ * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
+ */
+enum nft_meta_attributes {
+ NFTA_META_UNSPEC,
+ NFTA_META_DREG,
+ NFTA_META_KEY,
+ __NFTA_META_MAX
+};
+#define NFTA_META_MAX (__NFTA_META_MAX - 1)
+
+/**
+ * enum nft_ct_keys - nf_tables ct expression keys
+ *
+ * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
+ * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir)
+ * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status)
+ * @NFT_CT_MARK: conntrack mark value
+ * @NFT_CT_SECMARK: conntrack secmark value
+ * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
+ * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
+ * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
+ * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
+ * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
+ */
+enum nft_ct_keys {
+ NFT_CT_STATE,
+ NFT_CT_DIRECTION,
+ NFT_CT_STATUS,
+ NFT_CT_MARK,
+ NFT_CT_SECMARK,
+ NFT_CT_EXPIRATION,
+ NFT_CT_HELPER,
+ NFT_CT_L3PROTOCOL,
+ NFT_CT_SRC,
+ NFT_CT_DST,
+ NFT_CT_PROTOCOL,
+ NFT_CT_PROTO_SRC,
+ NFT_CT_PROTO_DST,
+};
+
+/**
+ * enum nft_ct_attributes - nf_tables ct expression netlink attributes
+ *
+ * @NFTA_CT_DREG: destination register (NLA_U32)
+ * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys)
+ * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8)
+ */
+enum nft_ct_attributes {
+ NFTA_CT_UNSPEC,
+ NFTA_CT_DREG,
+ NFTA_CT_KEY,
+ NFTA_CT_DIRECTION,
+ __NFTA_CT_MAX
+};
+#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
+
+/**
+ * enum nft_limit_attributes - nf_tables limit expression netlink attributes
+ *
+ * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
+ * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
+ */
+enum nft_limit_attributes {
+ NFTA_LIMIT_UNSPEC,
+ NFTA_LIMIT_RATE,
+ NFTA_LIMIT_UNIT,
+ __NFTA_LIMIT_MAX
+};
+#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
+
+/**
+ * enum nft_counter_attributes - nf_tables counter expression netlink attributes
+ *
+ * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64)
+ * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64)
+ */
+enum nft_counter_attributes {
+ NFTA_COUNTER_UNSPEC,
+ NFTA_COUNTER_BYTES,
+ NFTA_COUNTER_PACKETS,
+ __NFTA_COUNTER_MAX
+};
+#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1)
+
+/**
+ * enum nft_log_attributes - nf_tables log expression netlink attributes
+ *
+ * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32)
+ * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
+ * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
+ * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
+ */
+enum nft_log_attributes {
+ NFTA_LOG_UNSPEC,
+ NFTA_LOG_GROUP,
+ NFTA_LOG_PREFIX,
+ NFTA_LOG_SNAPLEN,
+ NFTA_LOG_QTHRESHOLD,
+ __NFTA_LOG_MAX
+};
+#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
+
+/**
+ * enum nft_reject_types - nf_tables reject expression reject types
+ *
+ * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
+ * @NFT_REJECT_TCP_RST: reject using TCP RST
+ */
+enum nft_reject_types {
+ NFT_REJECT_ICMP_UNREACH,
+ NFT_REJECT_TCP_RST,
+};
+
+/**
+ * enum nft_reject_attributes - nf_tables reject expression netlink attributes
+ *
+ * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
+ * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8)
+ */
+enum nft_reject_attributes {
+ NFTA_REJECT_UNSPEC,
+ NFTA_REJECT_TYPE,
+ NFTA_REJECT_ICMP_CODE,
+ __NFTA_REJECT_MAX
+};
+#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1)
+
+/**
+ * enum nft_nat_types - nf_tables nat expression NAT types
+ *
+ * @NFT_NAT_SNAT: source NAT
+ * @NFT_NAT_DNAT: destination NAT
+ */
+enum nft_nat_types {
+ NFT_NAT_SNAT,
+ NFT_NAT_DNAT,
+};
+
+/**
+ * enum nft_nat_attributes - nf_tables nat expression netlink attributes
+ *
+ * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types)
+ * @NFTA_NAT_FAMILY: NAT family (NLA_U32)
+ * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ */
+enum nft_nat_attributes {
+ NFTA_NAT_UNSPEC,
+ NFTA_NAT_TYPE,
+ NFTA_NAT_FAMILY,
+ NFTA_NAT_REG_ADDR_MIN,
+ NFTA_NAT_REG_ADDR_MAX,
+ NFTA_NAT_REG_PROTO_MIN,
+ NFTA_NAT_REG_PROTO_MAX,
+ __NFTA_NAT_MAX
+};
+#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1)
+
+#endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h
new file mode 100644
index 0000000..8310f5f
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables_compat.h
@@ -0,0 +1,38 @@
+#ifndef _NFT_COMPAT_NFNETLINK_H_
+#define _NFT_COMPAT_NFNETLINK_H_
+
+enum nft_target_attributes {
+ NFTA_TARGET_UNSPEC,
+ NFTA_TARGET_NAME,
+ NFTA_TARGET_REV,
+ NFTA_TARGET_INFO,
+ __NFTA_TARGET_MAX
+};
+#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1)
+
+enum nft_match_attributes {
+ NFTA_MATCH_UNSPEC,
+ NFTA_MATCH_NAME,
+ NFTA_MATCH_REV,
+ NFTA_MATCH_INFO,
+ __NFTA_MATCH_MAX
+};
+#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1)
+
+#define NFT_COMPAT_NAME_MAX 32
+
+enum {
+ NFNL_MSG_COMPAT_GET,
+ NFNL_MSG_COMPAT_MAX
+};
+
+enum {
+ NFTA_COMPAT_UNSPEC = 0,
+ NFTA_COMPAT_NAME,
+ NFTA_COMPAT_REV,
+ NFTA_COMPAT_TYPE,
+ __NFTA_COMPAT_MAX,
+};
+#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4a4efaf..596ddd4 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -18,6 +18,8 @@
#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE
NFNLGRP_CONNTRACK_EXP_DESTROY,
#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY
+ NFNLGRP_NFTABLES,
+#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES
__NFNLGRP_MAX,
};
#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
@@ -51,6 +53,12 @@
#define NFNL_SUBSYS_ACCT 7
#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8
#define NFNL_SUBSYS_CTHELPER 9
-#define NFNL_SUBSYS_COUNT 10
+#define NFNL_SUBSYS_NFTABLES 10
+#define NFNL_SUBSYS_NFT_COMPAT 11
+#define NFNL_SUBSYS_COUNT 12
+
+/* Reserved control nfnetlink messages */
+#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE
+#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1
#endif /* _UAPI_NFNETLINK_H */
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index eb262e3..c50061d 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -51,6 +51,20 @@
*/
/*
+ * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
+ * offload off or on. If it is missing then the feature is assumed to be on.
+ * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
+ * offload on or off. If it is missing then the feature is assumed to be off.
+ */
+
+/*
+ * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
+ * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
+ * frontends nor backends are assumed to be capable unless the flags are
+ * present.
+ */
+
+/*
* This is the 'wire' format for packets:
* Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
* [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info)
@@ -95,8 +109,10 @@
#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
-/* GSO types - only TCPv4 currently supported. */
+/* GSO types */
+#define XEN_NETIF_GSO_TYPE_NONE (0)
#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+#define XEN_NETIF_GSO_TYPE_TCPV6 (2)
/*
* This structure needs to fit within both netif_tx_request and
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 8ddbfe66..4f4aabb 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -24,6 +24,7 @@
batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
batman-adv-y += debugfs.o
batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
+batman-adv-y += fragmentation.o
batman-adv-y += gateway_client.o
batman-adv-y += gateway_common.o
batman-adv-y += hard-interface.o
@@ -37,4 +38,3 @@
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
batman-adv-y += translation-table.o
-batman-adv-y += unicast.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 264de88..5bb58d7 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -863,25 +863,25 @@
struct arphdr *arphdr;
uint8_t *hw_src, *hw_dst;
struct batadv_bla_claim_dst *bla_dst;
- uint16_t proto;
+ __be16 proto;
int headlen;
unsigned short vid = BATADV_NO_FLAGS;
int ret;
ethhdr = eth_hdr(skb);
- if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
+ if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
vhdr = (struct vlan_ethhdr *)ethhdr;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
vid |= BATADV_VLAN_HAS_TAG;
- proto = ntohs(vhdr->h_vlan_encapsulated_proto);
+ proto = vhdr->h_vlan_encapsulated_proto;
headlen = sizeof(*vhdr);
} else {
- proto = ntohs(ethhdr->h_proto);
+ proto = ethhdr->h_proto;
headlen = ETH_HLEN;
}
- if (proto != ETH_P_ARP)
+ if (proto != htons(ETH_P_ARP))
return 0; /* not a claim frame */
/* this must be a ARP frame. check if it is a claim. */
@@ -1379,8 +1379,8 @@
ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size);
- if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
- if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr)))
+ if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
+ if (!pskb_may_pull(skb, hdr_size + VLAN_ETH_HLEN))
return 0;
vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index f07ec32..99da412 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -29,7 +29,6 @@
#include "send.h"
#include "types.h"
#include "translation-table.h"
-#include "unicast.h"
static void batadv_dat_purge(struct work_struct *work);
@@ -592,9 +591,9 @@
goto free_orig;
tmp_skb = pskb_copy(skb, GFP_ATOMIC);
- if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb,
- cand[i].orig_node,
- packet_subtype)) {
+ if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
+ cand[i].orig_node,
+ packet_subtype)) {
kfree_skb(tmp_skb);
goto free_neigh;
}
@@ -990,10 +989,10 @@
* that a node not using the 4addr packet format doesn't support it.
*/
if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
- err = batadv_unicast_4addr_send_skb(bat_priv, skb_new,
+ err = batadv_send_skb_unicast_4addr(bat_priv, skb_new,
BATADV_P_DAT_CACHE_REPLY);
else
- err = batadv_unicast_send_skb(bat_priv, skb_new);
+ err = batadv_send_skb_unicast(bat_priv, skb_new);
if (!err) {
batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
new file mode 100644
index 0000000..271d321
--- /dev/null
+++ b/net/batman-adv/fragmentation.c
@@ -0,0 +1,491 @@
+/* Copyright (C) 2013 B.A.T.M.A.N. contributors:
+ *
+ * Martin Hundebøll <martin@hundeboll.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "main.h"
+#include "fragmentation.h"
+#include "send.h"
+#include "originator.h"
+#include "routing.h"
+#include "hard-interface.h"
+#include "soft-interface.h"
+
+
+/**
+ * batadv_frag_clear_chain - delete entries in the fragment buffer chain
+ * @head: head of chain with entries.
+ *
+ * Free fragments in the passed hlist. Should be called with appropriate lock.
+ */
+static void batadv_frag_clear_chain(struct hlist_head *head)
+{
+ struct batadv_frag_list_entry *entry;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_safe(entry, node, head, list) {
+ hlist_del(&entry->list);
+ kfree_skb(entry->skb);
+ kfree(entry);
+ }
+}
+
+/**
+ * batadv_frag_purge_orig - free fragments associated to an orig
+ * @orig_node: originator to free fragments from
+ * @check_cb: optional function to tell if an entry should be purged
+ */
+void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
+ bool (*check_cb)(struct batadv_frag_table_entry *))
+{
+ struct batadv_frag_table_entry *chain;
+ uint8_t i;
+
+ for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
+ chain = &orig_node->fragments[i];
+ spin_lock_bh(&orig_node->fragments[i].lock);
+
+ if (!check_cb || check_cb(chain)) {
+ batadv_frag_clear_chain(&orig_node->fragments[i].head);
+ orig_node->fragments[i].size = 0;
+ }
+
+ spin_unlock_bh(&orig_node->fragments[i].lock);
+ }
+}
+
+/**
+ * batadv_frag_size_limit - maximum possible size of packet to be fragmented
+ *
+ * Returns the maximum size of payload that can be fragmented.
+ */
+static int batadv_frag_size_limit(void)
+{
+ int limit = BATADV_FRAG_MAX_FRAG_SIZE;
+
+ limit -= sizeof(struct batadv_frag_packet);
+ limit *= BATADV_FRAG_MAX_FRAGMENTS;
+
+ return limit;
+}
+
+/**
+ * batadv_frag_init_chain - check and prepare fragment chain for new fragment
+ * @chain: chain in fragments table to init
+ * @seqno: sequence number of the received fragment
+ *
+ * Make chain ready for a fragment with sequence number "seqno". Delete existing
+ * entries if they have an "old" sequence number.
+ *
+ * Caller must hold chain->lock.
+ *
+ * Returns true if chain is empty and caller can just insert the new fragment
+ * without searching for the right position.
+ */
+static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
+ uint16_t seqno)
+{
+ if (chain->seqno == seqno)
+ return false;
+
+ if (!hlist_empty(&chain->head))
+ batadv_frag_clear_chain(&chain->head);
+
+ chain->size = 0;
+ chain->seqno = seqno;
+
+ return true;
+}
+
+/**
+ * batadv_frag_insert_packet - insert a fragment into a fragment chain
+ * @orig_node: originator that the fragment was received from
+ * @skb: skb to insert
+ * @chain_out: list head to attach complete chains of fragments to
+ *
+ * Insert a new fragment into the reverse ordered chain in the right table
+ * entry. The hash table entry is cleared if "old" fragments exist in it.
+ *
+ * Returns true if skb is buffered, false on error. If the chain has all the
+ * fragments needed to merge the packet, the chain is moved to the passed head
+ * to avoid locking the chain in the table.
+ */
+static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
+ struct sk_buff *skb,
+ struct hlist_head *chain_out)
+{
+ struct batadv_frag_table_entry *chain;
+ struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
+ struct batadv_frag_packet *frag_packet;
+ uint8_t bucket;
+ uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet);
+ bool ret = false;
+
+ /* Linearize packet to avoid linearizing 16 packets in a row when doing
+ * the later merge. Non-linear merge should be added to remove this
+ * linearization.
+ */
+ if (skb_linearize(skb) < 0)
+ goto err;
+
+ frag_packet = (struct batadv_frag_packet *)skb->data;
+ seqno = ntohs(frag_packet->seqno);
+ bucket = seqno % BATADV_FRAG_BUFFER_COUNT;
+
+ frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC);
+ if (!frag_entry_new)
+ goto err;
+
+ frag_entry_new->skb = skb;
+ frag_entry_new->no = frag_packet->no;
+
+ /* Select entry in the "chain table" and delete any prior fragments
+ * with another sequence number. batadv_frag_init_chain() returns true,
+ * if the list is empty at return.
+ */
+ chain = &orig_node->fragments[bucket];
+ spin_lock_bh(&chain->lock);
+ if (batadv_frag_init_chain(chain, seqno)) {
+ hlist_add_head(&frag_entry_new->list, &chain->head);
+ chain->size = skb->len - hdr_size;
+ chain->timestamp = jiffies;
+ ret = true;
+ goto out;
+ }
+
+ /* Find the position for the new fragment. */
+ hlist_for_each_entry(frag_entry_curr, &chain->head, list) {
+ /* Drop packet if fragment already exists. */
+ if (frag_entry_curr->no == frag_entry_new->no)
+ goto err_unlock;
+
+ /* Order fragments from highest to lowest. */
+ if (frag_entry_curr->no < frag_entry_new->no) {
+ hlist_add_before(&frag_entry_new->list,
+ &frag_entry_curr->list);
+ chain->size += skb->len - hdr_size;
+ chain->timestamp = jiffies;
+ ret = true;
+ goto out;
+ }
+ }
+
+ /* Reached the end of the list, so insert after 'frag_entry_curr'. */
+ if (likely(frag_entry_curr)) {
+ hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list);
+ chain->size += skb->len - hdr_size;
+ chain->timestamp = jiffies;
+ ret = true;
+ }
+
+out:
+ if (chain->size > batadv_frag_size_limit() ||
+ ntohs(frag_packet->total_size) > batadv_frag_size_limit()) {
+ /* Clear chain if total size of either the list or the packet
+ * exceeds the maximum size of one merged packet.
+ */
+ batadv_frag_clear_chain(&chain->head);
+ chain->size = 0;
+ } else if (ntohs(frag_packet->total_size) == chain->size) {
+ /* All fragments received. Hand over chain to caller. */
+ hlist_move_list(&chain->head, chain_out);
+ chain->size = 0;
+ }
+
+err_unlock:
+ spin_unlock_bh(&chain->lock);
+
+err:
+ if (!ret)
+ kfree(frag_entry_new);
+
+ return ret;
+}
+
+/**
+ * batadv_frag_merge_packets - merge a chain of fragments
+ * @chain: head of chain with fragments
+ * @skb: packet with total size of skb after merging
+ *
+ * Expand the first skb in the chain and copy the content of the remaining
+ * skb's into the expanded one. After doing so, clear the chain.
+ *
+ * Returns the merged skb or NULL on error.
+ */
+static struct sk_buff *
+batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
+{
+ struct batadv_frag_packet *packet;
+ struct batadv_frag_list_entry *entry;
+ struct sk_buff *skb_out = NULL;
+ int size, hdr_size = sizeof(struct batadv_frag_packet);
+
+ /* Make sure incoming skb has non-bogus data. */
+ packet = (struct batadv_frag_packet *)skb->data;
+ size = ntohs(packet->total_size);
+ if (size > batadv_frag_size_limit())
+ goto free;
+
+ /* Remove first entry, as this is the destination for the rest of the
+ * fragments.
+ */
+ entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list);
+ hlist_del(&entry->list);
+ skb_out = entry->skb;
+ kfree(entry);
+
+ /* Make room for the rest of the fragments. */
+ if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
+ kfree_skb(skb_out);
+ skb_out = NULL;
+ goto free;
+ }
+
+ /* Move the existing MAC header to just before the payload. (Override
+ * the fragment header.)
+ */
+ skb_pull_rcsum(skb_out, hdr_size);
+ memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
+ skb_set_mac_header(skb_out, -ETH_HLEN);
+ skb_reset_network_header(skb_out);
+ skb_reset_transport_header(skb_out);
+
+ /* Copy the payload of the each fragment into the last skb */
+ hlist_for_each_entry(entry, chain, list) {
+ size = entry->skb->len - hdr_size;
+ memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size,
+ size);
+ }
+
+free:
+ /* Locking is not needed, because 'chain' is not part of any orig. */
+ batadv_frag_clear_chain(chain);
+ return skb_out;
+}
+
+/**
+ * batadv_frag_skb_buffer - buffer fragment for later merge
+ * @skb: skb to buffer
+ * @orig_node_src: originator that the skb is received from
+ *
+ * Add fragment to buffer and merge fragments if possible.
+ *
+ * There are three possible outcomes: 1) Packet is merged: Return true and
+ * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
+ * to NULL; 3) Error: Return false and leave skb as is.
+ */
+bool batadv_frag_skb_buffer(struct sk_buff **skb,
+ struct batadv_orig_node *orig_node_src)
+{
+ struct sk_buff *skb_out = NULL;
+ struct hlist_head head = HLIST_HEAD_INIT;
+ bool ret = false;
+
+ /* Add packet to buffer and table entry if merge is possible. */
+ if (!batadv_frag_insert_packet(orig_node_src, *skb, &head))
+ goto out_err;
+
+ /* Leave if more fragments are needed to merge. */
+ if (hlist_empty(&head))
+ goto out;
+
+ skb_out = batadv_frag_merge_packets(&head, *skb);
+ if (!skb_out)
+ goto out_err;
+
+out:
+ *skb = skb_out;
+ ret = true;
+out_err:
+ return ret;
+}
+
+/**
+ * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged
+ * @skb: skb to forward
+ * @recv_if: interface that the skb is received on
+ * @orig_node_src: originator that the skb is received from
+ *
+ * Look up the next-hop of the fragments payload and check if the merged packet
+ * will exceed the MTU towards the next-hop. If so, the fragment is forwarded
+ * without merging it.
+ *
+ * Returns true if the fragment is consumed/forwarded, false otherwise.
+ */
+bool batadv_frag_skb_fwd(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if,
+ struct batadv_orig_node *orig_node_src)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_orig_node *orig_node_dst = NULL;
+ struct batadv_neigh_node *neigh_node = NULL;
+ struct batadv_frag_packet *packet;
+ uint16_t total_size;
+ bool ret = false;
+
+ packet = (struct batadv_frag_packet *)skb->data;
+ orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
+ if (!orig_node_dst)
+ goto out;
+
+ neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
+ if (!neigh_node)
+ goto out;
+
+ /* Forward the fragment, if the merged packet would be too big to
+ * be assembled.
+ */
+ total_size = ntohs(packet->total_size);
+ if (total_size > neigh_node->if_incoming->net_dev->mtu) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD);
+ batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES,
+ skb->len + ETH_HLEN);
+
+ packet->header.ttl--;
+ batadv_send_skb_packet(skb, neigh_node->if_incoming,
+ neigh_node->addr);
+ ret = true;
+ }
+
+out:
+ if (orig_node_dst)
+ batadv_orig_node_free_ref(orig_node_dst);
+ if (neigh_node)
+ batadv_neigh_node_free_ref(neigh_node);
+ return ret;
+}
+
+/**
+ * batadv_frag_create - create a fragment from skb
+ * @skb: skb to create fragment from
+ * @frag_head: header to use in new fragment
+ * @mtu: size of new fragment
+ *
+ * Split the passed skb into two fragments: A new one with size matching the
+ * passed mtu and the old one with the rest. The new skb contains data from the
+ * tail of the old skb.
+ *
+ * Returns the new fragment, NULL on error.
+ */
+static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
+ struct batadv_frag_packet *frag_head,
+ unsigned int mtu)
+{
+ struct sk_buff *skb_fragment;
+ unsigned header_size = sizeof(*frag_head);
+ unsigned fragment_size = mtu - header_size;
+
+ skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
+ if (!skb_fragment)
+ goto err;
+
+ skb->priority = TC_PRIO_CONTROL;
+
+ /* Eat the last mtu-bytes of the skb */
+ skb_reserve(skb_fragment, header_size + ETH_HLEN);
+ skb_split(skb, skb_fragment, skb->len - fragment_size);
+
+ /* Add the header */
+ skb_push(skb_fragment, header_size);
+ memcpy(skb_fragment->data, frag_head, header_size);
+
+err:
+ return skb_fragment;
+}
+
+/**
+ * batadv_frag_send_packet - create up to 16 fragments from the passed skb
+ * @skb: skb to create fragments from
+ * @orig_node: final destination of the created fragments
+ * @neigh_node: next-hop of the created fragments
+ *
+ * Returns true on success, false otherwise.
+ */
+bool batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node)
+{
+ struct batadv_priv *bat_priv;
+ struct batadv_hard_iface *primary_if;
+ struct batadv_frag_packet frag_header;
+ struct sk_buff *skb_fragment;
+ unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
+ unsigned header_size = sizeof(frag_header);
+ unsigned max_fragment_size, max_packet_size;
+
+ /* To avoid merge and refragmentation at next-hops we never send
+ * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
+ */
+ mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+ max_fragment_size = (mtu - header_size - ETH_HLEN);
+ max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+ /* Don't even try to fragment, if we need more than 16 fragments */
+ if (skb->len > max_packet_size)
+ goto out_err;
+
+ bat_priv = orig_node->bat_priv;
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out_err;
+
+ /* Create one header to be copied to all fragments */
+ frag_header.header.packet_type = BATADV_UNICAST_FRAG;
+ frag_header.header.version = BATADV_COMPAT_VERSION;
+ frag_header.header.ttl = BATADV_TTL;
+ frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno));
+ frag_header.reserved = 0;
+ frag_header.no = 0;
+ frag_header.total_size = htons(skb->len);
+ memcpy(frag_header.orig, primary_if->net_dev->dev_addr, ETH_ALEN);
+ memcpy(frag_header.dest, orig_node->orig, ETH_ALEN);
+
+ /* Eat and send fragments from the tail of skb */
+ while (skb->len > max_fragment_size) {
+ skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+ if (!skb_fragment)
+ goto out_err;
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
+ skb_fragment->len + ETH_HLEN);
+ batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming,
+ neigh_node->addr);
+ frag_header.no++;
+
+ /* The initial check in this function should cover this case */
+ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
+ goto out_err;
+ }
+
+ /* Make room for the fragment header. */
+ if (batadv_skb_head_push(skb, header_size) < 0 ||
+ pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
+ goto out_err;
+
+ memcpy(skb->data, &frag_header, header_size);
+
+ /* Send the last fragment */
+ batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
+ skb->len + ETH_HLEN);
+ batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+
+ return true;
+out_err:
+ return false;
+}
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
new file mode 100644
index 0000000..ca029e26
--- /dev/null
+++ b/net/batman-adv/fragmentation.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2013 B.A.T.M.A.N. contributors:
+ *
+ * Martin Hundebøll <martin@hundeboll.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
+#define _NET_BATMAN_ADV_FRAGMENTATION_H_
+
+void batadv_frag_purge_orig(struct batadv_orig_node *orig,
+ bool (*check_cb)(struct batadv_frag_table_entry *));
+bool batadv_frag_skb_fwd(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if,
+ struct batadv_orig_node *orig_node_src);
+bool batadv_frag_skb_buffer(struct sk_buff **skb,
+ struct batadv_orig_node *orig_node);
+bool batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node);
+
+/**
+ * batadv_frag_check_entry - check if a list of fragments has timed out
+ * @frags_entry: table entry to check
+ *
+ * Returns true if the frags entry has timed out, false otherwise.
+ */
+static inline bool
+batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
+{
+ if (!hlist_empty(&frags_entry->head) &&
+ batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT))
+ return true;
+ else
+ return false;
+}
+
+#endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 1bce63aa..053bb31 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -655,24 +655,29 @@
struct iphdr *iphdr;
struct ipv6hdr *ipv6hdr;
struct udphdr *udphdr;
+ struct vlan_ethhdr *vhdr;
+ __be16 proto;
/* check for ethernet header */
if (!pskb_may_pull(skb, *header_len + ETH_HLEN))
return false;
ethhdr = (struct ethhdr *)skb->data;
+ proto = ethhdr->h_proto;
*header_len += ETH_HLEN;
/* check for initial vlan header */
- if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
+ if (proto == htons(ETH_P_8021Q)) {
if (!pskb_may_pull(skb, *header_len + VLAN_HLEN))
return false;
- ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
+
+ vhdr = (struct vlan_ethhdr *)skb->data;
+ proto = vhdr->h_vlan_encapsulated_proto;
*header_len += VLAN_HLEN;
}
/* check for ip header */
- switch (ntohs(ethhdr->h_proto)) {
- case ETH_P_IP:
+ switch (proto) {
+ case htons(ETH_P_IP):
if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr)))
return false;
iphdr = (struct iphdr *)(skb->data + *header_len);
@@ -683,7 +688,7 @@
return false;
break;
- case ETH_P_IPV6:
+ case htons(ETH_P_IPV6):
if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr)))
return false;
ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len);
@@ -710,12 +715,12 @@
*header_len += sizeof(*udphdr);
/* check for bootp port */
- if ((ntohs(ethhdr->h_proto) == ETH_P_IP) &&
- (ntohs(udphdr->dest) != 67))
+ if ((proto == htons(ETH_P_IP)) &&
+ (udphdr->dest != htons(67)))
return false;
- if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) &&
- (ntohs(udphdr->dest) != 547))
+ if ((proto == htons(ETH_P_IPV6)) &&
+ (udphdr->dest != htons(547)))
return false;
return true;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index eeb6671..d564af2 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -269,9 +269,10 @@
const struct batadv_priv *bat_priv = netdev_priv(soft_iface);
const struct batadv_hard_iface *hard_iface;
/* allow big frames if all devices are capable to do so
- * (have MTU > 1500 + BAT_HEADER_LEN)
+ * (have MTU > 1500 + batadv_max_header_len())
*/
int min_mtu = ETH_DATA_LEN;
+ int max_header_len = batadv_max_header_len();
if (atomic_read(&bat_priv->fragmentation))
goto out;
@@ -285,8 +286,7 @@
if (hard_iface->soft_iface != soft_iface)
continue;
- min_mtu = min_t(int,
- hard_iface->net_dev->mtu - BATADV_HEADER_LEN,
+ min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len,
min_mtu);
}
rcu_read_unlock();
@@ -379,7 +379,8 @@
{
struct batadv_priv *bat_priv;
struct net_device *soft_iface, *master;
- __be16 ethertype = __constant_htons(ETH_P_BATMAN);
+ __be16 ethertype = htons(ETH_P_BATMAN);
+ int max_header_len = batadv_max_header_len();
int ret;
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
@@ -444,23 +445,22 @@
hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
dev_add_pack(&hard_iface->batman_adv_ptype);
- atomic_set(&hard_iface->frag_seqno, 1);
batadv_info(hard_iface->soft_iface, "Adding interface: %s\n",
hard_iface->net_dev->name);
if (atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN)
+ hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
batadv_info(hard_iface->soft_iface,
- "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %zi would solve the problem.\n",
+ "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n",
hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + BATADV_HEADER_LEN);
+ ETH_DATA_LEN + max_header_len);
if (!atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN)
+ hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
batadv_info(hard_iface->soft_iface,
- "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %zi.\n",
+ "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n",
hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + BATADV_HEADER_LEN);
+ ETH_DATA_LEN + max_header_len);
if (batadv_hardif_is_iface_up(hard_iface))
batadv_hardif_activate_interface(hard_iface);
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 5a99bb4..82ac647 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -192,25 +192,25 @@
goto free_skb;
}
- if (icmp_packet->header.packet_type != BATADV_ICMP) {
+ if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n");
len = -EINVAL;
goto free_skb;
}
- if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) {
+ if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n");
len = -EINVAL;
goto free_skb;
}
- icmp_packet->uid = socket_client->index;
+ icmp_packet->icmph.uid = socket_client->index;
- if (icmp_packet->header.version != BATADV_COMPAT_VERSION) {
- icmp_packet->msg_type = BATADV_PARAMETER_PROBLEM;
- icmp_packet->header.version = BATADV_COMPAT_VERSION;
+ if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) {
+ icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM;
+ icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION;
batadv_socket_add_packet(socket_client, icmp_packet,
packet_len);
goto free_skb;
@@ -219,7 +219,7 @@
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dst_unreach;
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst);
+ orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
if (!orig_node)
goto dst_unreach;
@@ -233,7 +233,7 @@
if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
goto dst_unreach;
- memcpy(icmp_packet->orig,
+ memcpy(icmp_packet->icmph.orig,
primary_if->net_dev->dev_addr, ETH_ALEN);
if (packet_len == sizeof(struct batadv_icmp_packet_rr))
@@ -244,7 +244,7 @@
goto out;
dst_unreach:
- icmp_packet->msg_type = BATADV_DESTINATION_UNREACHABLE;
+ icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE;
batadv_socket_add_packet(socket_client, icmp_packet, packet_len);
free_skb:
kfree_skb(skb);
@@ -318,7 +318,7 @@
/* while waiting for the lock the socket_client could have been
* deleted
*/
- if (!batadv_socket_client_hash[icmp_packet->uid]) {
+ if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) {
spin_unlock_bh(&socket_client->lock);
kfree(socket_packet);
return;
@@ -347,7 +347,7 @@
{
struct batadv_socket_client *hash;
- hash = batadv_socket_client_hash[icmp_packet->uid];
+ hash = batadv_socket_client_hash[icmp_packet->icmph.uid];
if (hash)
batadv_socket_add_packet(hash, icmp_packet, icmp_len);
}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 8b195e6..7f3a5c4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -36,11 +36,11 @@
#include "gateway_client.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
-#include "unicast.h"
#include "gateway_common.h"
#include "hash.h"
#include "bat_algo.h"
#include "network-coding.h"
+#include "fragmentation.h"
/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -256,6 +256,31 @@
}
/**
+ * batadv_max_header_len - calculate maximum encapsulation overhead for a
+ * payload packet
+ *
+ * Return the maximum encapsulation overhead in bytes.
+ */
+int batadv_max_header_len(void)
+{
+ int header_len = 0;
+
+ header_len = max_t(int, header_len,
+ sizeof(struct batadv_unicast_packet));
+ header_len = max_t(int, header_len,
+ sizeof(struct batadv_unicast_4addr_packet));
+ header_len = max_t(int, header_len,
+ sizeof(struct batadv_bcast_packet));
+
+#ifdef CONFIG_BATMAN_ADV_NC
+ header_len = max_t(int, header_len,
+ sizeof(struct batadv_coded_packet));
+#endif
+
+ return header_len;
+}
+
+/**
* batadv_skb_set_priority - sets skb priority according to packet content
* @skb: the packet to be sent
* @offset: offset to the packet content
@@ -399,10 +424,10 @@
/* compile time checks for struct member offsets */
BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10);
BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4);
- BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4);
BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4);
- BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4);
- BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4);
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
@@ -412,12 +437,12 @@
batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet;
/* unicast packet */
batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet;
- /* fragmented unicast packet */
- batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_ucast_frag_packet;
/* unicast tvlv packet */
batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv;
/* batman icmp packet */
batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
+ /* Fragmented packets */
+ batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet;
}
int
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index e11c2ec..54c13d5 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -131,6 +131,15 @@
#define BATADV_GW_THRESHOLD 50
+/* Number of fragment chains for each orig_node */
+#define BATADV_FRAG_BUFFER_COUNT 8
+/* Maximum number of fragments for one packet */
+#define BATADV_FRAG_MAX_FRAGMENTS 16
+/* Maxumim size of each fragment */
+#define BATADV_FRAG_MAX_FRAG_SIZE 1400
+/* Time to keep fragments while waiting for rest of the fragments */
+#define BATADV_FRAG_TIMEOUT 10000
+
#define BATADV_DAT_CANDIDATE_NOT_FOUND 0
#define BATADV_DAT_CANDIDATE_ORIG 1
@@ -182,6 +191,7 @@
int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq);
+int batadv_max_header_len(void);
void batadv_skb_set_priority(struct sk_buff *skb, int offset);
int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 5d53d2f..a591dc5 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -25,10 +25,10 @@
#include "routing.h"
#include "gateway_client.h"
#include "hard-interface.h"
-#include "unicast.h"
#include "soft-interface.h"
#include "bridge_loop_avoidance.h"
#include "network-coding.h"
+#include "fragmentation.h"
/* hash class keys */
static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -146,7 +146,8 @@
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
- batadv_frag_list_free(&orig_node->frag_list);
+ batadv_frag_purge_orig(orig_node, NULL);
+
batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
"originator timed out");
@@ -217,7 +218,7 @@
const uint8_t *addr)
{
struct batadv_orig_node *orig_node;
- int size;
+ int size, i;
int hash_added;
unsigned long reset_time;
@@ -269,8 +270,11 @@
size = bat_priv->num_ifaces * sizeof(uint8_t);
orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC);
- INIT_LIST_HEAD(&orig_node->frag_list);
- orig_node->last_frag_packet = 0;
+ for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
+ INIT_HLIST_HEAD(&orig_node->fragments[i].head);
+ spin_lock_init(&orig_node->fragments[i].lock);
+ orig_node->fragments[i].size = 0;
+ }
if (!orig_node->bcast_own_sum)
goto free_bcast_own;
@@ -394,9 +398,8 @@
continue;
}
- if (batadv_has_timed_out(orig_node->last_frag_packet,
- BATADV_FRAG_TIMEOUT))
- batadv_frag_list_free(&orig_node->frag_list);
+ batadv_frag_purge_orig(orig_node,
+ batadv_frag_check_entry);
}
spin_unlock_bh(list_lock);
}
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 4361bae..65e723e 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -91,12 +91,6 @@
BATADV_PARAMETER_PROBLEM = 12,
};
-/* fragmentation defines */
-enum batadv_unicast_frag_flags {
- BATADV_UNI_FRAG_HEAD = BIT(0),
- BATADV_UNI_FRAG_LARGETAIL = BIT(1),
-};
-
/* tt data subtypes */
#define BATADV_TT_DATA_TYPE_MASK 0x0F
@@ -192,29 +186,47 @@
#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
-struct batadv_icmp_packet {
+/**
+ * batadv_icmp_header - common ICMP header
+ * @header: common batman header
+ * @msg_type: ICMP packet type
+ * @dst: address of the destination node
+ * @orig: address of the source node
+ * @uid: local ICMP socket identifier
+ */
+struct batadv_icmp_header {
struct batadv_header header;
uint8_t msg_type; /* see ICMP message types above */
uint8_t dst[ETH_ALEN];
uint8_t orig[ETH_ALEN];
- __be16 seqno;
uint8_t uid;
+};
+
+/**
+ * batadv_icmp_packet - ICMP packet
+ * @icmph: common ICMP header
+ * @reserved: not used - useful for alignment
+ * @seqno: ICMP sequence number
+ */
+struct batadv_icmp_packet {
+ struct batadv_icmp_header icmph;
uint8_t reserved;
+ __be16 seqno;
};
#define BATADV_RR_LEN 16
-/* icmp_packet_rr must start with all fields from imcp_packet
- * as this is assumed by code that handles ICMP packets
+/**
+ * batadv_icmp_packet_rr - ICMP RouteRecord packet
+ * @icmph: common ICMP header
+ * @rr_cur: number of entries the rr array
+ * @seqno: ICMP sequence number
+ * @rr: route record array
*/
struct batadv_icmp_packet_rr {
- struct batadv_header header;
- uint8_t msg_type; /* see ICMP message types above */
- uint8_t dst[ETH_ALEN];
- uint8_t orig[ETH_ALEN];
- __be16 seqno;
- uint8_t uid;
+ struct batadv_icmp_header icmph;
uint8_t rr_cur;
+ __be16 seqno;
uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
};
@@ -255,15 +267,32 @@
*/
};
-struct batadv_unicast_frag_packet {
- struct batadv_header header;
- uint8_t ttvn; /* destination translation table version number */
- uint8_t dest[ETH_ALEN];
- uint8_t flags;
- uint8_t align;
- uint8_t orig[ETH_ALEN];
- __be16 seqno;
-} __packed;
+/**
+ * struct batadv_frag_packet - fragmented packet
+ * @header: common batman packet header with type, compatversion, and ttl
+ * @dest: final destination used when routing fragments
+ * @orig: originator of the fragment used when merging the packet
+ * @no: fragment number within this sequence
+ * @reserved: reserved byte for alignment
+ * @seqno: sequence identification
+ * @total_size: size of the merged packet
+ */
+struct batadv_frag_packet {
+ struct batadv_header header;
+#if defined(__BIG_ENDIAN_BITFIELD)
+ uint8_t no:4;
+ uint8_t reserved:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ uint8_t reserved:4;
+ uint8_t no:4;
+#else
+#error "unknown bitfield endianess"
+#endif
+ uint8_t dest[ETH_ALEN];
+ uint8_t orig[ETH_ALEN];
+ __be16 seqno;
+ __be16 total_size;
+};
struct batadv_bcast_packet {
struct batadv_header header;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 457dfef..3281a50 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -25,10 +25,10 @@
#include "icmp_socket.h"
#include "translation-table.h"
#include "originator.h"
-#include "unicast.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "network-coding.h"
+#include "fragmentation.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -258,7 +258,7 @@
icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
/* add data to device queue */
- if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) {
+ if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
batadv_socket_receive_packet(icmp_packet, icmp_len);
goto out;
}
@@ -269,7 +269,7 @@
/* answer echo request (ping) */
/* get routing information */
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig);
+ orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
if (!orig_node)
goto out;
@@ -279,10 +279,11 @@
icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
- memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
- memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- icmp_packet->msg_type = BATADV_ECHO_REPLY;
- icmp_packet->header.ttl = BATADV_TTL;
+ memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
+ memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
+ ETH_ALEN);
+ icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY;
+ icmp_packet->icmph.header.ttl = BATADV_TTL;
if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = NET_RX_SUCCESS;
@@ -306,9 +307,9 @@
icmp_packet = (struct batadv_icmp_packet *)skb->data;
/* send TTL exceeded if packet is an echo request (traceroute) */
- if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) {
+ if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n",
- icmp_packet->orig, icmp_packet->dst);
+ icmp_packet->icmph.orig, icmp_packet->icmph.dst);
goto out;
}
@@ -317,7 +318,7 @@
goto out;
/* get routing information */
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig);
+ orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
if (!orig_node)
goto out;
@@ -327,10 +328,11 @@
icmp_packet = (struct batadv_icmp_packet *)skb->data;
- memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
- memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
- icmp_packet->header.ttl = BATADV_TTL;
+ memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
+ memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
+ ETH_ALEN);
+ icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED;
+ icmp_packet->icmph.header.ttl = BATADV_TTL;
if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = NET_RX_SUCCESS;
@@ -379,7 +381,9 @@
icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
/* add record route information if not full */
- if ((hdr_size == sizeof(struct batadv_icmp_packet_rr)) &&
+ if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY ||
+ icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) &&
+ (hdr_size == sizeof(struct batadv_icmp_packet_rr)) &&
(icmp_packet->rr_cur < BATADV_RR_LEN)) {
memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]),
ethhdr->h_dest, ETH_ALEN);
@@ -387,15 +391,15 @@
}
/* packet for me */
- if (batadv_is_my_mac(bat_priv, icmp_packet->dst))
+ if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst))
return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
/* TTL exceeded */
- if (icmp_packet->header.ttl < 2)
+ if (icmp_packet->icmph.header.ttl < 2)
return batadv_recv_icmp_ttl_exceeded(bat_priv, skb);
/* get routing information */
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst);
+ orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
if (!orig_node)
goto out;
@@ -406,7 +410,7 @@
icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
/* decrement ttl */
- icmp_packet->header.ttl--;
+ icmp_packet->icmph.header.ttl--;
/* route it */
if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
@@ -651,11 +655,9 @@
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *neigh_node = NULL;
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr = eth_hdr(skb);
int res, hdr_len, ret = NET_RX_DROP;
- struct sk_buff *new_skb;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -672,46 +674,12 @@
if (!orig_node)
goto out;
- /* find_router() increases neigh_nodes refcount if found. */
- neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
-
- if (!neigh_node)
- goto out;
-
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
-
- if (unicast_packet->header.packet_type == BATADV_UNICAST &&
- atomic_read(&bat_priv->fragmentation) &&
- skb->len > neigh_node->if_incoming->net_dev->mtu) {
- ret = batadv_frag_send_skb(skb, bat_priv,
- neigh_node->if_incoming,
- neigh_node->addr);
- goto out;
- }
-
- if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG &&
- batadv_frag_can_reassemble(skb,
- neigh_node->if_incoming->net_dev->mtu)) {
- ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
-
- if (ret == NET_RX_DROP)
- goto out;
-
- /* packet was buffered for late merge */
- if (!new_skb) {
- ret = NET_RX_SUCCESS;
- goto out;
- }
-
- skb = new_skb;
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
- }
-
/* decrement ttl */
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
unicast_packet->header.ttl--;
switch (unicast_packet->header.packet_type) {
@@ -746,8 +714,6 @@
}
out:
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
return ret;
@@ -1001,51 +967,6 @@
return batadv_route_unicast_packet(skb, recv_if);
}
-int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if)
-{
- struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_unicast_frag_packet *unicast_packet;
- int hdr_size = sizeof(*unicast_packet);
- struct sk_buff *new_skb = NULL;
- int ret;
-
- if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
- return NET_RX_DROP;
-
- if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
- return NET_RX_DROP;
-
- unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
-
- /* packet for me */
- if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
- ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
-
- if (ret == NET_RX_DROP)
- return NET_RX_DROP;
-
- /* packet was buffered for late merge */
- if (!new_skb)
- return NET_RX_SUCCESS;
-
- if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb,
- hdr_size))
- goto rx_success;
- if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb,
- hdr_size))
- goto rx_success;
-
- batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if,
- sizeof(struct batadv_unicast_packet), NULL);
-
-rx_success:
- return NET_RX_SUCCESS;
- }
-
- return batadv_route_unicast_packet(skb, recv_if);
-}
-
/**
* batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
* @skb: unicast tvlv packet to process
@@ -1095,6 +1016,64 @@
return ret;
}
+/**
+ * batadv_recv_frag_packet - process received fragment
+ * @skb: the received fragment
+ * @recv_if: interface that the skb is received on
+ *
+ * This function does one of the three following things: 1) Forward fragment, if
+ * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till
+ * lack further fragments; 3) Merge fragments, if we have all needed parts.
+ *
+ * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
+ */
+int batadv_recv_frag_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_orig_node *orig_node_src = NULL;
+ struct batadv_frag_packet *frag_packet;
+ int ret = NET_RX_DROP;
+
+ if (batadv_check_unicast_packet(bat_priv, skb,
+ sizeof(*frag_packet)) < 0)
+ goto out;
+
+ frag_packet = (struct batadv_frag_packet *)skb->data;
+ orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig);
+ if (!orig_node_src)
+ goto out;
+
+ /* Route the fragment if it is not for us and too big to be merged. */
+ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
+ batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
+ ret = NET_RX_SUCCESS;
+ goto out;
+ }
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_FRAG_RX_BYTES, skb->len);
+
+ /* Add fragment to buffer and merge if possible. */
+ if (!batadv_frag_skb_buffer(&skb, orig_node_src))
+ goto out;
+
+ /* Deliver merged packet to the appropriate handler, if it was
+ * merged
+ */
+ if (skb)
+ batadv_batman_skb_recv(skb, recv_if->net_dev,
+ &recv_if->batman_adv_ptype, NULL);
+
+ ret = NET_RX_SUCCESS;
+
+out:
+ if (orig_node_src)
+ batadv_orig_node_free_ref(orig_node_src);
+
+ return ret;
+}
+
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index ea15fa6..55d637a 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -30,8 +30,8 @@
struct batadv_hard_iface *recv_if);
int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
+int batadv_recv_frag_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_tt_query(struct sk_buff *skb,
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 81d69fb..82588e4 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -25,10 +25,10 @@
#include "soft-interface.h"
#include "hard-interface.h"
#include "gateway_common.h"
+#include "gateway_client.h"
#include "originator.h"
#include "network-coding.h"
-
-#include <linux/if_ether.h>
+#include "fragmentation.h"
static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
@@ -63,10 +63,10 @@
ethhdr = eth_hdr(skb);
memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
- ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
+ ethhdr->h_proto = htons(ETH_P_BATMAN);
skb_set_network_header(skb, ETH_HLEN);
- skb->protocol = __constant_htons(ETH_P_BATMAN);
+ skb->protocol = htons(ETH_P_BATMAN);
skb->dev = hard_iface->net_dev;
@@ -108,7 +108,19 @@
/* batadv_find_router() increases neigh_nodes refcount if found. */
neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
if (!neigh_node)
- return ret;
+ goto out;
+
+ /* Check if the skb is too large to send in one piece and fragment
+ * it if needed.
+ */
+ if (atomic_read(&bat_priv->fragmentation) &&
+ skb->len > neigh_node->if_incoming->net_dev->mtu) {
+ /* Fragment and send packet. */
+ if (batadv_frag_send_packet(skb, orig_node, neigh_node))
+ ret = NET_XMIT_SUCCESS;
+
+ goto out;
+ }
/* try to network code the packet, if it is received on an interface
* (i.e. being forwarded). If the packet originates from this node or if
@@ -122,11 +134,173 @@
ret = NET_XMIT_SUCCESS;
}
- batadv_neigh_node_free_ref(neigh_node);
+out:
+ if (neigh_node)
+ batadv_neigh_node_free_ref(neigh_node);
return ret;
}
+/**
+ * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the
+ * common fields for unicast packets
+ * @skb: the skb carrying the unicast header to initialize
+ * @hdr_size: amount of bytes to push at the beginning of the skb
+ * @orig_node: the destination node
+ *
+ * Returns false if the buffer extension was not possible or true otherwise.
+ */
+static bool
+batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
+ struct batadv_orig_node *orig_node)
+{
+ struct batadv_unicast_packet *unicast_packet;
+ uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+
+ if (batadv_skb_head_push(skb, hdr_size) < 0)
+ return false;
+
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ unicast_packet->header.version = BATADV_COMPAT_VERSION;
+ /* batman packet type: unicast */
+ unicast_packet->header.packet_type = BATADV_UNICAST;
+ /* set unicast ttl */
+ unicast_packet->header.ttl = BATADV_TTL;
+ /* copy the destination for faster routing */
+ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
+ /* set the destination tt version number */
+ unicast_packet->ttvn = ttvn;
+
+ return true;
+}
+
+/**
+ * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header
+ * @skb: the skb containing the payload to encapsulate
+ * @orig_node: the destination node
+ *
+ * Returns false if the payload could not be encapsulated or true otherwise.
+ */
+static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node)
+{
+ size_t uni_size = sizeof(struct batadv_unicast_packet);
+
+ return batadv_send_skb_push_fill_unicast(skb, uni_size, orig_node);
+}
+
+/**
+ * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a
+ * unicast 4addr header
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb containing the payload to encapsulate
+ * @orig_node: the destination node
+ * @packet_subtype: the unicast 4addr packet subtype to use
+ *
+ * Returns false if the payload could not be encapsulated or true otherwise.
+ */
+bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ struct batadv_orig_node *orig,
+ int packet_subtype)
+{
+ struct batadv_hard_iface *primary_if;
+ struct batadv_unicast_4addr_packet *uc_4addr_packet;
+ bool ret = false;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ /* Pull the header space and fill the unicast_packet substructure.
+ * We can do that because the first member of the uc_4addr_packet
+ * is of type struct unicast_packet
+ */
+ if (!batadv_send_skb_push_fill_unicast(skb, sizeof(*uc_4addr_packet),
+ orig))
+ goto out;
+
+ uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+ uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
+ memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN);
+ uc_4addr_packet->subtype = packet_subtype;
+ uc_4addr_packet->reserved = 0;
+
+ ret = true;
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return ret;
+}
+
+/**
+ * batadv_send_generic_unicast_skb - send an skb as unicast
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: payload to send
+ * @packet_type: the batman unicast packet type to use
+ * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast
+ * 4addr packets)
+ *
+ * Returns 1 in case of error or 0 otherwise.
+ */
+int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype)
+{
+ struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+ struct batadv_unicast_packet *unicast_packet;
+ struct batadv_orig_node *orig_node;
+ int ret = NET_RX_DROP;
+
+ /* get routing information */
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ orig_node = batadv_gw_get_selected_orig(bat_priv);
+ else
+ /* check for tt host - increases orig_node refcount.
+ * returns NULL in case of AP isolation
+ */
+ orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest);
+
+ if (!orig_node)
+ goto out;
+
+ switch (packet_type) {
+ case BATADV_UNICAST:
+ batadv_send_skb_prepare_unicast(skb, orig_node);
+ break;
+ case BATADV_UNICAST_4ADDR:
+ batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node,
+ packet_subtype);
+ break;
+ default:
+ /* this function supports UNICAST and UNICAST_4ADDR only. It
+ * should never be invoked with any other packet type
+ */
+ goto out;
+ }
+
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
+ /* inform the destination node that we are still missing a correct route
+ * for this client. The destination will receive this packet and will
+ * try to reroute it because the ttvn contained in the header is less
+ * than the current one
+ */
+ if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest))
+ unicast_packet->ttvn = unicast_packet->ttvn - 1;
+
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ ret = 0;
+
+out:
+ if (orig_node)
+ batadv_orig_node_free_ref(orig_node);
+ if (ret == NET_RX_DROP)
+ kfree_skb(skb);
+ return ret;
+}
+
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index e7b1788..ad63184 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -34,5 +34,45 @@
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface);
+bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ int packet_subtype);
+int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype);
+
+
+/**
+ * batadv_send_unicast_skb - send the skb encapsulated in a unicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the payload to send
+ *
+ * Returns 1 in case of error or 0 otherwise.
+ */
+static inline int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_send_skb_generic_unicast(bat_priv, skb, BATADV_UNICAST,
+ 0);
+}
+
+/**
+ * batadv_send_4addr_unicast_skb - send the skb encapsulated in a unicast 4addr
+ * packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the payload to send
+ * @packet_subtype: the unicast 4addr packet subtype to use
+ *
+ * Returns 1 in case of error or 0 otherwise.
+ */
+static inline int batadv_send_skb_unicast_4addr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ int packet_subtype)
+{
+ return batadv_send_skb_generic_unicast(bat_priv, skb,
+ BATADV_UNICAST_4ADDR,
+ packet_subtype);
+}
#endif /* _NET_BATMAN_ADV_SEND_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 25e6004..e8a2bd6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -34,8 +34,6 @@
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
-#include <linux/if_ether.h>
-#include "unicast.h"
#include "bridge_loop_avoidance.h"
#include "network-coding.h"
@@ -139,6 +137,18 @@
return 0;
}
+/**
+ * batadv_interface_set_rx_mode - set the rx mode of a device
+ * @dev: registered network device to modify
+ *
+ * We do not actually need to set any rx filters for the virtual batman
+ * soft interface. However a dummy handler enables a user to set static
+ * multicast listeners for instance.
+ */
+static void batadv_interface_set_rx_mode(struct net_device *dev)
+{
+}
+
static int batadv_interface_tx(struct sk_buff *skb,
struct net_device *soft_iface)
{
@@ -147,7 +157,7 @@
struct batadv_hard_iface *primary_if = NULL;
struct batadv_bcast_packet *bcast_packet;
struct vlan_ethhdr *vhdr;
- __be16 ethertype = __constant_htons(ETH_P_BATMAN);
+ __be16 ethertype = htons(ETH_P_BATMAN);
static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
0x00, 0x00};
static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
@@ -286,7 +296,7 @@
batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);
- ret = batadv_unicast_send_skb(bat_priv, skb);
+ ret = batadv_send_skb_unicast(bat_priv, skb);
if (ret != 0)
goto dropped_freed;
}
@@ -314,7 +324,7 @@
struct vlan_ethhdr *vhdr;
struct batadv_header *batadv_header = (struct batadv_header *)skb->data;
unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
- __be16 ethertype = __constant_htons(ETH_P_BATMAN);
+ __be16 ethertype = htons(ETH_P_BATMAN);
bool is_bcast;
is_bcast = (batadv_header->packet_type == BATADV_BCAST);
@@ -444,6 +454,7 @@
static int batadv_softif_init_late(struct net_device *dev)
{
struct batadv_priv *bat_priv;
+ uint32_t random_seqno;
int ret;
size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
@@ -493,6 +504,10 @@
bat_priv->tt.last_changeset = NULL;
bat_priv->tt.last_changeset_len = 0;
+ /* randomize initial seqno to avoid collision */
+ get_random_bytes(&random_seqno, sizeof(random_seqno));
+ atomic_set(&bat_priv->frag_seqno, random_seqno);
+
bat_priv->primary_if = NULL;
bat_priv->num_ifaces = 0;
@@ -580,6 +595,7 @@
.ndo_get_stats = batadv_interface_stats,
.ndo_set_mac_address = batadv_interface_set_mac_addr,
.ndo_change_mtu = batadv_interface_change_mtu,
+ .ndo_set_rx_mode = batadv_interface_set_rx_mode,
.ndo_start_xmit = batadv_interface_tx,
.ndo_validate_addr = eth_validate_addr,
.ndo_add_slave = batadv_softif_slave_add,
@@ -623,7 +639,7 @@
*/
dev->mtu = ETH_DATA_LEN;
/* reserve more space in the skbuff for our header */
- dev->hard_header_len = BATADV_HEADER_LEN;
+ dev->hard_header_len = batadv_max_header_len();
/* generate random address */
eth_hw_addr_random(dev);
@@ -760,6 +776,12 @@
{ "mgmt_tx_bytes" },
{ "mgmt_rx" },
{ "mgmt_rx_bytes" },
+ { "frag_tx" },
+ { "frag_tx_bytes" },
+ { "frag_rx" },
+ { "frag_rx_bytes" },
+ { "frag_fwd" },
+ { "frag_fwd_bytes" },
{ "tt_request_tx" },
{ "tt_request_rx" },
{ "tt_response_tx" },
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index c741694..b521afb 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -117,25 +117,17 @@
kfree_rcu(tt_local_entry, common.rcu);
}
-static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_common_entry *tt_common_entry;
- struct batadv_tt_global_entry *tt_global_entry;
-
- tt_common_entry = container_of(rcu, struct batadv_tt_common_entry, rcu);
- tt_global_entry = container_of(tt_common_entry,
- struct batadv_tt_global_entry, common);
-
- kfree(tt_global_entry);
-}
-
+/**
+ * batadv_tt_global_entry_free_ref - decrement the refcounter for a
+ * tt_global_entry and possibly free it
+ * @tt_global_entry: the object to free
+ */
static void
batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
{
if (atomic_dec_and_test(&tt_global_entry->common.refcount)) {
batadv_tt_global_del_orig_list(tt_global_entry);
- call_rcu(&tt_global_entry->common.rcu,
- batadv_tt_global_entry_free_rcu);
+ kfree_rcu(tt_global_entry, common.rcu);
}
}
@@ -240,6 +232,17 @@
return changes_num * sizeof(struct batadv_tvlv_tt_change);
}
+/**
+ * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
+ * @tt_len: available space
+ *
+ * Returns the number of entries.
+ */
+static uint16_t batadv_tt_entries(uint16_t tt_len)
+{
+ return tt_len / batadv_tt_len(1);
+}
+
static int batadv_tt_local_init(struct batadv_priv *bat_priv)
{
if (bat_priv->tt.local_hash)
@@ -414,7 +417,7 @@
if (tt_diff_len == 0)
goto container_register;
- tt_diff_entries_num = tt_diff_len / batadv_tt_len(1);
+ tt_diff_entries_num = batadv_tt_entries(tt_diff_len);
spin_lock_bh(&bat_priv->tt.changes_list_lock);
atomic_set(&bat_priv->tt.local_changes, 0);
@@ -805,15 +808,17 @@
* If a TT local entry exists for this non-mesh client remove it.
*
* The caller must hold orig_node refcount.
+ *
+ * Return true if the new entry has been added, false otherwise
*/
-int batadv_tt_global_add(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- const unsigned char *tt_addr, uint16_t flags,
- uint8_t ttvn)
+static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ const unsigned char *tt_addr, uint16_t flags,
+ uint8_t ttvn)
{
struct batadv_tt_global_entry *tt_global_entry;
struct batadv_tt_local_entry *tt_local_entry;
- int ret = 0;
+ bool ret = false;
int hash_added;
struct batadv_tt_common_entry *common;
uint16_t local_flags;
@@ -914,7 +919,7 @@
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (via %pM)\n",
common->addr, orig_node->orig);
- ret = 1;
+ ret = true;
out_remove:
@@ -1491,11 +1496,9 @@
static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *tt_buff,
- uint16_t tt_num_changes)
+ const void *tt_buff,
+ uint16_t tt_buff_len)
{
- uint16_t tt_buff_len = batadv_tt_len(tt_num_changes);
-
/* Replace the old buffer only if I received something in the
* last OGM (the OGM could carry no changes)
*/
@@ -1622,7 +1625,7 @@
tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change);
}
- tt_tot = tt_len / sizeof(struct batadv_tvlv_tt_change);
+ tt_tot = batadv_tt_entries(tt_len);
tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
GFP_ATOMIC);
@@ -2032,8 +2035,8 @@
_batadv_tt_update_changes(bat_priv, orig_node, tt_change,
tt_num_changes, ttvn);
- batadv_tt_save_orig_buffer(bat_priv, orig_node,
- (unsigned char *)tt_change, tt_num_changes);
+ batadv_tt_save_orig_buffer(bat_priv, orig_node, tt_change,
+ batadv_tt_len(tt_num_changes));
atomic_set(&orig_node->last_ttvn, ttvn);
}
@@ -2573,7 +2576,7 @@
tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
- num_entries = tvlv_value_len / batadv_tt_len(1);
+ num_entries = batadv_tt_entries(tvlv_value_len);
batadv_tt_update_orig(bat_priv, orig,
(unsigned char *)(tt_data + 1),
@@ -2608,7 +2611,7 @@
tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
- num_entries = tvlv_value_len / batadv_tt_len(1);
+ num_entries = batadv_tt_entries(tvlv_value_len);
switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) {
case BATADV_TT_REQUEST:
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index b4b6dea..015d8b9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -27,13 +27,6 @@
const uint8_t *addr, const char *message,
bool roaming);
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
-void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- const unsigned char *tt_buff, int tt_buff_len);
-int batadv_tt_global_add(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- const unsigned char *addr, uint16_t flags,
- uint8_t ttvn);
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8fbd89d..5cbb0d0 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -24,13 +24,6 @@
#include "bitarray.h"
#include <linux/kernel.h>
-/**
- * Maximum overhead for the encapsulation for a payload packet
- */
-#define BATADV_HEADER_LEN \
- (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \
- sizeof(struct batadv_bcast_packet)))
-
#ifdef CONFIG_BATMAN_ADV_DAT
/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed,
@@ -60,7 +53,6 @@
* @if_num: identificator of the interface
* @if_status: status of the interface for batman-adv
* @net_dev: pointer to the net_device
- * @frag_seqno: last fragment sequence number sent by this interface
* @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
* @hardif_obj: kobject of the per interface sysfs "mesh" directory
* @refcount: number of contexts the object is used
@@ -76,7 +68,6 @@
int16_t if_num;
char if_status;
struct net_device *net_dev;
- atomic_t frag_seqno;
uint8_t num_bcasts;
struct kobject *hardif_obj;
atomic_t refcount;
@@ -88,6 +79,34 @@
};
/**
+ * struct batadv_frag_table_entry - head in the fragment buffer table
+ * @head: head of list with fragments
+ * @lock: lock to protect the list of fragments
+ * @timestamp: time (jiffie) of last received fragment
+ * @seqno: sequence number of the fragments in the list
+ * @size: accumulated size of packets in list
+ */
+struct batadv_frag_table_entry {
+ struct hlist_head head;
+ spinlock_t lock; /* protects head */
+ unsigned long timestamp;
+ uint16_t seqno;
+ uint16_t size;
+};
+
+/**
+ * struct batadv_frag_list_entry - entry in a list of fragments
+ * @list: list node information
+ * @skb: fragment
+ * @no: fragment number in the set
+ */
+struct batadv_frag_list_entry {
+ struct hlist_node list;
+ struct sk_buff *skb;
+ uint8_t no;
+};
+
+/**
* struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
* @orig: originator ethernet address
* @primary_addr: hosts primary interface address
@@ -116,9 +135,6 @@
* last_bcast_seqno)
* @last_bcast_seqno: last broadcast sequence number received by this host
* @neigh_list: list of potential next hop neighbor towards this orig node
- * @frag_list: fragmentation buffer list for fragment re-assembly
- * @last_frag_packet: time when last fragmented packet from this node was
- * received
* @neigh_list_lock: lock protecting neigh_list, router and bonding_list
* @hash_entry: hlist node for batadv_priv::orig_hash
* @bat_priv: pointer to soft_iface this orig node belongs to
@@ -133,6 +149,7 @@
* @out_coding_list: list of nodes that can hear this orig
* @in_coding_list_lock: protects in_coding_list
* @out_coding_list_lock: protects out_coding_list
+ * @fragments: array with heads for fragment chains
*/
struct batadv_orig_node {
uint8_t orig[ETH_ALEN];
@@ -159,8 +176,6 @@
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
uint32_t last_bcast_seqno;
struct hlist_head neigh_list;
- struct list_head frag_list;
- unsigned long last_frag_packet;
/* neigh_list_lock protects: neigh_list, router & bonding_list */
spinlock_t neigh_list_lock;
struct hlist_node hash_entry;
@@ -181,6 +196,7 @@
spinlock_t in_coding_list_lock; /* Protects in_coding_list */
spinlock_t out_coding_list_lock; /* Protects out_coding_list */
#endif
+ struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
};
/**
@@ -277,6 +293,12 @@
* @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter
* @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
* @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter
+ * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter
+ * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
+ * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter
+ * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
+ * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter
+ * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
* @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
* @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter
* @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter
@@ -314,6 +336,12 @@
BATADV_CNT_MGMT_TX_BYTES,
BATADV_CNT_MGMT_RX,
BATADV_CNT_MGMT_RX_BYTES,
+ BATADV_CNT_FRAG_TX,
+ BATADV_CNT_FRAG_TX_BYTES,
+ BATADV_CNT_FRAG_RX,
+ BATADV_CNT_FRAG_RX_BYTES,
+ BATADV_CNT_FRAG_FWD,
+ BATADV_CNT_FRAG_FWD_BYTES,
BATADV_CNT_TT_REQUEST_TX,
BATADV_CNT_TT_REQUEST_RX,
BATADV_CNT_TT_RESPONSE_TX,
@@ -511,6 +539,7 @@
* @aggregated_ogms: bool indicating whether OGM aggregation is enabled
* @bonding: bool indicating whether traffic bonding is enabled
* @fragmentation: bool indicating whether traffic fragmentation is enabled
+ * @frag_seqno: incremental counter to identify chains of egress fragments
* @ap_isolation: bool indicating whether ap isolation is enabled
* @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
* enabled
@@ -554,6 +583,7 @@
atomic_t aggregated_ogms;
atomic_t bonding;
atomic_t fragmentation;
+ atomic_t frag_seqno;
atomic_t ap_isolation;
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_t bridge_loop_avoidance;
@@ -874,18 +904,6 @@
};
/**
- * struct batadv_frag_packet_list_entry - storage for fragment packet
- * @list: list node for orig_node::frag_list
- * @seqno: sequence number of the fragment
- * @skb: fragment's skb buffer
- */
-struct batadv_frag_packet_list_entry {
- struct list_head list;
- uint16_t seqno;
- struct sk_buff *skb;
-};
-
-/**
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
deleted file mode 100644
index 48b31d3..0000000
--- a/net/batman-adv/unicast.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
- *
- * Andreas Langer
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include "main.h"
-#include "unicast.h"
-#include "send.h"
-#include "soft-interface.h"
-#include "gateway_client.h"
-#include "originator.h"
-#include "hash.h"
-#include "translation-table.h"
-#include "routing.h"
-#include "hard-interface.h"
-
-
-static struct sk_buff *
-batadv_frag_merge_packet(struct list_head *head,
- struct batadv_frag_packet_list_entry *tfp,
- struct sk_buff *skb)
-{
- struct batadv_unicast_frag_packet *up;
- struct sk_buff *tmp_skb;
- struct batadv_unicast_packet *unicast_packet;
- int hdr_len = sizeof(*unicast_packet);
- int uni_diff = sizeof(*up) - hdr_len;
- uint8_t *packet_pos;
-
- up = (struct batadv_unicast_frag_packet *)skb->data;
- /* set skb to the first part and tmp_skb to the second part */
- if (up->flags & BATADV_UNI_FRAG_HEAD) {
- tmp_skb = tfp->skb;
- } else {
- tmp_skb = skb;
- skb = tfp->skb;
- }
-
- if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0)
- goto err;
-
- skb_pull(tmp_skb, sizeof(*up));
- if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0)
- goto err;
-
- /* move free entry to end */
- tfp->skb = NULL;
- tfp->seqno = 0;
- list_move_tail(&tfp->list, head);
-
- memcpy(skb_put(skb, tmp_skb->len), tmp_skb->data, tmp_skb->len);
- kfree_skb(tmp_skb);
-
- memmove(skb->data + uni_diff, skb->data, hdr_len);
- packet_pos = skb_pull(skb, uni_diff);
- unicast_packet = (struct batadv_unicast_packet *)packet_pos;
- unicast_packet->header.packet_type = BATADV_UNICAST;
-
- return skb;
-
-err:
- /* free buffered skb, skb will be freed later */
- kfree_skb(tfp->skb);
- return NULL;
-}
-
-static void batadv_frag_create_entry(struct list_head *head,
- struct sk_buff *skb)
-{
- struct batadv_frag_packet_list_entry *tfp;
- struct batadv_unicast_frag_packet *up;
-
- up = (struct batadv_unicast_frag_packet *)skb->data;
-
- /* free and oldest packets stand at the end */
- tfp = list_entry((head)->prev, typeof(*tfp), list);
- kfree_skb(tfp->skb);
-
- tfp->seqno = ntohs(up->seqno);
- tfp->skb = skb;
- list_move(&tfp->list, head);
- return;
-}
-
-static int batadv_frag_create_buffer(struct list_head *head)
-{
- int i;
- struct batadv_frag_packet_list_entry *tfp;
-
- for (i = 0; i < BATADV_FRAG_BUFFER_SIZE; i++) {
- tfp = kmalloc(sizeof(*tfp), GFP_ATOMIC);
- if (!tfp) {
- batadv_frag_list_free(head);
- return -ENOMEM;
- }
- tfp->skb = NULL;
- tfp->seqno = 0;
- INIT_LIST_HEAD(&tfp->list);
- list_add(&tfp->list, head);
- }
-
- return 0;
-}
-
-static struct batadv_frag_packet_list_entry *
-batadv_frag_search_packet(struct list_head *head,
- const struct batadv_unicast_frag_packet *up)
-{
- struct batadv_frag_packet_list_entry *tfp;
- struct batadv_unicast_frag_packet *tmp_up = NULL;
- bool is_head_tmp, is_head;
- uint16_t search_seqno;
-
- if (up->flags & BATADV_UNI_FRAG_HEAD)
- search_seqno = ntohs(up->seqno)+1;
- else
- search_seqno = ntohs(up->seqno)-1;
-
- is_head = up->flags & BATADV_UNI_FRAG_HEAD;
-
- list_for_each_entry(tfp, head, list) {
- if (!tfp->skb)
- continue;
-
- if (tfp->seqno == ntohs(up->seqno))
- goto mov_tail;
-
- tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
-
- if (tfp->seqno == search_seqno) {
- is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;
- if (is_head_tmp != is_head)
- return tfp;
- else
- goto mov_tail;
- }
- }
- return NULL;
-
-mov_tail:
- list_move_tail(&tfp->list, head);
- return NULL;
-}
-
-void batadv_frag_list_free(struct list_head *head)
-{
- struct batadv_frag_packet_list_entry *pf, *tmp_pf;
-
- if (!list_empty(head)) {
- list_for_each_entry_safe(pf, tmp_pf, head, list) {
- kfree_skb(pf->skb);
- list_del(&pf->list);
- kfree(pf);
- }
- }
- return;
-}
-
-/* frag_reassemble_skb():
- * returns NET_RX_DROP if the operation failed - skb is left intact
- * returns NET_RX_SUCCESS if the fragment was buffered (skb_new will be NULL)
- * or the skb could be reassembled (skb_new will point to the new packet and
- * skb was freed)
- */
-int batadv_frag_reassemble_skb(struct sk_buff *skb,
- struct batadv_priv *bat_priv,
- struct sk_buff **new_skb)
-{
- struct batadv_orig_node *orig_node;
- struct batadv_frag_packet_list_entry *tmp_frag_entry;
- int ret = NET_RX_DROP;
- struct batadv_unicast_frag_packet *unicast_packet;
-
- unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
- *new_skb = NULL;
-
- orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->orig);
- if (!orig_node)
- goto out;
-
- orig_node->last_frag_packet = jiffies;
-
- if (list_empty(&orig_node->frag_list) &&
- batadv_frag_create_buffer(&orig_node->frag_list)) {
- pr_debug("couldn't create frag buffer\n");
- goto out;
- }
-
- tmp_frag_entry = batadv_frag_search_packet(&orig_node->frag_list,
- unicast_packet);
-
- if (!tmp_frag_entry) {
- batadv_frag_create_entry(&orig_node->frag_list, skb);
- ret = NET_RX_SUCCESS;
- goto out;
- }
-
- *new_skb = batadv_frag_merge_packet(&orig_node->frag_list,
- tmp_frag_entry, skb);
- /* if not, merge failed */
- if (*new_skb)
- ret = NET_RX_SUCCESS;
-
-out:
- if (orig_node)
- batadv_orig_node_free_ref(orig_node);
- return ret;
-}
-
-int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface,
- const uint8_t dstaddr[])
-{
- struct batadv_unicast_packet tmp_uc, *unicast_packet;
- struct batadv_hard_iface *primary_if;
- struct sk_buff *frag_skb;
- struct batadv_unicast_frag_packet *frag1, *frag2;
- int uc_hdr_len = sizeof(*unicast_packet);
- int ucf_hdr_len = sizeof(*frag1);
- int data_len = skb->len - uc_hdr_len;
- int large_tail = 0, ret = NET_RX_DROP;
- uint16_t seqno;
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto dropped;
-
- frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
- if (!frag_skb)
- goto dropped;
-
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(frag_skb, ucf_hdr_len);
-
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
- memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
- skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len);
-
- if (batadv_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
- batadv_skb_head_push(frag_skb, ucf_hdr_len) < 0)
- goto drop_frag;
-
- frag1 = (struct batadv_unicast_frag_packet *)skb->data;
- frag2 = (struct batadv_unicast_frag_packet *)frag_skb->data;
-
- memcpy(frag1, &tmp_uc, sizeof(tmp_uc));
-
- frag1->header.ttl--;
- frag1->header.version = BATADV_COMPAT_VERSION;
- frag1->header.packet_type = BATADV_UNICAST_FRAG;
-
- memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- memcpy(frag2, frag1, sizeof(*frag2));
-
- if (data_len & 1)
- large_tail = BATADV_UNI_FRAG_LARGETAIL;
-
- frag1->flags = BATADV_UNI_FRAG_HEAD | large_tail;
- frag2->flags = large_tail;
-
- seqno = atomic_add_return(2, &hard_iface->frag_seqno);
- frag1->seqno = htons(seqno - 1);
- frag2->seqno = htons(seqno);
-
- batadv_send_skb_packet(skb, hard_iface, dstaddr);
- batadv_send_skb_packet(frag_skb, hard_iface, dstaddr);
- ret = NET_RX_SUCCESS;
- goto out;
-
-drop_frag:
- kfree_skb(frag_skb);
-dropped:
- kfree_skb(skb);
-out:
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
- return ret;
-}
-
-/**
- * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the
- * common fields for unicast packets
- * @skb: packet
- * @hdr_size: amount of bytes to push at the beginning of the skb
- * @orig_node: the destination node
- *
- * Returns false if the buffer extension was not possible or true otherwise
- */
-static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size,
- struct batadv_orig_node *orig_node)
-{
- struct batadv_unicast_packet *unicast_packet;
- uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
-
- if (batadv_skb_head_push(skb, hdr_size) < 0)
- return false;
-
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_packet->header.version = BATADV_COMPAT_VERSION;
- /* batman packet type: unicast */
- unicast_packet->header.packet_type = BATADV_UNICAST;
- /* set unicast ttl */
- unicast_packet->header.ttl = BATADV_TTL;
- /* copy the destination for faster routing */
- memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
- /* set the destination tt version number */
- unicast_packet->ttvn = ttvn;
-
- return true;
-}
-
-/**
- * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header
- * @skb: the skb containing the payload to encapsulate
- * @orig_node: the destination node
- *
- * Returns false if the payload could not be encapsulated or true otherwise.
- *
- * This call might reallocate skb data.
- */
-static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
- struct batadv_orig_node *orig_node)
-{
- size_t uni_size = sizeof(struct batadv_unicast_packet);
- return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node);
-}
-
-/**
- * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr
- * header
- * @bat_priv: the bat priv with all the soft interface information
- * @skb: the skb containing the payload to encapsulate
- * @orig_node: the destination node
- * @packet_subtype: the batman 4addr packet subtype to use
- *
- * Returns false if the payload could not be encapsulated or true otherwise.
- *
- * This call might reallocate skb data.
- */
-bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- struct batadv_orig_node *orig,
- int packet_subtype)
-{
- struct batadv_hard_iface *primary_if;
- struct batadv_unicast_4addr_packet *unicast_4addr_packet;
- bool ret = false;
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- /* pull the header space and fill the unicast_packet substructure.
- * We can do that because the first member of the unicast_4addr_packet
- * is of type struct unicast_packet
- */
- if (!batadv_unicast_push_and_fill_skb(skb,
- sizeof(*unicast_4addr_packet),
- orig))
- goto out;
-
- unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
- unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
- memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr,
- ETH_ALEN);
- unicast_4addr_packet->subtype = packet_subtype;
- unicast_4addr_packet->reserved = 0;
-
- ret = true;
-out:
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
- return ret;
-}
-
-/**
- * batadv_unicast_generic_send_skb - send an skb as unicast
- * @bat_priv: the bat priv with all the soft interface information
- * @skb: payload to send
- * @packet_type: the batman unicast packet type to use
- * @packet_subtype: the batman packet subtype. It is ignored if packet_type is
- * not BATADV_UNICAT_4ADDR
- *
- * Returns 1 in case of error or 0 otherwise
- */
-int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int packet_type,
- int packet_subtype)
-{
- struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
- struct batadv_unicast_packet *unicast_packet;
- struct batadv_orig_node *orig_node;
- struct batadv_neigh_node *neigh_node;
- int data_len = skb->len;
- int ret = NET_RX_DROP;
- unsigned int dev_mtu, header_len;
-
- /* get routing information */
- if (is_multicast_ether_addr(ethhdr->h_dest)) {
- orig_node = batadv_gw_get_selected_orig(bat_priv);
- if (orig_node)
- goto find_router;
- }
-
- /* check for tt host - increases orig_node refcount.
- * returns NULL in case of AP isolation
- */
- orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
- ethhdr->h_dest);
-
-find_router:
- /* find_router():
- * - if orig_node is NULL it returns NULL
- * - increases neigh_nodes refcount if found.
- */
- neigh_node = batadv_find_router(bat_priv, orig_node, NULL);
-
- if (!neigh_node)
- goto out;
-
- switch (packet_type) {
- case BATADV_UNICAST:
- if (!batadv_unicast_prepare_skb(skb, orig_node))
- goto out;
-
- header_len = sizeof(struct batadv_unicast_packet);
- break;
- case BATADV_UNICAST_4ADDR:
- if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
- packet_subtype))
- goto out;
-
- header_len = sizeof(struct batadv_unicast_4addr_packet);
- break;
- default:
- /* this function supports UNICAST and UNICAST_4ADDR only. It
- * should never be invoked with any other packet type
- */
- goto out;
- }
-
- ethhdr = (struct ethhdr *)(skb->data + header_len);
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
-
- /* inform the destination node that we are still missing a correct route
- * for this client. The destination will receive this packet and will
- * try to reroute it because the ttvn contained in the header is less
- * than the current one
- */
- if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest))
- unicast_packet->ttvn = unicast_packet->ttvn - 1;
-
- dev_mtu = neigh_node->if_incoming->net_dev->mtu;
- /* fragmentation mechanism only works for UNICAST (now) */
- if (packet_type == BATADV_UNICAST &&
- atomic_read(&bat_priv->fragmentation) &&
- data_len + sizeof(*unicast_packet) > dev_mtu) {
- /* send frag skb decreases ttl */
- unicast_packet->header.ttl++;
- ret = batadv_frag_send_skb(skb, bat_priv,
- neigh_node->if_incoming,
- neigh_node->addr);
- goto out;
- }
-
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = 0;
-
-out:
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
- if (orig_node)
- batadv_orig_node_free_ref(orig_node);
- if (ret == NET_RX_DROP)
- kfree_skb(skb);
- return ret;
-}
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
deleted file mode 100644
index 429cf8a..0000000
--- a/net/batman-adv/unicast.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
- *
- * Andreas Langer
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef _NET_BATMAN_ADV_UNICAST_H_
-#define _NET_BATMAN_ADV_UNICAST_H_
-
-#include "packet.h"
-
-#define BATADV_FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */
-#define BATADV_FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
-
-int batadv_frag_reassemble_skb(struct sk_buff *skb,
- struct batadv_priv *bat_priv,
- struct sk_buff **new_skb);
-void batadv_frag_list_free(struct list_head *head);
-int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface,
- const uint8_t dstaddr[]);
-bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- int packet_subtype);
-int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int packet_type,
- int packet_subtype);
-
-
-/**
- * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet
- * @bat_priv: the bat priv with all the soft interface information
- * @skb: the payload to send
- */
-static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb)
-{
- return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST,
- 0);
-}
-
-/**
- * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet
- * @bat_priv: the bat priv with all the soft interface information
- * @skb: the payload to send
- * @packet_subtype: the batman 4addr packet subtype to use
- */
-static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- int packet_subtype)
-{
- return batadv_unicast_generic_send_skb(bat_priv, skb,
- BATADV_UNICAST_4ADDR,
- packet_subtype);
-}
-
-static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu)
-{
- const struct batadv_unicast_frag_packet *unicast_packet;
- int uneven_correction = 0;
- unsigned int merged_size;
-
- unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
-
- if (unicast_packet->flags & BATADV_UNI_FRAG_LARGETAIL) {
- if (unicast_packet->flags & BATADV_UNI_FRAG_HEAD)
- uneven_correction = 1;
- else
- uneven_correction = -1;
- }
-
- merged_size = (skb->len - sizeof(*unicast_packet)) * 2;
- merged_size += sizeof(struct batadv_unicast_packet) + uneven_correction;
-
- return merged_size <= mtu;
-}
-
-#endif /* _NET_BATMAN_ADV_UNICAST_H_ */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f877362..878f008 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -619,7 +619,7 @@
/* Replicate the checks that IPv6 does on packet reception and pass the packet
* to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
+static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -669,7 +669,8 @@
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -691,7 +692,7 @@
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
- return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
+ return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
}
if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -727,7 +728,8 @@
* took place when the packet entered the bridge), but we
* register an IPv4 PRE_ROUTING 'sabotage' hook that will
* prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -765,7 +767,8 @@
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -818,7 +821,8 @@
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -878,7 +882,8 @@
#endif
/* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -923,7 +928,8 @@
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index a9aff9c..68f8128 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,6 +1,9 @@
#
# Bridge netfilter configuration
#
+#
+config NF_TABLES_BRIDGE
+ tristate "Ethernet Bridge nf_tables support"
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 0718699..ea7629f 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -2,6 +2,8 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
+obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
# tables
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 94b2b70..bb2da7b 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -60,17 +60,21 @@
};
static unsigned int
-ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(in)->xt.frame_filter);
}
static unsigned int
-ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(out)->xt.frame_filter);
}
static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 322555a..bd238f1 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -60,17 +60,21 @@
};
static unsigned int
-ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in
- , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(in)->xt.frame_nat);
}
static unsigned int
-ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in
- , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(out)->xt.frame_nat);
}
static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644
index 0000000..e8cb016f
--- /dev/null
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_tables.h>
+
+static struct nft_af_info nft_af_bridge __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .nhooks = NF_BR_NUMHOOKS,
+ .owner = THIS_MODULE,
+};
+
+static int nf_tables_bridge_init_net(struct net *net)
+{
+ net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.bridge == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
+
+ if (nft_register_afinfo(net, net->nft.bridge) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.bridge);
+ return -ENOMEM;
+}
+
+static void nf_tables_bridge_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.bridge);
+ kfree(net->nft.bridge);
+}
+
+static struct pernet_operations nf_tables_bridge_net_ops = {
+ .init = nf_tables_bridge_init_net,
+ .exit = nf_tables_bridge_exit_net,
+};
+
+static int __init nf_tables_bridge_init(void)
+{
+ return register_pernet_subsys(&nf_tables_bridge_net_ops);
+}
+
+static void __exit nf_tables_bridge_exit(void)
+{
+ return unregister_pernet_subsys(&nf_tables_bridge_net_ops);
+}
+
+module_init(nf_tables_bridge_init);
+module_exit(nf_tables_bridge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe3..e83015c 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@
}
-static unsigned int dnrmg_hook(unsigned int hook,
+static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 41e1c3e..56a964a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -336,12 +336,9 @@
err = 0;
out:
- if (sk) {
- if (sk->sk_state == TCP_TIME_WAIT)
- inet_twsk_put((struct inet_timewait_sock *)sk);
- else
- sock_put(sk);
- }
+ if (sk)
+ sock_gen_put(sk);
+
out_nosk:
return err;
}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1657e39b..40d5607 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,27 @@
If unsure, say Y.
+config NF_TABLES_IPV4
+ depends on NF_TABLES
+ tristate "IPv4 nf_tables support"
+
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "nf_tables IPv4 reject support"
+
+config NFT_CHAIN_ROUTE_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables route chain support"
+
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ depends on NF_NAT_IPV4 && NFT_NAT
+ tristate "IPv4 nf_tables nat chain support"
+
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
+
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3622b24..19df72b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,12 @@
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index a865f6f..802ddec 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@
/* The work comes in here from netfilter.c */
static unsigned int
-arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+ return arpt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.arptable_filter);
}
static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0b732ef..a2e2b61 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -483,7 +483,7 @@
#endif
static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index b6346bf..01cffea 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -297,7 +297,7 @@
return XT_CONTINUE;
}
-static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 50af5b4..e08a74a 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@
};
static unsigned int
-iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0d8cd82..6a5079c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -79,19 +79,19 @@
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_mangle_hook(unsigned int hook,
+iptable_mangle_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv4.iptable_mangle);
/* PREROUTING/INPUT/FORWARD: */
- return ipt_do_table(skb, hook, in, out,
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv4.iptable_mangle);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 683bfaf..ee28861 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@
}
static unsigned int
-nf_nat_ipv4_fn(unsigned int hooknum,
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -71,7 +71,7 @@
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
/* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
/* We never see fragments: conntrack defrags on pre-routing
* and local-out, and nf_nat_out protects post-routing.
@@ -108,7 +108,7 @@
case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
+ ops->hooknum))
return NF_DROP;
else
return NF_ACCEPT;
@@ -121,14 +121,14 @@
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
@@ -137,11 +137,11 @@
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -149,7 +149,7 @@
}
static unsigned int
-nf_nat_ipv4_in(unsigned int hooknum,
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -158,7 +158,7 @@
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
@@ -167,7 +167,7 @@
}
static unsigned int
-nf_nat_ipv4_out(unsigned int hooknum,
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -185,7 +185,7 @@
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -207,7 +207,7 @@
}
static unsigned int
-nf_nat_ipv4_local_fn(unsigned int hooknum,
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -223,7 +223,7 @@
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1f82aea..b2f7e8f 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f867a8d..c86647e 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@
};
static unsigned int
-iptable_security_hook(unsigned int hook, struct sk_buff *skb,
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* Somebody is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 86f5b34..ecd8bec 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@
return NF_ACCEPT;
}
-static unsigned int ipv4_helper(unsigned int hooknum,
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -121,7 +121,7 @@
ct, ctinfo);
}
-static unsigned int ipv4_confirm(unsigned int hooknum,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -147,16 +147,16 @@
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
}
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -166,7 +166,7 @@
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 7428155..12e13bd 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -60,7 +60,7 @@
return IP_DEFRAG_CONNTRACK_OUT + zone;
}
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -83,7 +83,9 @@
#endif
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
- enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
if (nf_ct_ipv4_gather_frags(skb, user))
return NF_STOLEN;
}
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 0000000..3e67ef1c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+};
+
+static int nf_tables_arp_init_net(struct net *net)
+{
+ net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.arp== NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+
+ if (nft_register_afinfo(net, net->nft.arp) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.arp);
+ return -ENOMEM;
+}
+
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, in, out);
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+static struct nf_chain_type filter_arp = {
+ .family = NFPROTO_ARP,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+ .fn = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+static int __init nf_tables_arp_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_arp);
+ ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_arp);
+
+ return ret;
+}
+
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 0000000..8f7536b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ },
+};
+
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+ net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv4 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+ if (nft_register_afinfo(net, net->nft.ipv4) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv4);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+static unsigned int
+nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+static struct nf_chain_type filter_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .fn = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+ nft_register_chain_type(&filter_ipv4);
+ return register_pernet_subsys(&nf_tables_ipv4_net_ops);
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 0000000..cf2c792
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * NAT chains
+ */
+
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ nat = nfct_nat(ct);
+ if (nat == NULL) {
+ /* Conntrack module was loaded late, can't add extension. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL)
+ return NF_ACCEPT;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ __be32 daddr = ip_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ip_hdr(skb)->daddr != daddr) {
+ skb_dst_drop(skb);
+ }
+ return ret;
+}
+
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)
+ return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+ ret : NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_nat_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .fn = {
+ [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_output,
+ [NF_INET_LOCAL_IN] = nf_nat_fn,
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init nft_chain_nat_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv4);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 0000000..4e6bf9a
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ u32 mark;
+ __be32 saddr, daddr;
+ u_int8_t tos;
+ const struct iphdr *iph;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_route_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .fn = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 0000000..fff5ba1
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+
+struct nft_reject {
+ enum nft_reject_types type:8;
+ u8 icmp_code;
+};
+
+static void nft_reject_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
+ break;
+ case NFT_REJECT_TCP_RST:
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+ [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
+ [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
+};
+
+static int nft_reject_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_REJECT_TYPE] == NULL)
+ return -EINVAL;
+
+ priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+ return -EINVAL;
+ priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+ case NFT_REJECT_TCP_RST:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type))
+ goto nla_put_failure;
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+ goto nla_put_failure;
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_reject_type;
+static const struct nft_expr_ops nft_reject_ops = {
+ .type = &nft_reject_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_type __read_mostly = {
+ .name = "reject",
+ .ops = &nft_reject_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_module_init(void)
+{
+ return nft_register_expr(&nft_reject_type);
+}
+
+static void __exit nft_reject_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_type);
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6011615..d2d3253 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -295,7 +295,7 @@
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
dst_entries_get_slow(&ipv4_dst_ops),
- st->in_hit,
+ 0, /* st->in_hit */
st->in_slow_tot,
st->in_slow_mc,
st->in_no_route,
@@ -303,16 +303,16 @@
st->in_martian_dst,
st->in_martian_src,
- st->out_hit,
+ 0, /* st->out_hit */
st->out_slow_tot,
st->out_slow_mc,
- st->gc_total,
- st->gc_ignored,
- st->gc_goal_miss,
- st->gc_dst_overflow,
- st->in_hlist_search,
- st->out_hlist_search
+ 0, /* st->gc_total */
+ 0, /* st->gc_ignored */
+ 0, /* st->gc_goal_miss */
+ 0, /* st->gc_dst_overflow */
+ 0, /* st->in_hlist_search */
+ 0 /* st->out_hlist_search */
);
return 0;
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b..7702f9e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,19 @@
To compile it as a module, choose M here. If unsure, say N.
+config NF_TABLES_IPV6
+ depends on NF_TABLES
+ tristate "IPv6 nf_tables support"
+
+config NFT_CHAIN_ROUTE_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables route chain support"
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ depends on NF_NAT_IPV6 && NFT_NAT
+ tristate "IPv6 nf_tables nat chain support"
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f..d1b4928 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,11 @@
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 2748b04..bf9f612 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -312,7 +312,7 @@
return XT_CONTINUE;
}
-static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b1..ca7f6c1 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907..307bbb7 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, hook, in, out,
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2..84c7f33 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@
}
static unsigned int
-nf_nat_ipv6_fn(unsigned int hooknum,
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -72,7 +72,7 @@
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
__be16 frag_off;
int hdrlen;
u8 nexthdr;
@@ -111,7 +111,8 @@
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- hooknum, hdrlen))
+ ops->hooknum,
+ hdrlen))
return NF_DROP;
else
return NF_ACCEPT;
@@ -124,14 +125,14 @@
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
@@ -140,11 +141,11 @@
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +153,7 @@
}
static unsigned int
-nf_nat_ipv6_in(unsigned int hooknum,
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -161,7 +162,7 @@
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -170,7 +171,7 @@
}
static unsigned int
-nf_nat_ipv6_out(unsigned int hooknum,
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -187,7 +188,7 @@
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +210,7 @@
}
static unsigned int
-nf_nat_ipv6_local_fn(unsigned int hooknum,
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -224,7 +225,7 @@
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d8..5274740 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d..ab3b021 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@
};
static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 54b75ea..486545e 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@
return NF_ACCEPT;
}
-static unsigned int ipv6_helper(unsigned int hooknum,
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -133,7 +133,7 @@
return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -219,16 +219,17 @@
return nf_conntrack_in(net, PF_INET6, hooknum, skb);
}
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+ return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
+ okfn);
}
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -239,7 +240,8 @@
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+ return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
+ okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121..ec483aa 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@
}
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -66,7 +66,7 @@
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+ reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
@@ -75,7 +75,7 @@
if (reasm == skb)
return NF_ACCEPT;
- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+ nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
(struct net_device *)out, okfn);
return NF_STOLEN;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 0000000..d77db8a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ },
+};
+
+static int nf_tables_ipv6_init_net(struct net *net)
+{
+ net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv6 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
+
+ if (nft_register_afinfo(net, net->nft.ipv6) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv6);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv6_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv6);
+ kfree(net->nft.ipv6);
+}
+
+static struct pernet_operations nf_tables_ipv6_net_ops = {
+ .init = nf_tables_ipv6_init_net,
+ .exit = nf_tables_ipv6_exit_net,
+};
+
+static unsigned int
+nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+static struct nf_chain_type filter_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .fn = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+ nft_register_chain_type(&filter_ipv6);
+ return register_pernet_subsys(&nf_tables_ipv6_net_ops);
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+ nft_unregister_chain_type(&filter_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 0000000..e86dcd7
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ipv6.h>
+
+/*
+ * IPv6 NAT chains
+ */
+
+static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (nat == NULL) {
+ /* Conntrack module was loaded late, can't add extension. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL)
+ return NF_ACCEPT;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_nat_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .fn = {
+ [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
+ [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init nft_chain_nat_ipv6_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv6);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_ipv6_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+}
+
+module_init(nft_chain_nat_ipv6_init);
+module_exit(nft_chain_nat_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 0000000..3fe40f0
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ struct in6_addr saddr, daddr;
+ u_int8_t hop_limit;
+ u32 mark, flowlabel;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+ /* flowlabel and prio (includes version, which shouldn't change either */
+ flowlabel = *((u32 *)ipv6_hdr(skb));
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_route_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .fn = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv6);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv6);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6..48acec1 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,58 @@
endif # NF_CONNTRACK
+config NF_TABLES
+ depends on NETFILTER_NETLINK
+ tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ tristate "Netfilter nf_tables conntrack module"
+
+config NFT_RBTREE
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables rbtree set module"
+
+config NFT_HASH
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables hash set module"
+
+config NFT_COUNTER
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables limit module"
+
+config NFT_NAT
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ depends on NF_NAT
+ tristate "Netfilter nf_tables nat module"
+
+config NFT_COMPAT
+ depends on NF_TABLES
+ depends on NETFILTER_XTABLES
+ tristate "Netfilter x_tables over nf_tables module"
+ help
+ This is required if you intend to use any of existing
+ x_tables match/target extensions over the nf_tables
+ framework.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12..394483b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -64,6 +64,24 @@
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
+obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
+obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
+obj-$(CONFIG_NFT_NAT) += nft_nat.o
+#nf_tables-objs += nft_meta_target.o
+obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
+obj-$(CONFIG_NFT_LOG) += nft_log.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16e..1fbab0c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
+ verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 74fd00c..34fda62 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1239,11 +1239,11 @@
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET);
+ return ip_vs_out(ops->hooknum, skb, AF_INET);
}
/*
@@ -1251,11 +1251,11 @@
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET);
+ return ip_vs_out(ops->hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1266,11 +1266,11 @@
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET6);
+ return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
/*
@@ -1278,11 +1278,11 @@
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET6);
+ return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
#endif
@@ -1733,12 +1733,12 @@
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET);
+ return ip_vs_in(ops->hooknum, skb, AF_INET);
}
/*
@@ -1746,11 +1746,11 @@
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET);
+ return ip_vs_in(ops->hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1760,7 +1760,7 @@
* Copy info from first fragment, to the rest of them.
*/
static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -1792,12 +1792,12 @@
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET6);
+ return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
/*
@@ -1805,11 +1805,11 @@
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET6);
+ return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
#endif
@@ -1825,7 +1825,7 @@
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
@@ -1842,12 +1842,12 @@
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(skb, &r, hooknum);
+ return ip_vs_in_icmp(skb, &r, ops->hooknum);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
@@ -1866,7 +1866,7 @@
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
+ return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
}
#endif
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6f0f4f7..63a8154 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -432,6 +432,26 @@
}
EXPORT_SYMBOL(nf_nat_setup_info);
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ * Use reply in case it's already been mangled (eg local packet).
+ */
+ union nf_inet_addr ip =
+ (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+ struct nf_nat_range range = {
+ .flags = NF_NAT_RANGE_MAP_IPS,
+ .min_addr = ip,
+ .max_addr = ip,
+ };
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
+
/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 0000000..dcddc49
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,3275 @@
+/*
+ * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ * nft_register_afinfo - register nf_tables address family info
+ *
+ * @afi: address family info to register
+ *
+ * Register the address family for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
+{
+ INIT_LIST_HEAD(&afi->tables);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail(&afi->list, &net->nft.af_info);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ * nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ * @afi: address family info to unregister
+ *
+ * Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del(&afi->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
+{
+ struct nft_af_info *afi;
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (afi->family == family)
+ return afi;
+ }
+ return NULL;
+}
+
+static struct nft_af_info *
+nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
+{
+ struct nft_af_info *afi;
+
+ afi = nft_afinfo_lookup(net, family);
+ if (afi != NULL)
+ return afi;
+#ifdef CONFIG_MODULES
+ if (autoload) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-afinfo-%u", family);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ afi = nft_afinfo_lookup(net, family);
+ if (afi != NULL)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+ const struct nlattr *nla)
+{
+ struct nft_table *table;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ if (!nla_strcmp(nla, table->name))
+ return table;
+ }
+ return NULL;
+}
+
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+ const struct nlattr *nla)
+{
+ struct nft_table *table;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ table = nft_table_lookup(afi, nla);
+ if (table != NULL)
+ return table;
+
+ return ERR_PTR(-ENOENT);
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+ return ++table->hgenerator;
+}
+
+static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+
+static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+{
+ int i;
+
+ for (i=0; i<NFT_CHAIN_T_MAX; i++) {
+ if (chain_type[family][i] != NULL &&
+ !nla_strcmp(nla, chain_type[family][i]->name))
+ return i;
+ }
+ return -1;
+}
+
+static int nf_tables_chain_type_lookup(const struct nft_af_info *afi,
+ const struct nlattr *nla,
+ bool autoload)
+{
+ int type;
+
+ type = __nf_tables_chain_type_lookup(afi->family, nla);
+#ifdef CONFIG_MODULES
+ if (type < 0 && autoload) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-chain-%u-%*.s", afi->family,
+ nla_len(nla)-1, (const char *)nla_data(nla));
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ type = __nf_tables_chain_type_lookup(afi->family, nla);
+ }
+#endif
+ return type;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+ nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+ const struct nlmsghdr *nlh,
+ const struct nft_table *table,
+ int event, int family)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+ bool report;
+ int err;
+
+ report = nlh ? nlmsg_report(nlh) : false;
+ if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+ family, table);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static int nf_tables_dump_tables(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_table_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWTABLE,
+ NLM_F_MULTI,
+ afi->family, table) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+done:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ struct sk_buff *skb2;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_tables,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+ family, table);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static int nf_tables_table_enable(struct nft_table *table)
+{
+ struct nft_chain *chain;
+ int err, i = 0;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ err = nf_register_hook(&nft_base_chain(chain)->ops);
+ if (err < 0)
+ goto err;
+
+ i++;
+ }
+ return 0;
+err:
+ list_for_each_entry(chain, &table->chains, list) {
+ if (i-- <= 0)
+ break;
+
+ nf_unregister_hook(&nft_base_chain(chain)->ops);
+ }
+ return err;
+}
+
+static int nf_tables_table_disable(struct nft_table *table)
+{
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list)
+ nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+ return 0;
+}
+
+static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct nft_af_info *afi, struct nft_table *table)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ int family = nfmsg->nfgen_family, ret = 0;
+
+ if (nla[NFTA_TABLE_FLAGS]) {
+ __be32 flags;
+
+ flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
+
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(table->flags & NFT_TABLE_F_DORMANT)) {
+ ret = nf_tables_table_disable(table);
+ if (ret >= 0)
+ table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(table);
+ if (ret >= 0)
+ table->flags &= ~NFT_TABLE_F_DORMANT;
+ }
+ if (ret < 0)
+ goto err;
+ }
+
+ nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+err:
+ return ret;
+}
+
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr *name;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ name = nla[NFTA_TABLE_NAME];
+ table = nf_tables_table_lookup(afi, name);
+ if (IS_ERR(table)) {
+ if (PTR_ERR(table) != -ENOENT)
+ return PTR_ERR(table);
+ table = NULL;
+ }
+
+ if (table != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+ return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+ }
+
+ table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+ if (table == NULL)
+ return -ENOMEM;
+
+ nla_strlcpy(table->name, name, nla_len(name));
+ INIT_LIST_HEAD(&table->chains);
+ INIT_LIST_HEAD(&table->sets);
+
+ if (nla[NFTA_TABLE_FLAGS]) {
+ __be32 flags;
+
+ flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT) {
+ kfree(table);
+ return -EINVAL;
+ }
+
+ table->flags |= flags;
+ }
+
+ list_add_tail(&table->list, &afi->tables);
+ nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ return 0;
+}
+
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ if (table->use)
+ return -EBUSY;
+
+ list_del(&table->list);
+ nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
+ kfree(table);
+ return 0;
+}
+
+int nft_register_chain_type(struct nf_chain_type *ctype)
+{
+ int err = 0;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (chain_type[ctype->family][ctype->type] != NULL) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ if (!try_module_get(ctype->me))
+ goto out;
+
+ chain_type[ctype->family][ctype->type] = ctype;
+out:
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_chain_type);
+
+void nft_unregister_chain_type(struct nf_chain_type *ctype)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ chain_type[ctype->family][ctype->type] = NULL;
+ module_put(ctype->me);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (chain->handle == handle)
+ return chain;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+ const struct nlattr *nla)
+{
+ struct nft_chain *chain;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nla_strcmp(nla, chain->name))
+ return chain;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+ [NFTA_CHAIN_TABLE] = { .type = NLA_STRING },
+ [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
+ [NFTA_CHAIN_NAME] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
+ [NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+ [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
+ [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
+};
+
+static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
+{
+ struct nft_stats *cpu_stats, total;
+ struct nlattr *nest;
+ int cpu;
+
+ memset(&total, 0, sizeof(total));
+ for_each_possible_cpu(cpu) {
+ cpu_stats = per_cpu_ptr(stats, cpu);
+ total.pkts += cpu_stats->pkts;
+ total.bytes += cpu_stats->bytes;
+ }
+ nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) ||
+ nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table,
+ const struct nft_chain *chain)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+ goto nla_put_failure;
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain = nft_base_chain(chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+
+ if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
+ htonl(basechain->policy)))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_CHAIN_TYPE,
+ chain_type[ops->pf][nft_base_chain(chain)->type]->name))
+ goto nla_put_failure;
+
+ if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+ const struct nlmsghdr *nlh,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ int event, int family)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ bool report;
+ int err;
+
+ report = nlh ? nlmsg_report(nlh) : false;
+ if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+ table, chain);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static int nf_tables_dump_chains(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWCHAIN,
+ NLM_F_MULTI,
+ afi->family, table, chain) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+ }
+done:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ struct sk_buff *skb2;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_chains,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+ family, table, chain);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static int
+nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+ switch (ntohl(nla_get_be32(attr))) {
+ case NF_DROP:
+ chain->policy = NF_DROP;
+ break;
+ case NF_ACCEPT:
+ chain->policy = NF_ACCEPT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+ [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
+ [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
+};
+
+static int
+nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+ struct nlattr *tb[NFTA_COUNTER_MAX+1];
+ struct nft_stats __percpu *newstats;
+ struct nft_stats *stats;
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
+ return -EINVAL;
+
+ newstats = alloc_percpu(struct nft_stats);
+ if (newstats == NULL)
+ return -ENOMEM;
+
+ /* Restore old counters on this cpu, no problem. Per-cpu statistics
+ * are not exposed to userspace.
+ */
+ stats = this_cpu_ptr(newstats);
+ stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+
+ if (chain->stats) {
+ /* nfnl_lock is held, add some nfnl function for this, later */
+ struct nft_stats __percpu *oldstats =
+ rcu_dereference_protected(chain->stats, 1);
+
+ rcu_assign_pointer(chain->stats, newstats);
+ synchronize_rcu();
+ free_percpu(oldstats);
+ } else
+ rcu_assign_pointer(chain->stats, newstats);
+
+ return 0;
+}
+
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr * uninitialized_var(name);
+ const struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_base_chain *basechain = NULL;
+ struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ u64 handle = 0;
+ int err;
+ bool create;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ if (table->use == UINT_MAX)
+ return -EOVERFLOW;
+
+ chain = NULL;
+ name = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ chain = nf_tables_chain_lookup(table, name);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT)
+ return PTR_ERR(chain);
+ chain = NULL;
+ }
+ }
+
+ if (chain != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ if (nla[NFTA_CHAIN_HANDLE] && name &&
+ !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+ return -EEXIST;
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ return -EOPNOTSUPP;
+
+ err = nf_tables_chain_policy(nft_base_chain(chain),
+ nla[NFTA_CHAIN_POLICY]);
+ if (err < 0)
+ return err;
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ return -EOPNOTSUPP;
+
+ err = nf_tables_counters(nft_base_chain(chain),
+ nla[NFTA_CHAIN_COUNTERS]);
+ if (err < 0)
+ return err;
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] && name)
+ nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+ goto notify;
+ }
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nf_hook_ops *ops;
+ nf_hookfn *hookfn;
+ u32 hooknum;
+ int type = NFT_CHAIN_T_DEFAULT;
+
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = nf_tables_chain_type_lookup(afi,
+ nla[NFTA_CHAIN_TYPE],
+ create);
+ if (type < 0)
+ return -ENOENT;
+ }
+
+ err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+ nft_hook_policy);
+ if (err < 0)
+ return err;
+ if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+ ha[NFTA_HOOK_PRIORITY] == NULL)
+ return -EINVAL;
+
+ hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+ if (hooknum >= afi->nhooks)
+ return -EINVAL;
+
+ hookfn = chain_type[family][type]->fn[hooknum];
+ if (hookfn == NULL)
+ return -EOPNOTSUPP;
+
+ basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+ if (basechain == NULL)
+ return -ENOMEM;
+
+ basechain->type = type;
+ chain = &basechain->chain;
+
+ ops = &basechain->ops;
+ ops->pf = family;
+ ops->owner = afi->owner;
+ ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+ ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+ ops->priv = chain;
+ ops->hook = hookfn;
+ if (afi->hooks[ops->hooknum])
+ ops->hook = afi->hooks[ops->hooknum];
+
+ chain->flags |= NFT_BASE_CHAIN;
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ err = nf_tables_chain_policy(basechain,
+ nla[NFTA_CHAIN_POLICY]);
+ if (err < 0) {
+ free_percpu(basechain->stats);
+ kfree(basechain);
+ return err;
+ }
+ } else
+ basechain->policy = NF_ACCEPT;
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ err = nf_tables_counters(basechain,
+ nla[NFTA_CHAIN_COUNTERS]);
+ if (err < 0) {
+ free_percpu(basechain->stats);
+ kfree(basechain);
+ return err;
+ }
+ } else {
+ struct nft_stats __percpu *newstats;
+
+ newstats = alloc_percpu(struct nft_stats);
+ if (newstats == NULL)
+ return -ENOMEM;
+
+ rcu_assign_pointer(nft_base_chain(chain)->stats,
+ newstats);
+ }
+ } else {
+ chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+ if (chain == NULL)
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&chain->rules);
+ chain->handle = nf_tables_alloc_handle(table);
+ chain->net = net;
+ chain->table = table;
+ nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN) {
+ err = nf_register_hook(&nft_base_chain(chain)->ops);
+ if (err < 0) {
+ free_percpu(basechain->stats);
+ kfree(basechain);
+ return err;
+ }
+ }
+ list_add_tail(&chain->list, &table->chains);
+ table->use++;
+notify:
+ nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+ family);
+ return 0;
+}
+
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+ struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+
+ BUG_ON(chain->use > 0);
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ free_percpu(nft_base_chain(chain)->stats);
+ kfree(nft_base_chain(chain));
+ } else
+ kfree(chain);
+}
+
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ if (!list_empty(&chain->rules))
+ return -EBUSY;
+
+ list_del(&chain->list);
+ table->use--;
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN)
+ nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+ nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+ family);
+
+ /* Make sure all rule references are gone before this is released */
+ call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+ return 0;
+}
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nft_af_info *afi,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->skb = skb;
+ ctx->nlh = nlh;
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ * nft_register_expr - register nf_tables expr type
+ * @ops: expr type
+ *
+ * Registers the expr type for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail(&type->list, &nf_tables_expressions);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ * nft_unregister_expr - unregister nf_tables expr type
+ * @ops: expr type
+ *
+ * Unregisters the expr typefor use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+{
+ const struct nft_expr_type *type;
+
+ list_for_each_entry(type, &nf_tables_expressions, list) {
+ if (!nla_strcmp(nla, type->name))
+ return type;
+ }
+ return NULL;
+}
+
+static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+{
+ const struct nft_expr_type *type;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ type = __nft_expr_type_get(nla);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-expr-%.*s",
+ nla_len(nla), (char *)nla_data(nla));
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_expr_type_get(nla))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+ [NFTA_EXPR_NAME] = { .type = NLA_STRING },
+ [NFTA_EXPR_DATA] = { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
+ goto nla_put_failure;
+
+ if (expr->ops->dump) {
+ struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+ if (data == NULL)
+ goto nla_put_failure;
+ if (expr->ops->dump(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, data);
+ }
+
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+};
+
+struct nft_expr_info {
+ const struct nft_expr_ops *ops;
+ struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
+};
+
+static int nf_tables_expr_parse(const struct nft_ctx *ctx,
+ const struct nlattr *nla,
+ struct nft_expr_info *info)
+{
+ const struct nft_expr_type *type;
+ const struct nft_expr_ops *ops;
+ struct nlattr *tb[NFTA_EXPR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+ if (err < 0)
+ return err;
+
+ type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+
+ if (tb[NFTA_EXPR_DATA]) {
+ err = nla_parse_nested(info->tb, type->maxattr,
+ tb[NFTA_EXPR_DATA], type->policy);
+ if (err < 0)
+ goto err1;
+ } else
+ memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
+
+ if (type->select_ops != NULL) {
+ ops = type->select_ops(ctx,
+ (const struct nlattr * const *)info->tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else
+ ops = type->ops;
+
+ info->ops = ops;
+ return 0;
+
+err1:
+ module_put(type->owner);
+ return err;
+}
+
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+ const struct nft_expr_info *info,
+ struct nft_expr *expr)
+{
+ const struct nft_expr_ops *ops = info->ops;
+ int err;
+
+ expr->ops = ops;
+ if (ops->init) {
+ err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
+ if (err < 0)
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ expr->ops = NULL;
+ return err;
+}
+
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+ if (expr->ops->destroy)
+ expr->ops->destroy(expr);
+ module_put(expr->ops->type->owner);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+ u64 handle)
+{
+ struct nft_rule *rule;
+
+ // FIXME: this sucks
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (handle == rule->handle)
+ return rule;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+ const struct nlattr *nla)
+{
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+ [NFTA_RULE_TABLE] = { .type = NLA_STRING },
+ [NFTA_RULE_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_RULE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
+ [NFTA_RULE_POSITION] = { .type = NLA_U64 },
+};
+
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nft_rule *rule)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ const struct nft_expr *expr, *next;
+ struct nlattr *list;
+ const struct nft_rule *prule;
+ int type = event | NFNL_SUBSYS_NFTABLES << 8;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+ goto nla_put_failure;
+
+ if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
+ prule = list_entry(rule->list.prev, struct nft_rule, list);
+ if (nla_put_be64(skb, NFTA_RULE_POSITION,
+ cpu_to_be64(prule->handle)))
+ goto nla_put_failure;
+ }
+
+ list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+ if (list == NULL)
+ goto nla_put_failure;
+ nft_rule_for_each_expr(expr, next, rule) {
+ struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+ if (elem == NULL)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, elem);
+ }
+ nla_nest_end(skb, list);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+ const struct nlmsghdr *nlh,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nft_rule *rule,
+ int event, u32 flags, int family)
+{
+ struct sk_buff *skb;
+ u32 portid = NETLINK_CB(oskb).portid;
+ struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+ u32 seq = nlh->nlmsg_seq;
+ bool report;
+ int err;
+
+ report = nlmsg_report(nlh);
+ if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+ family, table, chain, rule);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+ return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+ /* Now inactive, will be active in the future */
+ rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = 0;
+}
+
+static int nf_tables_dump_rules(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ u8 genctr = ACCESS_ONCE(net->nft.genctr);
+ u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_rule_is_active(net, rule))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ afi->family, table, chain, rule) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+ }
+ }
+done:
+ /* Invalidate this dump, a transition to the new generation happened */
+ if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
+ return -EBUSY;
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ struct sk_buff *skb2;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_rules,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ if (IS_ERR(rule))
+ return PTR_ERR(rule);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+ family, table, chain, rule);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+ struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+ struct nft_expr *expr;
+
+ /*
+ * Careful: some expressions might not be initialized in case this
+ * is called on error from nf_tables_newrule().
+ */
+ expr = nft_expr_first(rule);
+ while (expr->ops && expr != nft_expr_last(rule)) {
+ nf_tables_expr_destroy(expr);
+ expr = nft_expr_next(expr);
+ }
+ kfree(rule);
+}
+
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+ call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
+}
+
+#define NFT_RULE_MAXEXPRS 128
+
+static struct nft_expr_info *info;
+
+static struct nft_rule_trans *
+nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+{
+ struct nft_rule_trans *rupd;
+
+ rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
+ if (rupd == NULL)
+ return NULL;
+
+ rupd->chain = ctx->chain;
+ rupd->table = ctx->table;
+ rupd->rule = rule;
+ rupd->family = ctx->afi->family;
+ rupd->nlh = ctx->nlh;
+ list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+
+ return rupd;
+}
+
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ struct net *net = sock_net(skb->sk);
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_rule *rule, *old_rule = NULL;
+ struct nft_rule_trans *repl = NULL;
+ struct nft_expr *expr;
+ struct nft_ctx ctx;
+ struct nlattr *tmp;
+ unsigned int size, i, n;
+ int err, rem;
+ bool create;
+ u64 handle, pos_handle;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ if (nla[NFTA_RULE_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+ rule = __nf_tables_rule_lookup(chain, handle);
+ if (IS_ERR(rule))
+ return PTR_ERR(rule);
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ old_rule = rule;
+ else
+ return -EOPNOTSUPP;
+ } else {
+ if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EINVAL;
+ handle = nf_tables_alloc_handle(table);
+ }
+
+ if (nla[NFTA_RULE_POSITION]) {
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ return -EOPNOTSUPP;
+
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nf_tables_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule))
+ return PTR_ERR(old_rule);
+ }
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+ n = 0;
+ size = 0;
+ if (nla[NFTA_RULE_EXPRESSIONS]) {
+ nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+ err = -EINVAL;
+ if (nla_type(tmp) != NFTA_LIST_ELEM)
+ goto err1;
+ if (n == NFT_RULE_MAXEXPRS)
+ goto err1;
+ err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
+ if (err < 0)
+ goto err1;
+ size += info[n].ops->size;
+ n++;
+ }
+ }
+
+ err = -ENOMEM;
+ rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+ if (rule == NULL)
+ goto err1;
+
+ nft_rule_activate_next(net, rule);
+
+ rule->handle = handle;
+ rule->dlen = size;
+
+ expr = nft_expr_first(rule);
+ for (i = 0; i < n; i++) {
+ err = nf_tables_newexpr(&ctx, &info[i], expr);
+ if (err < 0)
+ goto err2;
+ info[i].ops = NULL;
+ expr = nft_expr_next(expr);
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_rule_is_active_next(net, old_rule)) {
+ repl = nf_tables_trans_add(old_rule, &ctx);
+ if (repl == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ nft_rule_disactivate_next(net, old_rule);
+ list_add_tail(&rule->list, &old_rule->list);
+ } else {
+ err = -ENOENT;
+ goto err2;
+ }
+ } else if (nlh->nlmsg_flags & NLM_F_APPEND)
+ if (old_rule)
+ list_add_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_tail_rcu(&rule->list, &chain->rules);
+ else {
+ if (old_rule)
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_rcu(&rule->list, &chain->rules);
+ }
+
+ if (nf_tables_trans_add(rule, &ctx) == NULL) {
+ err = -ENOMEM;
+ goto err3;
+ }
+ return 0;
+
+err3:
+ list_del_rcu(&rule->list);
+ if (repl) {
+ list_del_rcu(&repl->rule->list);
+ list_del(&repl->list);
+ nft_rule_clear(net, repl->rule);
+ kfree(repl);
+ }
+err2:
+ nf_tables_rule_destroy(rule);
+err1:
+ for (i = 0; i < n; i++) {
+ if (info[i].ops != NULL)
+ module_put(info[i].ops->type->owner);
+ }
+ return err;
+}
+
+static int
+nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ /* You cannot delete the same rule twice */
+ if (nft_rule_is_active_next(ctx->net, rule)) {
+ if (nf_tables_trans_add(rule, ctx) == NULL)
+ return -ENOMEM;
+ nft_rule_disactivate_next(ctx->net, rule);
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ struct net *net = sock_net(skb->sk);
+ const struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_rule *rule, *tmp;
+ int family = nfmsg->nfgen_family, err = 0;
+ struct nft_ctx ctx;
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+ if (nla[NFTA_RULE_HANDLE]) {
+ rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ if (IS_ERR(rule))
+ return PTR_ERR(rule);
+
+ err = nf_tables_delrule_one(&ctx, rule);
+ } else {
+ /* Remove all rules in this chain */
+ list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
+ err = nf_tables_delrule_one(&ctx, rule);
+ if (err < 0)
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_rule_trans *rupd, *tmp;
+
+ /* Bump generation counter, invalidate any dump in progress */
+ net->nft.genctr++;
+
+ /* A new generation has just started */
+ net->nft.gencursor = gencursor_next(net);
+
+ /* Make sure all packets have left the previous generation before
+ * purging old rules.
+ */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+ /* Delete this rule from the dirty list */
+ list_del(&rupd->list);
+
+ /* This rule was inactive in the past and just became active.
+ * Clear the next bit of the genmask since its meaning has
+ * changed, now it is the future.
+ */
+ if (nft_rule_is_active(net, rupd->rule)) {
+ nft_rule_clear(net, rupd->rule);
+ nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
+ rupd->chain, rupd->rule,
+ NFT_MSG_NEWRULE, 0,
+ rupd->family);
+ kfree(rupd);
+ continue;
+ }
+
+ /* This rule is in the past, get rid of it */
+ list_del_rcu(&rupd->rule->list);
+ nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+ rupd->rule, NFT_MSG_DELRULE, 0,
+ rupd->family);
+ nf_tables_rule_destroy(rupd->rule);
+ kfree(rupd);
+ }
+
+ return 0;
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_rule_trans *rupd, *tmp;
+
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+ /* Delete all rules from the dirty list */
+ list_del(&rupd->list);
+
+ if (!nft_rule_is_active_next(net, rupd->rule)) {
+ nft_rule_clear(net, rupd->rule);
+ kfree(rupd);
+ continue;
+ }
+
+ /* This rule is inactive, get rid of it */
+ list_del_rcu(&rupd->rule->list);
+ nf_tables_rule_destroy(rupd->rule);
+ kfree(rupd);
+ }
+ return 0;
+}
+
+/*
+ * Sets
+ */
+
+static LIST_HEAD(nf_tables_set_ops);
+
+int nft_register_set(struct nft_set_ops *ops)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail(&ops->list, &nf_tables_set_ops);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_set);
+
+void nft_unregister_set(struct nft_set_ops *ops)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del(&ops->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_set);
+
+static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+{
+ const struct nft_set_ops *ops;
+ u32 features;
+
+#ifdef CONFIG_MODULES
+ if (list_empty(&nf_tables_set_ops)) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-set");
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (!list_empty(&nf_tables_set_ops))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ features = 0;
+ if (nla[NFTA_SET_FLAGS] != NULL) {
+ features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ }
+
+ // FIXME: implement selection properly
+ list_for_each_entry(ops, &nf_tables_set_ops, list) {
+ if ((ops->features & features) != features)
+ continue;
+ if (!try_module_get(ops->owner))
+ continue;
+ return ops;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+ [NFTA_SET_TABLE] = { .type = NLA_STRING },
+ [NFTA_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_SET_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+};
+
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ struct net *net = sock_net(skb->sk);
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table = NULL;
+
+ afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ if (nla[NFTA_SET_TABLE] != NULL) {
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+ }
+
+ nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ return 0;
+}
+
+struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla)
+{
+ struct nft_set *set;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry(set, &table->sets, list) {
+ if (!nla_strcmp(nla, set->name))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
+ const char *name)
+{
+ const struct nft_set *i;
+ const char *p;
+ unsigned long *inuse;
+ unsigned int n = 0;
+
+ p = strnchr(name, IFNAMSIZ, '%');
+ if (p != NULL) {
+ if (p[1] != 'd' || strchr(p + 2, '%'))
+ return -EINVAL;
+
+ inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (inuse == NULL)
+ return -ENOMEM;
+
+ list_for_each_entry(i, &ctx->table->sets, list) {
+ if (!sscanf(i->name, name, &n))
+ continue;
+ if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
+ continue;
+ set_bit(n, inuse);
+ }
+
+ n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
+ free_page((unsigned long)inuse);
+ }
+
+ snprintf(set->name, sizeof(set->name), name, n);
+ list_for_each_entry(i, &ctx->table->sets, list) {
+ if (!strcmp(set->name, i->name))
+ return -ENFILE;
+ }
+ return 0;
+}
+
+static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+ const struct nft_set *set, u16 event, u16 flags)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ u32 portid = NETLINK_CB(ctx->skb).portid;
+ u32 seq = ctx->nlh->nlmsg_seq;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+ if (set->flags != 0)
+ if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
+ goto nla_put_failure;
+ if (set->flags & NFT_SET_MAP) {
+ if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
+ goto nla_put_failure;
+ }
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_set_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ int event)
+{
+ struct sk_buff *skb;
+ u32 portid = NETLINK_CB(ctx->skb).portid;
+ bool report;
+ int err;
+
+ report = nlmsg_report(ctx->nlh);
+ if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_set(skb, ctx, set, event, 0);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nft_set *set;
+ unsigned int idx = 0, s_idx = cb->args[0];
+
+ if (cb->args[1])
+ return skb->len;
+
+ list_for_each_entry(set, &ctx->table->sets, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+ NLM_F_MULTI) < 0) {
+ cb->args[0] = idx;
+ goto done;
+ }
+cont:
+ idx++;
+ }
+ cb->args[1] = 1;
+done:
+ return skb->len;
+}
+
+static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nft_set *set;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+
+ if (cb->args[1])
+ return skb->len;
+
+ list_for_each_entry(table, &ctx->afi->tables, list) {
+ if (cur_table && cur_table != table)
+ continue;
+
+ ctx->table = table;
+ list_for_each_entry(set, &ctx->table->sets, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+ NLM_F_MULTI) < 0) {
+ cb->args[0] = idx;
+ cb->args[2] = (unsigned long) table;
+ goto done;
+ }
+cont:
+ idx++;
+ }
+ }
+ cb->args[1] = 1;
+done:
+ return skb->len;
+}
+
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nlattr *nla[NFTA_SET_MAX + 1];
+ struct nft_ctx ctx;
+ int err, ret;
+
+ err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX,
+ nft_set_policy);
+ if (err < 0)
+ return err;
+
+ err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ if (err < 0)
+ return err;
+
+ if (ctx.table == NULL)
+ ret = nf_tables_dump_sets_all(&ctx, skb, cb);
+ else
+ ret = nf_tables_dump_sets_table(&ctx, skb, cb);
+
+ return ret;
+}
+
+static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nft_set *set;
+ struct nft_ctx ctx;
+ struct sk_buff *skb2;
+ int err;
+
+ /* Verify existance before starting dump */
+ err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ if (err < 0)
+ return err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_sets,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_set_ops *ops;
+ const struct nft_af_info *afi;
+ struct net *net = sock_net(skb->sk);
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ char name[IFNAMSIZ];
+ unsigned int size;
+ bool create;
+ u32 ktype, klen, dlen, dtype, flags;
+ int err;
+
+ if (nla[NFTA_SET_TABLE] == NULL ||
+ nla[NFTA_SET_NAME] == NULL ||
+ nla[NFTA_SET_KEY_LEN] == NULL)
+ return -EINVAL;
+
+ ktype = NFT_DATA_VALUE;
+ if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+ ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ return -EINVAL;
+ }
+
+ klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+ return -EINVAL;
+
+ flags = 0;
+ if (nla[NFTA_SET_FLAGS] != NULL) {
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+ if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
+ NFT_SET_INTERVAL | NFT_SET_MAP))
+ return -EINVAL;
+ }
+
+ dtype = 0;
+ dlen = 0;
+ if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+ if (!(flags & NFT_SET_MAP))
+ return -EINVAL;
+
+ dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ dtype != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ if (dtype != NFT_DATA_VERDICT) {
+ if (nla[NFTA_SET_DATA_LEN] == NULL)
+ return -EINVAL;
+ dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (dlen == 0 ||
+ dlen > FIELD_SIZEOF(struct nft_data, data))
+ return -EINVAL;
+ } else
+ dlen = sizeof(struct nft_data);
+ } else if (flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+
+ set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+ if (IS_ERR(set)) {
+ if (PTR_ERR(set) != -ENOENT)
+ return PTR_ERR(set);
+ set = NULL;
+ }
+
+ if (set != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+ return 0;
+ }
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ return -ENOENT;
+
+ ops = nft_select_set_ops(nla);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ size = 0;
+ if (ops->privsize != NULL)
+ size = ops->privsize(nla);
+
+ err = -ENOMEM;
+ set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
+ if (set == NULL)
+ goto err1;
+
+ nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ err = nf_tables_set_alloc_name(&ctx, set, name);
+ if (err < 0)
+ goto err2;
+
+ INIT_LIST_HEAD(&set->bindings);
+ set->ops = ops;
+ set->ktype = ktype;
+ set->klen = klen;
+ set->dtype = dtype;
+ set->dlen = dlen;
+ set->flags = flags;
+
+ err = ops->init(set, nla);
+ if (err < 0)
+ goto err2;
+
+ list_add_tail(&set->list, &table->sets);
+ nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+ return 0;
+
+err2:
+ kfree(set);
+err1:
+ module_put(ops->owner);
+ return err;
+}
+
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ list_del(&set->list);
+ if (!(set->flags & NFT_SET_ANONYMOUS))
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+
+ set->ops->destroy(set);
+ module_put(set->ops->owner);
+ kfree(set);
+}
+
+static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ int err;
+
+ if (nla[NFTA_SET_TABLE] == NULL)
+ return -EINVAL;
+
+ err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ if (!list_empty(&set->bindings))
+ return -EBUSY;
+
+ nf_tables_set_destroy(&ctx, set);
+ return 0;
+}
+
+static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_iter *iter,
+ const struct nft_set_elem *elem)
+{
+ enum nft_registers dreg;
+
+ dreg = nft_type_to_reg(set->dtype);
+ return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype);
+}
+
+int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding)
+{
+ struct nft_set_binding *i;
+ struct nft_set_iter iter;
+
+ if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ return -EBUSY;
+
+ if (set->flags & NFT_SET_MAP) {
+ /* If the set is already bound to the same chain all
+ * jumps are already validated for that chain.
+ */
+ list_for_each_entry(i, &set->bindings, list) {
+ if (i->chain == binding->chain)
+ goto bind;
+ }
+
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nf_tables_bind_check_setelem;
+
+ set->ops->walk(ctx, set, &iter);
+ if (iter.err < 0) {
+ /* Destroy anonymous sets if binding fails */
+ if (set->flags & NFT_SET_ANONYMOUS)
+ nf_tables_set_destroy(ctx, set);
+
+ return iter.err;
+ }
+ }
+bind:
+ binding->chain = ctx->chain;
+ list_add_tail(&binding->list, &set->bindings);
+ return 0;
+}
+
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding)
+{
+ list_del(&binding->list);
+
+ if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ nf_tables_set_destroy(ctx, set);
+}
+
+/*
+ * Set elements
+ */
+
+static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
+ [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
+ [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
+ [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+};
+
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ struct net *net = sock_net(skb->sk);
+
+ afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ return 0;
+}
+
+static int nf_tables_fill_setelem(struct sk_buff *skb,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
+ set->klen) < 0)
+ goto nla_put_failure;
+
+ if (set->flags & NFT_SET_MAP &&
+ !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+ set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen) < 0)
+ goto nla_put_failure;
+
+ if (elem->flags != 0)
+ if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+struct nft_set_dump_args {
+ const struct netlink_callback *cb;
+ struct nft_set_iter iter;
+ struct sk_buff *skb;
+};
+
+static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_iter *iter,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_dump_args *args;
+
+ args = container_of(iter, struct nft_set_dump_args, iter);
+ return nf_tables_fill_setelem(args->skb, set, elem);
+}
+
+static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct nft_set *set;
+ struct nft_set_dump_args args;
+ struct nft_ctx ctx;
+ struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ u32 portid, seq;
+ int event, err;
+
+ nfmsg = nlmsg_data(cb->nlh);
+ err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
+ nft_set_elem_list_policy);
+ if (err < 0)
+ return err;
+
+ err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ event = NFT_MSG_NEWSETELEM;
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ portid = NETLINK_CB(cb->skb).portid;
+ seq = cb->nlh->nlmsg_seq;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ NLM_F_MULTI);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = NFPROTO_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ args.cb = cb;
+ args.skb = skb;
+ args.iter.skip = cb->args[0];
+ args.iter.count = 0;
+ args.iter.err = 0;
+ args.iter.fn = nf_tables_dump_setelem;
+ set->ops->walk(&ctx, set, &args.iter);
+
+ nla_nest_end(skb, nest);
+ nlmsg_end(skb, nlh);
+
+ if (args.iter.err && args.iter.err != -EMSGSIZE)
+ return args.iter.err;
+ if (args.iter.count == cb->args[0])
+ return 0;
+
+ cb->args[0] = args.iter.count;
+ return skb->len;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nft_set *set;
+ struct nft_ctx ctx;
+ int err;
+
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_set,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+ return -EOPNOTSUPP;
+}
+
+static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_data_desc d1, d2;
+ struct nft_set_elem elem;
+ struct nft_set_binding *binding;
+ enum nft_registers dreg;
+ int err;
+
+ err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy);
+ if (err < 0)
+ return err;
+
+ if (nla[NFTA_SET_ELEM_KEY] == NULL)
+ return -EINVAL;
+
+ elem.flags = 0;
+ if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
+ elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+ if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ }
+
+ if (set->flags & NFT_SET_MAP) {
+ if (nla[NFTA_SET_ELEM_DATA] == NULL &&
+ !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ return -EINVAL;
+ }
+
+ err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ goto err1;
+ err = -EINVAL;
+ if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
+ goto err2;
+
+ err = -EEXIST;
+ if (set->ops->get(set, &elem) == 0)
+ goto err2;
+
+ if (nla[NFTA_SET_ELEM_DATA] != NULL) {
+ err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+ if (err < 0)
+ goto err2;
+
+ err = -EINVAL;
+ if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen)
+ goto err3;
+
+ dreg = nft_type_to_reg(set->dtype);
+ list_for_each_entry(binding, &set->bindings, list) {
+ struct nft_ctx bind_ctx = {
+ .afi = ctx->afi,
+ .table = ctx->table,
+ .chain = binding->chain,
+ };
+
+ err = nft_validate_data_load(&bind_ctx, dreg,
+ &elem.data, d2.type);
+ if (err < 0)
+ goto err3;
+ }
+ }
+
+ err = set->ops->insert(set, &elem);
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+err3:
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ nft_data_uninit(&elem.data, d2.type);
+err2:
+ nft_data_uninit(&elem.key, d1.type);
+err1:
+ return err;
+}
+
+static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nlattr *attr;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ int rem, err;
+
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ return -EBUSY;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_add_set_elem(&ctx, set, attr);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_data_desc desc;
+ struct nft_set_elem elem;
+ int err;
+
+ err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy);
+ if (err < 0)
+ goto err1;
+
+ err = -EINVAL;
+ if (nla[NFTA_SET_ELEM_KEY] == NULL)
+ goto err1;
+
+ err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ goto err1;
+
+ err = -EINVAL;
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ goto err2;
+
+ err = set->ops->get(set, &elem);
+ if (err < 0)
+ goto err2;
+
+ set->ops->remove(set, &elem);
+
+ nft_data_uninit(&elem.key, NFT_DATA_VALUE);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(&elem.data, set->dtype);
+
+err2:
+ nft_data_uninit(&elem.key, desc.type);
+err1:
+ return err;
+}
+
+static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nlattr *attr;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ int rem, err;
+
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ return -EBUSY;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_del_setelem(&ctx, set, attr);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+ [NFT_MSG_NEWTABLE] = {
+ .call = nf_tables_newtable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_GETTABLE] = {
+ .call = nf_tables_gettable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_DELTABLE] = {
+ .call = nf_tables_deltable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_NEWCHAIN] = {
+ .call = nf_tables_newchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_GETCHAIN] = {
+ .call = nf_tables_getchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_DELCHAIN] = {
+ .call = nf_tables_delchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_NEWRULE] = {
+ .call_batch = nf_tables_newrule,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_GETRULE] = {
+ .call = nf_tables_getrule,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_DELRULE] = {
+ .call_batch = nf_tables_delrule,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_NEWSET] = {
+ .call = nf_tables_newset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_GETSET] = {
+ .call = nf_tables_getset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_DELSET] = {
+ .call = nf_tables_delset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_NEWSETELEM] = {
+ .call = nf_tables_newsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_GETSETELEM] = {
+ .call = nf_tables_getsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_DELSETELEM] = {
+ .call = nf_tables_delsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+};
+
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+ .name = "nf_tables",
+ .subsys_id = NFNL_SUBSYS_NFTABLES,
+ .cb_count = NFT_MSG_MAX,
+ .cb = nf_tables_cb,
+ .commit = nf_tables_commit,
+ .abort = nf_tables_abort,
+};
+
+/*
+ * Loop detection - walk through the ruleset beginning at the destination chain
+ * of a new jump until either the source chain is reached (loop) or all
+ * reachable chains have been traversed.
+ *
+ * The loop check is performed whenever a new jump verdict is added to an
+ * expression or verdict map or a verdict map is bound to a new chain.
+ */
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+ const struct nft_chain *chain);
+
+static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_iter *iter,
+ const struct nft_set_elem *elem)
+{
+ switch (elem->data.verdict) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ return nf_tables_check_loops(ctx, elem->data.chain);
+ default:
+ return 0;
+ }
+}
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+ const struct nft_chain *chain)
+{
+ const struct nft_rule *rule;
+ const struct nft_expr *expr, *last;
+ const struct nft_set *set;
+ struct nft_set_binding *binding;
+ struct nft_set_iter iter;
+
+ if (ctx->chain == chain)
+ return -ELOOP;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ nft_rule_for_each_expr(expr, last, rule) {
+ const struct nft_data *data = NULL;
+ int err;
+
+ if (!expr->ops->validate)
+ continue;
+
+ err = expr->ops->validate(ctx, expr, &data);
+ if (err < 0)
+ return err;
+
+ if (data == NULL)
+ continue;
+
+ switch (data->verdict) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ err = nf_tables_check_loops(ctx, data->chain);
+ if (err < 0)
+ return err;
+ default:
+ break;
+ }
+ }
+ }
+
+ list_for_each_entry(set, &ctx->table->sets, list) {
+ if (!(set->flags & NFT_SET_MAP) ||
+ set->dtype != NFT_DATA_VERDICT)
+ continue;
+
+ list_for_each_entry(binding, &set->bindings, list) {
+ if (binding->chain != chain)
+ continue;
+
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nf_tables_loop_check_setelem;
+
+ set->ops->walk(ctx, set, &iter);
+ if (iter.err < 0)
+ return iter.err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * nft_validate_input_register - validate an expressions' input register
+ *
+ * @reg: the register number
+ *
+ * Validate that the input register is one of the general purpose
+ * registers.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+ if (reg <= NFT_REG_VERDICT)
+ return -EINVAL;
+ if (reg > NFT_REG_MAX)
+ return -ERANGE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ * nft_validate_output_register - validate an expressions' output register
+ *
+ * @reg: the register number
+ *
+ * Validate that the output register is one of the general purpose
+ * registers or the verdict register.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+ if (reg < NFT_REG_VERDICT)
+ return -EINVAL;
+ if (reg > NFT_REG_MAX)
+ return -ERANGE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ * nft_validate_data_load - validate an expressions' data load
+ *
+ * @ctx: context of the expression performing the load
+ * @reg: the destination register number
+ * @data: the data to load
+ * @type: the data type
+ *
+ * Validate that a data load uses the appropriate data type for
+ * the destination register. A value of NULL for the data means
+ * that its runtime gathered data, which is always of type
+ * NFT_DATA_VALUE.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type)
+{
+ int err;
+
+ switch (reg) {
+ case NFT_REG_VERDICT:
+ if (data == NULL || type != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
+ err = nf_tables_check_loops(ctx, data->chain);
+ if (err < 0)
+ return err;
+
+ if (ctx->chain->level + 1 > data->chain->level) {
+ if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
+ return -EMLINK;
+ data->chain->level = ctx->chain->level + 1;
+ }
+ }
+
+ return 0;
+ default:
+ if (data != NULL && type != NFT_DATA_VALUE)
+ return -EINVAL;
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+ [NFTA_VERDICT_CODE] = { .type = NLA_U32 },
+ [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+{
+ struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+ struct nft_chain *chain;
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_VERDICT_CODE])
+ return -EINVAL;
+ data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ case NFT_CONTINUE:
+ case NFT_BREAK:
+ case NFT_RETURN:
+ desc->len = sizeof(data->verdict);
+ break;
+ case NFT_JUMP:
+ case NFT_GOTO:
+ if (!tb[NFTA_VERDICT_CHAIN])
+ return -EINVAL;
+ chain = nf_tables_chain_lookup(ctx->table,
+ tb[NFTA_VERDICT_CHAIN]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ if (chain->flags & NFT_BASE_CHAIN)
+ return -EOPNOTSUPP;
+
+ chain->use++;
+ data->chain = chain;
+ desc->len = sizeof(data);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ desc->type = NFT_DATA_VERDICT;
+ return 0;
+}
+
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+ switch (data->verdict) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ data->chain->use--;
+ break;
+ }
+}
+
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+ goto nla_put_failure;
+
+ switch (data->verdict) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+{
+ unsigned int len;
+
+ len = nla_len(nla);
+ if (len == 0)
+ return -EINVAL;
+ if (len > sizeof(data->data))
+ return -EOVERFLOW;
+
+ nla_memcpy(data->data, nla, sizeof(data->data));
+ desc->type = NFT_DATA_VALUE;
+ desc->len = len;
+ return 0;
+}
+
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+ unsigned int len)
+{
+ return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
+ .len = FIELD_SIZEOF(struct nft_data, data) },
+ [NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
+};
+
+/**
+ * nft_data_init - parse nf_tables data netlink attributes
+ *
+ * @ctx: context of the expression using the data
+ * @data: destination struct nft_data
+ * @desc: data description
+ * @nla: netlink attribute containing data
+ *
+ * Parse the netlink data attributes and initialize a struct nft_data.
+ * The type and length of data are returned in the data description.
+ *
+ * The caller can indicate that it only wants to accept data of type
+ * NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+{
+ struct nlattr *tb[NFTA_DATA_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DATA_VALUE])
+ return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+ return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ * nft_data_uninit - release a nft_data item
+ *
+ * @data: struct nft_data to release
+ * @type: type of data
+ *
+ * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ * all others need to be released by calling this function.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+ switch (type) {
+ case NFT_DATA_VALUE:
+ return;
+ case NFT_DATA_VERDICT:
+ return nft_verdict_uninit(data);
+ default:
+ WARN_ON(1);
+ }
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{
+ struct nlattr *nest;
+ int err;
+
+ nest = nla_nest_start(skb, attr);
+ if (nest == NULL)
+ return -1;
+
+ switch (type) {
+ case NFT_DATA_VALUE:
+ err = nft_value_dump(skb, data, len);
+ break;
+ case NFT_DATA_VERDICT:
+ err = nft_verdict_dump(skb, data);
+ break;
+ default:
+ err = -EINVAL;
+ WARN_ON(1);
+ }
+
+ nla_nest_end(skb, nest);
+ return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+static int nf_tables_init_net(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nft.af_info);
+ INIT_LIST_HEAD(&net->nft.commit_list);
+ return 0;
+}
+
+static struct pernet_operations nf_tables_net_ops = {
+ .init = nf_tables_init_net,
+};
+
+static int __init nf_tables_module_init(void)
+{
+ int err;
+
+ info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+ GFP_KERNEL);
+ if (info == NULL) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ err = nf_tables_core_module_init();
+ if (err < 0)
+ goto err2;
+
+ err = nfnetlink_subsys_register(&nf_tables_subsys);
+ if (err < 0)
+ goto err3;
+
+ pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+ return register_pernet_subsys(&nf_tables_net_ops);
+err3:
+ nf_tables_core_module_exit();
+err2:
+ kfree(info);
+err1:
+ return err;
+}
+
+static void __exit nf_tables_module_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ nfnetlink_subsys_unregister(&nf_tables_subsys);
+ nf_tables_core_module_exit();
+ kfree(info);
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 0000000..cb9e685
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+
+static void nft_cmp_fast_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1])
+{
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ u32 mask;
+
+ mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
+ if ((data[priv->sreg].data[0] & mask) == priv->data)
+ return;
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static bool nft_payload_fast_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ struct nft_data *dest = &data[priv->dreg];
+ unsigned char *ptr;
+
+ if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
+ ptr = skb_network_header(skb);
+ else
+ ptr = skb_network_header(skb) + pkt->xt.thoff;
+
+ ptr += priv->offset;
+
+ if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
+ return false;
+
+ if (priv->len == 2)
+ *(u16 *)dest->data = *(u16 *)ptr;
+ else if (priv->len == 4)
+ *(u32 *)dest->data = *(u32 *)ptr;
+ else
+ *(u8 *)dest->data = *(u8 *)ptr;
+ return true;
+}
+
+struct nft_jumpstack {
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ int rulenum;
+};
+
+static inline void
+nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
+ struct nft_jumpstack *jumpstack, unsigned int stackptr)
+{
+ struct nft_stats __percpu *stats;
+ const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this;
+
+ rcu_read_lock_bh();
+ stats = rcu_dereference(nft_base_chain(chain)->stats);
+ __this_cpu_inc(stats->pkts);
+ __this_cpu_add(stats->bytes, pkt->skb->len);
+ rcu_read_unlock_bh();
+}
+
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+unsigned int
+nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
+{
+ const struct nft_chain *chain = ops->priv;
+ const struct nft_rule *rule;
+ const struct nft_expr *expr, *last;
+ struct nft_data data[NFT_REG_MAX + 1];
+ unsigned int stackptr = 0;
+ struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+ int rulenum = 0;
+ /*
+ * Cache cursor to avoid problems in case that the cursor is updated
+ * while traversing the ruleset.
+ */
+ unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+
+do_chain:
+ rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+
+ /* This rule is not active, skip. */
+ if (unlikely(rule->genmask & (1 << gencursor)))
+ continue;
+
+ rulenum++;
+
+ nft_rule_for_each_expr(expr, last, rule) {
+ if (expr->ops == &nft_cmp_fast_ops)
+ nft_cmp_fast_eval(expr, data);
+ else if (expr->ops != &nft_payload_fast_ops ||
+ !nft_payload_fast_eval(expr, data, pkt))
+ expr->ops->eval(expr, data, pkt);
+
+ if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+ break;
+ }
+
+ switch (data[NFT_REG_VERDICT].verdict) {
+ case NFT_BREAK:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ /* fall through */
+ case NFT_CONTINUE:
+ continue;
+ }
+ break;
+ }
+
+ switch (data[NFT_REG_VERDICT].verdict) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+ return data[NFT_REG_VERDICT].verdict;
+ case NFT_JUMP:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+ BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+ jumpstack[stackptr].chain = chain;
+ jumpstack[stackptr].rule = rule;
+ jumpstack[stackptr].rulenum = rulenum;
+ stackptr++;
+ /* fall through */
+ case NFT_GOTO:
+ chain = data[NFT_REG_VERDICT].chain;
+ goto do_chain;
+ case NFT_RETURN:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+
+ /* fall through */
+ case NFT_CONTINUE:
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ if (stackptr > 0) {
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+
+ stackptr--;
+ chain = jumpstack[stackptr].chain;
+ rule = jumpstack[stackptr].rule;
+ rulenum = jumpstack[stackptr].rulenum;
+ goto next_rule;
+ }
+ nft_chain_stats(chain, pkt, jumpstack, stackptr);
+
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
+
+ return nft_base_chain(chain)->policy;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo);
+
+int __init nf_tables_core_module_init(void)
+{
+ int err;
+
+ err = nft_immediate_module_init();
+ if (err < 0)
+ goto err1;
+
+ err = nft_cmp_module_init();
+ if (err < 0)
+ goto err2;
+
+ err = nft_lookup_module_init();
+ if (err < 0)
+ goto err3;
+
+ err = nft_bitwise_module_init();
+ if (err < 0)
+ goto err4;
+
+ err = nft_byteorder_module_init();
+ if (err < 0)
+ goto err5;
+
+ err = nft_payload_module_init();
+ if (err < 0)
+ goto err6;
+
+ return 0;
+
+err6:
+ nft_byteorder_module_exit();
+err5:
+ nft_bitwise_module_exit();
+err4:
+ nft_lookup_module_exit();
+err3:
+ nft_cmp_module_exit();
+err2:
+ nft_immediate_module_exit();
+err1:
+ return err;
+}
+
+void nf_tables_core_module_exit(void)
+{
+ nft_payload_module_exit();
+ nft_byteorder_module_exit();
+ nft_bitwise_module_exit();
+ nft_lookup_module_exit();
+ nft_cmp_module_exit();
+ nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87d..027f16a 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@
const struct nfnetlink_subsystem *ss;
int type, err;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
/* All the messages must at least contain nfgenmsg */
if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
return 0;
@@ -217,9 +214,179 @@
}
}
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+ u_int16_t subsys_id)
+{
+ struct sk_buff *nskb, *oskb = skb;
+ struct net *net = sock_net(skb->sk);
+ const struct nfnetlink_subsystem *ss;
+ const struct nfnl_callback *nc;
+ bool success = true, done = false;
+ int err;
+
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
+ return netlink_ack(skb, nlh, -EINVAL);
+replay:
+ nskb = netlink_skb_clone(oskb, GFP_KERNEL);
+ if (!nskb)
+ return netlink_ack(oskb, nlh, -ENOMEM);
+
+ nskb->sk = oskb->sk;
+ skb = nskb;
+
+ nfnl_lock(subsys_id);
+ ss = rcu_dereference_protected(table[subsys_id].subsys,
+ lockdep_is_held(&table[subsys_id].mutex));
+ if (!ss) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(subsys_id);
+ request_module("nfnetlink-subsys-%d", subsys_id);
+ nfnl_lock(subsys_id);
+ ss = rcu_dereference_protected(table[subsys_id].subsys,
+ lockdep_is_held(&table[subsys_id].mutex));
+ if (!ss)
+#endif
+ {
+ nfnl_unlock(subsys_id);
+ kfree_skb(nskb);
+ return netlink_ack(skb, nlh, -EOPNOTSUPP);
+ }
+ }
+
+ if (!ss->commit || !ss->abort) {
+ nfnl_unlock(subsys_id);
+ kfree_skb(nskb);
+ return netlink_ack(skb, nlh, -EOPNOTSUPP);
+ }
+
+ while (skb->len >= nlmsg_total_size(0)) {
+ int msglen, type;
+
+ nlh = nlmsg_hdr(skb);
+ err = 0;
+
+ if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ /* Only requests are handled by the kernel */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ type = nlh->nlmsg_type;
+ if (type == NFNL_MSG_BATCH_BEGIN) {
+ /* Malformed: Batch begin twice */
+ success = false;
+ goto done;
+ } else if (type == NFNL_MSG_BATCH_END) {
+ done = true;
+ goto done;
+ } else if (type < NLMSG_MIN_TYPE) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ /* We only accept a batch with messages for the same
+ * subsystem.
+ */
+ if (NFNL_SUBSYS_ID(type) != subsys_id) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ nc = nfnetlink_find_client(type, ss);
+ if (!nc) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ {
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ int attrlen = nlh->nlmsg_len - min_len;
+
+ err = nla_parse(cda, ss->cb[cb_id].attr_count,
+ attr, attrlen, ss->cb[cb_id].policy);
+ if (err < 0)
+ goto ack;
+
+ if (nc->call_batch) {
+ err = nc->call_batch(net->nfnl, skb, nlh,
+ (const struct nlattr **)cda);
+ }
+
+ /* The lock was released to autoload some module, we
+ * have to abort and start from scratch using the
+ * original skb.
+ */
+ if (err == -EAGAIN) {
+ ss->abort(skb);
+ nfnl_unlock(subsys_id);
+ kfree_skb(nskb);
+ goto replay;
+ }
+ }
+ack:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* We don't stop processing the batch on errors, thus,
+ * userspace gets all the errors that the batch
+ * triggers.
+ */
+ netlink_ack(skb, nlh, err);
+ if (err)
+ success = false;
+ }
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+ skb_pull(skb, msglen);
+ }
+done:
+ if (success && done)
+ ss->commit(skb);
+ else
+ ss->abort(skb);
+
+ nfnl_unlock(subsys_id);
+ kfree_skb(nskb);
+}
+
static void nfnetlink_rcv(struct sk_buff *skb)
{
- netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct net *net = sock_net(skb->sk);
+ int msglen;
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return netlink_ack(skb, nlh, -EPERM);
+
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len)
+ return;
+
+ if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
+ struct nfgenmsg *nfgenmsg;
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+ return;
+
+ nfgenmsg = nlmsg_data(nlh);
+ skb_pull(skb, msglen);
+ nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+ } else {
+ netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ }
}
#ifdef CONFIG_MODULES
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 0000000..4fb6ee2
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_bitwise {
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ u8 len;
+ struct nft_data mask;
+ struct nft_data xor;
+};
+
+static void nft_bitwise_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ const struct nft_data *src = &data[priv->sreg];
+ struct nft_data *dst = &data[priv->dreg];
+ unsigned int i;
+
+ for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
+ dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
+ priv->xor.data[i];
+ }
+}
+
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+ [NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_DREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_LEN] = { .type = NLA_U32 },
+ [NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
+ [NFTA_BITWISE_XOR] = { .type = NLA_NESTED },
+};
+
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_bitwise *priv = nft_expr_priv(expr);
+ struct nft_data_desc d1, d2;
+ int err;
+
+ if (tb[NFTA_BITWISE_SREG] == NULL ||
+ tb[NFTA_BITWISE_DREG] == NULL ||
+ tb[NFTA_BITWISE_LEN] == NULL ||
+ tb[NFTA_BITWISE_MASK] == NULL ||
+ tb[NFTA_BITWISE_XOR] == NULL)
+ return -EINVAL;
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+ err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+ if (err < 0)
+ return err;
+ if (d1.len != priv->len)
+ return -EINVAL;
+
+ err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+ if (err < 0)
+ return err;
+ if (d2.len != priv->len)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_bitwise_type;
+static const struct nft_expr_ops nft_bitwise_ops = {
+ .type = &nft_bitwise_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+ .eval = nft_bitwise_eval,
+ .init = nft_bitwise_init,
+ .dump = nft_bitwise_dump,
+};
+
+static struct nft_expr_type nft_bitwise_type __read_mostly = {
+ .name = "bitwise",
+ .ops = &nft_bitwise_ops,
+ .policy = nft_bitwise_policy,
+ .maxattr = NFTA_BITWISE_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_bitwise_module_init(void)
+{
+ return nft_register_expr(&nft_bitwise_type);
+}
+
+void nft_bitwise_module_exit(void)
+{
+ nft_unregister_expr(&nft_bitwise_type);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 0000000..c39ed8d
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_byteorder {
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ enum nft_byteorder_ops op:8;
+ u8 len;
+ u8 size;
+};
+
+static void nft_byteorder_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+ struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+ union { u32 u32; u16 u16; } *s, *d;
+ unsigned int i;
+
+ s = (void *)src->data;
+ d = (void *)dst->data;
+
+ switch (priv->size) {
+ case 4:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 4; i++)
+ d[i].u32 = ntohl((__force __be32)s[i].u32);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 4; i++)
+ d[i].u32 = (__force __u32)htonl(s[i].u32);
+ break;
+ }
+ break;
+ case 2:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 2; i++)
+ d[i].u16 = ntohs((__force __be16)s[i].u16);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 2; i++)
+ d[i].u16 = (__force __u16)htons(s[i].u16);
+ break;
+ }
+ break;
+ }
+}
+
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+ [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_OP] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 },
+};
+
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+ tb[NFTA_BYTEORDER_DREG] == NULL ||
+ tb[NFTA_BYTEORDER_LEN] == NULL ||
+ tb[NFTA_BYTEORDER_SIZE] == NULL ||
+ tb[NFTA_BYTEORDER_OP] == NULL)
+ return -EINVAL;
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ case NFT_BYTEORDER_HTON:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+ if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
+ return -EINVAL;
+
+ priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+ switch (priv->size) {
+ case 2:
+ case 4:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_byteorder_type;
+static const struct nft_expr_ops nft_byteorder_ops = {
+ .type = &nft_byteorder_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+ .eval = nft_byteorder_eval,
+ .init = nft_byteorder_init,
+ .dump = nft_byteorder_dump,
+};
+
+static struct nft_expr_type nft_byteorder_type __read_mostly = {
+ .name = "byteorder",
+ .ops = &nft_byteorder_ops,
+ .policy = nft_byteorder_policy,
+ .maxattr = NFTA_BYTEORDER_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_byteorder_module_init(void)
+{
+ return nft_register_expr(&nft_byteorder_type);
+}
+
+void nft_byteorder_module_exit(void)
+{
+ nft_unregister_expr(&nft_byteorder_type);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 0000000..954925d
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_cmp_expr {
+ struct nft_data data;
+ enum nft_registers sreg:8;
+ u8 len;
+ enum nft_cmp_ops op:8;
+};
+
+static void nft_cmp_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ int d;
+
+ d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+ switch (priv->op) {
+ case NFT_CMP_EQ:
+ if (d != 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_NEQ:
+ if (d == 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_LT:
+ if (d == 0)
+ goto mismatch;
+ case NFT_CMP_LTE:
+ if (d > 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_GT:
+ if (d == 0)
+ goto mismatch;
+ case NFT_CMP_GTE:
+ if (d < 0)
+ goto mismatch;
+ break;
+ }
+ return;
+
+mismatch:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+ [NFTA_CMP_SREG] = { .type = NLA_U32 },
+ [NFTA_CMP_OP] = { .type = NLA_U32 },
+ [NFTA_CMP_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc;
+ int err;
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ BUG_ON(err < 0);
+
+ priv->len = desc.len;
+ return 0;
+}
+
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_cmp_type;
+static const struct nft_expr_ops nft_cmp_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+ .eval = nft_cmp_eval,
+ .init = nft_cmp_init,
+ .dump = nft_cmp_dump,
+};
+
+static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc;
+ struct nft_data data;
+ u32 mask;
+ int err;
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ BUG_ON(err < 0);
+ desc.len *= BITS_PER_BYTE;
+
+ mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
+ priv->data = data.data[0] & mask;
+ priv->len = desc.len;
+ return 0;
+}
+
+static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data data;
+
+ if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
+ goto nla_put_failure;
+
+ data.data[0] = priv->data;
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &data,
+ NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+const struct nft_expr_ops nft_cmp_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp_fast_init,
+ .dump = nft_cmp_fast_dump,
+};
+
+static const struct nft_expr_ops *
+nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+ struct nft_data_desc desc;
+ struct nft_data data;
+ enum nft_registers sreg;
+ enum nft_cmp_ops op;
+ int err;
+
+ if (tb[NFTA_CMP_SREG] == NULL ||
+ tb[NFTA_CMP_OP] == NULL ||
+ tb[NFTA_CMP_DATA] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+ err = nft_validate_input_register(sreg);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+ switch (op) {
+ case NFT_CMP_EQ:
+ case NFT_CMP_NEQ:
+ case NFT_CMP_LT:
+ case NFT_CMP_LTE:
+ case NFT_CMP_GT:
+ case NFT_CMP_GTE:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
+ return &nft_cmp_fast_ops;
+ else
+ return &nft_cmp_ops;
+}
+
+static struct nft_expr_type nft_cmp_type __read_mostly = {
+ .name = "cmp",
+ .select_ops = nft_cmp_select_ops,
+ .policy = nft_cmp_policy,
+ .maxattr = NFTA_CMP_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_cmp_module_init(void)
+{
+ return nft_register_expr(&nft_cmp_type);
+}
+
+void nft_cmp_module_exit(void)
+{
+ nft_unregister_expr(&nft_cmp_type);
+}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
new file mode 100644
index 0000000..4811f76
--- /dev/null
+++ b/net/netfilter/nft_compat.c
@@ -0,0 +1,768 @@
+/*
+ * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_tables_compat.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <asm/uaccess.h> /* for set_fs */
+#include <net/netfilter/nf_tables.h>
+
+union nft_entry {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+};
+
+static inline void
+nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
+{
+ par->target = xt;
+ par->targinfo = xt_info;
+ par->hotdrop = false;
+}
+
+static void nft_target_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ int ret;
+
+ nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+
+ ret = target->target(skb, &pkt->xt);
+
+ if (pkt->xt.hotdrop)
+ ret = NF_DROP;
+
+ switch(ret) {
+ case XT_CONTINUE:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ break;
+ default:
+ data[NFT_REG_VERDICT].verdict = ret;
+ break;
+ }
+ return;
+}
+
+static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
+ [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_INFO] = { .type = NLA_BINARY },
+};
+
+static void
+nft_target_set_tgchk_param(struct xt_tgchk_param *par,
+ const struct nft_ctx *ctx,
+ struct xt_target *target, void *info,
+ union nft_entry *entry, u8 proto, bool inv)
+{
+ par->net = &init_net;
+ par->table = ctx->table->name;
+ switch (ctx->afi->family) {
+ case AF_INET:
+ entry->e4.ip.proto = proto;
+ entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+ break;
+ case AF_INET6:
+ entry->e6.ipv6.proto = proto;
+ entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+ break;
+ }
+ par->entryinfo = entry;
+ par->target = target;
+ par->targinfo = info;
+ if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ }
+ par->family = ctx->afi->family;
+}
+
+static void target_compat_from_user(struct xt_target *t, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+ if (t->compat_from_user) {
+ int pad;
+
+ t->compat_from_user(out, in);
+ pad = XT_ALIGN(t->targetsize) - t->targetsize;
+ if (pad > 0)
+ memset(out + t->targetsize, 0, pad);
+ } else
+#endif
+ memcpy(out, in, XT_ALIGN(t->targetsize));
+}
+
+static inline int nft_compat_target_offset(struct xt_target *target)
+{
+#ifdef CONFIG_COMPAT
+ return xt_compat_target_offset(target);
+#else
+ return 0;
+#endif
+}
+
+static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
+ [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 },
+ [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 },
+};
+
+static u8 nft_parse_compat(const struct nlattr *attr, bool *inv)
+{
+ struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 flags;
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
+ nft_rule_compat_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS])
+ return -EINVAL;
+
+ flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
+ if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ return -EINVAL;
+ if (flags & NFT_RULE_COMPAT_F_INV)
+ *inv = true;
+
+ return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+}
+
+static int
+nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct xt_tgchk_param par;
+ size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+ u8 proto = 0;
+ bool inv = false;
+ union nft_entry e = {};
+ int ret;
+
+ target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
+
+ if (ctx->nla[NFTA_RULE_COMPAT])
+ proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
+
+ nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
+ ret = xt_check_target(&par, size, proto, inv);
+ if (ret < 0)
+ goto err;
+
+ /* The standard target cannot be used */
+ if (target->target == NULL) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ module_put(target->me);
+ return ret;
+}
+
+static void
+nft_target_destroy(const struct nft_expr *expr)
+{
+ struct xt_target *target = expr->ops->data;
+
+ module_put(target->me);
+}
+
+static int
+target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
+{
+ int ret;
+
+#ifdef CONFIG_COMPAT
+ if (t->compat_to_user) {
+ mm_segment_t old_fs;
+ void *out;
+
+ out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
+ if (out == NULL)
+ return -ENOMEM;
+
+ /* We want to reuse existing compat_to_user */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ t->compat_to_user(out, in);
+ set_fs(old_fs);
+ ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
+ kfree(out);
+ } else
+#endif
+ ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
+
+ return ret;
+}
+
+static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
+ nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
+ target_dump_info(skb, target, info))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_target_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct xt_target *target = expr->ops->data;
+ unsigned int hook_mask = 0;
+
+ if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (hook_mask & target->hooks)
+ return 0;
+
+ /* This target is being called from an invalid chain */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_match *match = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ bool ret;
+
+ nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
+
+ ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
+
+ if (pkt->xt.hotdrop) {
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+ return;
+ }
+
+ switch(ret) {
+ case true:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ break;
+ case false:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ break;
+ }
+}
+
+static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
+ [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_INFO] = { .type = NLA_BINARY },
+};
+
+/* struct xt_mtchk_param and xt_tgchk_param look very similar */
+static void
+nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
+ struct xt_match *match, void *info,
+ union nft_entry *entry, u8 proto, bool inv)
+{
+ par->net = &init_net;
+ par->table = ctx->table->name;
+ switch (ctx->afi->family) {
+ case AF_INET:
+ entry->e4.ip.proto = proto;
+ entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+ break;
+ case AF_INET6:
+ entry->e6.ipv6.proto = proto;
+ entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+ break;
+ }
+ par->entryinfo = entry;
+ par->match = match;
+ par->matchinfo = info;
+ if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ }
+ par->family = ctx->afi->family;
+}
+
+static void match_compat_from_user(struct xt_match *m, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+ if (m->compat_from_user) {
+ int pad;
+
+ m->compat_from_user(out, in);
+ pad = XT_ALIGN(m->matchsize) - m->matchsize;
+ if (pad > 0)
+ memset(out + m->matchsize, 0, pad);
+ } else
+#endif
+ memcpy(out, in, XT_ALIGN(m->matchsize));
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_match *match = expr->ops->data;
+ struct xt_mtchk_param par;
+ size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+ u8 proto = 0;
+ bool inv = false;
+ union nft_entry e = {};
+ int ret;
+
+ match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
+
+ if (ctx->nla[NFTA_RULE_COMPAT])
+ proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
+
+ nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
+ ret = xt_check_match(&par, size, proto, inv);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ module_put(match->me);
+ return ret;
+}
+
+static void
+nft_match_destroy(const struct nft_expr *expr)
+{
+ struct xt_match *match = expr->ops->data;
+
+ module_put(match->me);
+}
+
+static int
+match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
+{
+ int ret;
+
+#ifdef CONFIG_COMPAT
+ if (m->compat_to_user) {
+ mm_segment_t old_fs;
+ void *out;
+
+ out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
+ if (out == NULL)
+ return -ENOMEM;
+
+ /* We want to reuse existing compat_to_user */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ m->compat_to_user(out, in);
+ set_fs(old_fs);
+ ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
+ kfree(out);
+ } else
+#endif
+ ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
+
+ return ret;
+}
+
+static inline int nft_compat_match_offset(struct xt_match *match)
+{
+#ifdef CONFIG_COMPAT
+ return xt_compat_match_offset(match);
+#else
+ return 0;
+#endif
+}
+
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_match *match = expr->ops->data;
+
+ if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
+ nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
+ match_dump_info(skb, match, info))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_match_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct xt_match *match = expr->ops->data;
+ unsigned int hook_mask = 0;
+
+ if (ctx->chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (hook_mask & match->hooks)
+ return 0;
+
+ /* This match is being called from an invalid chain */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ int event, u16 family, const char *name,
+ int rev, int target)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+ event |= NFNL_SUBSYS_NFT_COMPAT << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
+ nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
+ nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ int ret = 0, target;
+ struct nfgenmsg *nfmsg;
+ const char *fmt;
+ const char *name;
+ u32 rev;
+ struct sk_buff *skb2;
+
+ if (tb[NFTA_COMPAT_NAME] == NULL ||
+ tb[NFTA_COMPAT_REV] == NULL ||
+ tb[NFTA_COMPAT_TYPE] == NULL)
+ return -EINVAL;
+
+ name = nla_data(tb[NFTA_COMPAT_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
+ target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
+
+ nfmsg = nlmsg_data(nlh);
+
+ switch(nfmsg->nfgen_family) {
+ case AF_INET:
+ fmt = "ipt_%s";
+ break;
+ case AF_INET6:
+ fmt = "ip6t_%s";
+ break;
+ default:
+ pr_err("nft_compat: unsupported protocol %d\n",
+ nfmsg->nfgen_family);
+ return -EINVAL;
+ }
+
+ try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
+ rev, target, &ret),
+ fmt, name);
+
+ if (ret < 0)
+ return ret;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ /* include the best revision for this extension in the message */
+ if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_COMPAT_GET,
+ nfmsg->nfgen_family,
+ name, ret, target) <= 0) {
+ kfree_skb(skb2);
+ return -ENOSPC;
+ }
+
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
+
+ return ret == -EAGAIN ? -ENOBUFS : ret;
+}
+
+static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
+ [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
+ .len = NFT_COMPAT_NAME_MAX-1 },
+ [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
+};
+
+static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
+ [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get,
+ .attr_count = NFTA_COMPAT_MAX,
+ .policy = nfnl_compat_policy_get },
+};
+
+static const struct nfnetlink_subsystem nfnl_compat_subsys = {
+ .name = "nft-compat",
+ .subsys_id = NFNL_SUBSYS_NFT_COMPAT,
+ .cb_count = NFNL_MSG_COMPAT_MAX,
+ .cb = nfnl_nft_compat_cb,
+};
+
+static LIST_HEAD(nft_match_list);
+
+struct nft_xt {
+ struct list_head head;
+ struct nft_expr_ops ops;
+};
+
+static struct nft_expr_type nft_match_type;
+
+static const struct nft_expr_ops *
+nft_match_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ struct nft_xt *nft_match;
+ struct xt_match *match;
+ char *mt_name;
+ __u32 rev, family;
+
+ if (tb[NFTA_MATCH_NAME] == NULL ||
+ tb[NFTA_MATCH_REV] == NULL ||
+ tb[NFTA_MATCH_INFO] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ mt_name = nla_data(tb[NFTA_MATCH_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
+ family = ctx->afi->family;
+
+ /* Re-use the existing match if it's already loaded. */
+ list_for_each_entry(nft_match, &nft_match_list, head) {
+ struct xt_match *match = nft_match->ops.data;
+
+ if (strcmp(match->name, mt_name) == 0 &&
+ match->revision == rev && match->family == family)
+ return &nft_match->ops;
+ }
+
+ match = xt_request_find_match(family, mt_name, rev);
+ if (IS_ERR(match))
+ return ERR_PTR(-ENOENT);
+
+ /* This is the first time we use this match, allocate operations */
+ nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+ if (nft_match == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ nft_match->ops.type = &nft_match_type;
+ nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
+ nft_compat_match_offset(match));
+ nft_match->ops.eval = nft_match_eval;
+ nft_match->ops.init = nft_match_init;
+ nft_match->ops.destroy = nft_match_destroy;
+ nft_match->ops.dump = nft_match_dump;
+ nft_match->ops.validate = nft_match_validate;
+ nft_match->ops.data = match;
+
+ list_add(&nft_match->head, &nft_match_list);
+
+ return &nft_match->ops;
+}
+
+static void nft_match_release(void)
+{
+ struct nft_xt *nft_match;
+
+ list_for_each_entry(nft_match, &nft_match_list, head)
+ kfree(nft_match);
+}
+
+static struct nft_expr_type nft_match_type __read_mostly = {
+ .name = "match",
+ .select_ops = nft_match_select_ops,
+ .policy = nft_match_policy,
+ .maxattr = NFTA_MATCH_MAX,
+ .owner = THIS_MODULE,
+};
+
+static LIST_HEAD(nft_target_list);
+
+static struct nft_expr_type nft_target_type;
+
+static const struct nft_expr_ops *
+nft_target_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ struct nft_xt *nft_target;
+ struct xt_target *target;
+ char *tg_name;
+ __u32 rev, family;
+
+ if (tb[NFTA_TARGET_NAME] == NULL ||
+ tb[NFTA_TARGET_REV] == NULL ||
+ tb[NFTA_TARGET_INFO] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ tg_name = nla_data(tb[NFTA_TARGET_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
+ family = ctx->afi->family;
+
+ /* Re-use the existing target if it's already loaded. */
+ list_for_each_entry(nft_target, &nft_match_list, head) {
+ struct xt_target *target = nft_target->ops.data;
+
+ if (strcmp(target->name, tg_name) == 0 &&
+ target->revision == rev && target->family == family)
+ return &nft_target->ops;
+ }
+
+ target = xt_request_find_target(family, tg_name, rev);
+ if (IS_ERR(target))
+ return ERR_PTR(-ENOENT);
+
+ /* This is the first time we use this target, allocate operations */
+ nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+ if (nft_target == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ nft_target->ops.type = &nft_target_type;
+ nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
+ nft_compat_target_offset(target));
+ nft_target->ops.eval = nft_target_eval;
+ nft_target->ops.init = nft_target_init;
+ nft_target->ops.destroy = nft_target_destroy;
+ nft_target->ops.dump = nft_target_dump;
+ nft_target->ops.validate = nft_target_validate;
+ nft_target->ops.data = target;
+
+ list_add(&nft_target->head, &nft_target_list);
+
+ return &nft_target->ops;
+}
+
+static void nft_target_release(void)
+{
+ struct nft_xt *nft_target;
+
+ list_for_each_entry(nft_target, &nft_target_list, head)
+ kfree(nft_target);
+}
+
+static struct nft_expr_type nft_target_type __read_mostly = {
+ .name = "target",
+ .select_ops = nft_target_select_ops,
+ .policy = nft_target_policy,
+ .maxattr = NFTA_TARGET_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_compat_module_init(void)
+{
+ int ret;
+
+ ret = nft_register_expr(&nft_match_type);
+ if (ret < 0)
+ return ret;
+
+ ret = nft_register_expr(&nft_target_type);
+ if (ret < 0)
+ goto err_match;
+
+ ret = nfnetlink_subsys_register(&nfnl_compat_subsys);
+ if (ret < 0) {
+ pr_err("nft_compat: cannot register with nfnetlink.\n");
+ goto err_target;
+ }
+
+ pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
+
+ return ret;
+
+err_target:
+ nft_unregister_expr(&nft_target_type);
+err_match:
+ nft_unregister_expr(&nft_match_type);
+ return ret;
+}
+
+static void __exit nft_compat_module_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfnl_compat_subsys);
+ nft_unregister_expr(&nft_target_type);
+ nft_unregister_expr(&nft_match_type);
+ nft_match_release();
+ nft_target_release();
+}
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
+
+module_init(nft_compat_module_init);
+module_exit(nft_compat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("match");
+MODULE_ALIAS_NFT_EXPR("target");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 0000000..c89ee48
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_counter {
+ seqlock_t lock;
+ u64 bytes;
+ u64 packets;
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter *priv = nft_expr_priv(expr);
+
+ write_seqlock_bh(&priv->lock);
+ priv->bytes += pkt->skb->len;
+ priv->packets++;
+ write_sequnlock_bh(&priv->lock);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter *priv = nft_expr_priv(expr);
+ unsigned int seq;
+ u64 bytes;
+ u64 packets;
+
+ do {
+ seq = read_seqbegin(&priv->lock);
+ bytes = priv->bytes;
+ packets = priv->packets;
+ } while (read_seqretry(&priv->lock, seq));
+
+ if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+ [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
+ [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
+};
+
+static int nft_counter_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_counter *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_COUNTER_PACKETS])
+ priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ if (tb[NFTA_COUNTER_BYTES])
+ priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+
+ seqlock_init(&priv->lock);
+ return 0;
+}
+
+static struct nft_expr_type nft_counter_type;
+static const struct nft_expr_ops nft_counter_ops = {
+ .type = &nft_counter_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+ .eval = nft_counter_eval,
+ .init = nft_counter_init,
+ .dump = nft_counter_dump,
+};
+
+static struct nft_expr_type nft_counter_type __read_mostly = {
+ .name = "counter",
+ .ops = &nft_counter_ops,
+ .policy = nft_counter_policy,
+ .maxattr = NFTA_COUNTER_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_counter_module_init(void)
+{
+ return nft_register_expr(&nft_counter_type);
+}
+
+static void __exit nft_counter_module_exit(void)
+{
+ nft_unregister_expr(&nft_counter_type);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 0000000..955f4e6
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+struct nft_ct {
+ enum nft_ct_keys key:8;
+ enum ip_conntrack_dir dir:8;
+ enum nft_registers dreg:8;
+ uint8_t family;
+};
+
+static void nft_ct_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct nft_data *dest = &data[priv->dreg];
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_tuple *tuple;
+ const struct nf_conntrack_helper *helper;
+ long diff;
+ unsigned int state;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+
+ switch (priv->key) {
+ case NFT_CT_STATE:
+ if (ct == NULL)
+ state = NF_CT_STATE_INVALID_BIT;
+ else if (nf_ct_is_untracked(ct))
+ state = NF_CT_STATE_UNTRACKED_BIT;
+ else
+ state = NF_CT_STATE_BIT(ctinfo);
+ dest->data[0] = state;
+ return;
+ }
+
+ if (ct == NULL)
+ goto err;
+
+ switch (priv->key) {
+ case NFT_CT_DIRECTION:
+ dest->data[0] = CTINFO2DIR(ctinfo);
+ return;
+ case NFT_CT_STATUS:
+ dest->data[0] = ct->status;
+ return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ dest->data[0] = ct->mark;
+ return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ dest->data[0] = ct->secmark;
+ return;
+#endif
+ case NFT_CT_EXPIRATION:
+ diff = (long)jiffies - (long)ct->timeout.expires;
+ if (diff < 0)
+ diff = 0;
+ dest->data[0] = jiffies_to_msecs(diff);
+ return;
+ case NFT_CT_HELPER:
+ if (ct->master == NULL)
+ goto err;
+ help = nfct_help(ct->master);
+ if (help == NULL)
+ goto err;
+ helper = rcu_dereference(help->helper);
+ if (helper == NULL)
+ goto err;
+ if (strlen(helper->name) >= sizeof(dest->data))
+ goto err;
+ strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+ return;
+ }
+
+ tuple = &ct->tuplehash[priv->dir].tuple;
+ switch (priv->key) {
+ case NFT_CT_L3PROTOCOL:
+ dest->data[0] = nf_ct_l3num(ct);
+ return;
+ case NFT_CT_SRC:
+ memcpy(dest->data, tuple->src.u3.all,
+ nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ return;
+ case NFT_CT_DST:
+ memcpy(dest->data, tuple->dst.u3.all,
+ nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ return;
+ case NFT_CT_PROTOCOL:
+ dest->data[0] = nf_ct_protonum(ct);
+ return;
+ case NFT_CT_PROTO_SRC:
+ dest->data[0] = (__force __u16)tuple->src.u.all;
+ return;
+ case NFT_CT_PROTO_DST:
+ dest->data[0] = (__force __u16)tuple->dst.u.all;
+ return;
+ }
+ return;
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+ [NFTA_CT_DREG] = { .type = NLA_U32 },
+ [NFTA_CT_KEY] = { .type = NLA_U32 },
+ [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
+};
+
+static int nft_ct_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ct *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_CT_DREG] == NULL ||
+ tb[NFTA_CT_KEY] == NULL)
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (priv->key) {
+ case NFT_CT_STATE:
+ case NFT_CT_DIRECTION:
+ case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+#endif
+ case NFT_CT_EXPIRATION:
+ case NFT_CT_HELPER:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ break;
+ case NFT_CT_PROTOCOL:
+ case NFT_CT_SRC:
+ case NFT_CT_DST:
+ case NFT_CT_PROTO_SRC:
+ case NFT_CT_PROTO_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
+ priv->family = ctx->afi->family;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ goto err1;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ goto err1;
+ return 0;
+
+err1:
+ nf_ct_l3proto_module_put(ctx->afi->family);
+ return err;
+}
+
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+ struct nft_ct *priv = nft_expr_priv(expr);
+
+ nf_ct_l3proto_module_put(priv->family);
+}
+
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_ct_type;
+static const struct nft_expr_ops nft_ct_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_eval,
+ .init = nft_ct_init,
+ .destroy = nft_ct_destroy,
+ .dump = nft_ct_dump,
+};
+
+static struct nft_expr_type nft_ct_type __read_mostly = {
+ .name = "ct",
+ .ops = &nft_ct_ops,
+ .policy = nft_ct_policy,
+ .maxattr = NFTA_CT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_ct_module_init(void)
+{
+ return nft_register_expr(&nft_ct_type);
+}
+
+static void __exit nft_ct_module_exit(void)
+{
+ nft_unregister_expr(&nft_ct_type);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 0000000..b6eed4d
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_template {
+
+};
+
+static void nft_template_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+ [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 },
+};
+
+static int nft_template_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_template *priv = nft_expr_priv(expr);
+
+ return 0;
+}
+
+static void nft_template_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_template *priv = nft_expr_priv(expr);
+
+ NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_template_type;
+static const struct nft_expr_ops nft_template_ops = {
+ .type = &nft_template_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_template)),
+ .eval = nft_template_eval,
+ .init = nft_template_init,
+ .destroy = nft_template_destroy,
+ .dump = nft_template_dump,
+};
+
+static struct nft_expr_type nft_template_type __read_mostly = {
+ .name = "template",
+ .ops = &nft_template_ops,
+ .policy = nft_template_policy,
+ .maxattr = NFTA_TEMPLATE_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_template_module_init(void)
+{
+ return nft_register_expr(&nft_template_type);
+}
+
+static void __exit nft_template_module_exit(void)
+{
+ nft_unregister_expr(&nft_template_type);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 0000000..8e0bb75
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+struct nft_exthdr {
+ u8 type;
+ u8 offset;
+ u8 len;
+ enum nft_registers dreg:8;
+};
+
+static void nft_exthdr_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_data *dest = &data[priv->dreg];
+ unsigned int offset;
+ int err;
+
+ err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+ if (err < 0)
+ goto err;
+ offset += priv->offset;
+
+ if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+ goto err;
+ return;
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+ [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
+ [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
+};
+
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_EXTHDR_DREG] == NULL ||
+ tb[NFTA_EXTHDR_TYPE] == NULL ||
+ tb[NFTA_EXTHDR_OFFSET] == NULL ||
+ tb[NFTA_EXTHDR_LEN] == NULL)
+ return -EINVAL;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+ if (priv->len == 0 ||
+ priv->len > FIELD_SIZEOF(struct nft_data, data))
+ return -EINVAL;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+ return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_exthdr_type;
+static const struct nft_expr_ops nft_exthdr_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+};
+
+static struct nft_expr_type nft_exthdr_type __read_mostly = {
+ .name = "exthdr",
+ .ops = &nft_exthdr_ops,
+ .policy = nft_exthdr_policy,
+ .maxattr = NFTA_EXTHDR_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_exthdr_module_init(void)
+{
+ return nft_register_expr(&nft_exthdr_type);
+}
+
+static void __exit nft_exthdr_module_exit(void)
+{
+ nft_unregister_expr(&nft_exthdr_type);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 0000000..3d3f8fc
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_hash {
+ struct hlist_head *hash;
+ unsigned int hsize;
+};
+
+struct nft_hash_elem {
+ struct hlist_node hnode;
+ struct nft_data key;
+ struct nft_data data[];
+};
+
+static u32 nft_hash_rnd __read_mostly;
+static bool nft_hash_rnd_initted __read_mostly;
+
+static unsigned int nft_hash_data(const struct nft_data *data,
+ unsigned int hsize, unsigned int len)
+{
+ unsigned int h;
+
+ h = jhash(data->data, len, nft_hash_rnd);
+ return ((u64)h * hsize) >> 32;
+}
+
+static bool nft_hash_lookup(const struct nft_set *set,
+ const struct nft_data *key,
+ struct nft_data *data)
+{
+ const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he;
+ unsigned int h;
+
+ h = nft_hash_data(key, priv->hsize, set->klen);
+ hlist_for_each_entry(he, &priv->hash[h], hnode) {
+ if (nft_data_cmp(&he->key, key, set->klen))
+ continue;
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(data, he->data);
+ return true;
+ }
+ return false;
+}
+
+static void nft_hash_elem_destroy(const struct nft_set *set,
+ struct nft_hash_elem *he)
+{
+ nft_data_uninit(&he->key, NFT_DATA_VALUE);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(he->data, set->dtype);
+ kfree(he);
+}
+
+static int nft_hash_insert(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ unsigned int size, h;
+
+ if (elem->flags != 0)
+ return -EINVAL;
+
+ size = sizeof(*he);
+ if (set->flags & NFT_SET_MAP)
+ size += sizeof(he->data[0]);
+
+ he = kzalloc(size, GFP_KERNEL);
+ if (he == NULL)
+ return -ENOMEM;
+
+ nft_data_copy(&he->key, &elem->key);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(he->data, &elem->data);
+
+ h = nft_hash_data(&he->key, priv->hsize, set->klen);
+ hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
+ return 0;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->cookie;
+
+ hlist_del_rcu(&he->hnode);
+ kfree(he);
+}
+
+static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+ const struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ unsigned int h;
+
+ h = nft_hash_data(&elem->key, priv->hsize, set->klen);
+ hlist_for_each_entry(he, &priv->hash[h], hnode) {
+ if (nft_data_cmp(&he->key, &elem->key, set->klen))
+ continue;
+
+ elem->cookie = he;
+ elem->flags = 0;
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&elem->data, he->data);
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he;
+ struct nft_set_elem elem;
+ unsigned int i;
+
+ for (i = 0; i < priv->hsize; i++) {
+ hlist_for_each_entry(he, &priv->hash[i], hnode) {
+ if (iter->count < iter->skip)
+ goto cont;
+
+ memcpy(&elem.key, &he->key, sizeof(elem.key));
+ if (set->flags & NFT_SET_MAP)
+ memcpy(&elem.data, he->data, sizeof(elem.data));
+ elem.flags = 0;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+ }
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_hash);
+}
+
+static int nft_hash_init(const struct nft_set *set,
+ const struct nlattr * const tb[])
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ unsigned int cnt, i;
+
+ if (unlikely(!nft_hash_rnd_initted)) {
+ get_random_bytes(&nft_hash_rnd, 4);
+ nft_hash_rnd_initted = true;
+ }
+
+ /* Aim for a load factor of 0.75 */
+ // FIXME: temporarily broken until we have set descriptions
+ cnt = 100;
+ cnt = cnt * 4 / 3;
+
+ priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+ if (priv->hash == NULL)
+ return -ENOMEM;
+ priv->hsize = cnt;
+
+ for (i = 0; i < cnt; i++)
+ INIT_HLIST_HEAD(&priv->hash[i]);
+
+ return 0;
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ const struct nft_hash *priv = nft_set_priv(set);
+ const struct hlist_node *next;
+ struct nft_hash_elem *elem;
+ unsigned int i;
+
+ for (i = 0; i < priv->hsize; i++) {
+ hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
+ hlist_del(&elem->hnode);
+ nft_hash_elem_destroy(set, elem);
+ }
+ }
+ kfree(priv->hash);
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+ .privsize = nft_hash_privsize,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .get = nft_hash_get,
+ .insert = nft_hash_insert,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup,
+ .walk = nft_hash_walk,
+ .features = NFT_SET_MAP,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+ return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+ nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 0000000..f169501
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_immediate_expr {
+ struct nft_data data;
+ enum nft_registers dreg:8;
+ u8 dlen;
+};
+
+static void nft_immediate_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ nft_data_copy(&data[priv->dreg], &priv->data);
+}
+
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+ [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 },
+ [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_immediate_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc;
+ int err;
+
+ if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+ tb[NFTA_IMMEDIATE_DATA] == NULL)
+ return -EINVAL;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ if (err < 0)
+ return err;
+ priv->dlen = desc.len;
+
+ err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+
+err1:
+ nft_data_uninit(&priv->data, desc.type);
+ return err;
+}
+
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+
+ return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+ nft_dreg_to_type(priv->dreg), priv->dlen);
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_immediate_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ *data = &priv->data;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_imm_type;
+static const struct nft_expr_ops nft_imm_ops = {
+ .type = &nft_imm_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+ .eval = nft_immediate_eval,
+ .init = nft_immediate_init,
+ .destroy = nft_immediate_destroy,
+ .dump = nft_immediate_dump,
+ .validate = nft_immediate_validate,
+};
+
+static struct nft_expr_type nft_imm_type __read_mostly = {
+ .name = "immediate",
+ .ops = &nft_imm_ops,
+ .policy = nft_immediate_policy,
+ .maxattr = NFTA_IMMEDIATE_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_immediate_module_init(void)
+{
+ return nft_register_expr(&nft_imm_type);
+}
+
+void nft_immediate_module_exit(void)
+{
+ nft_unregister_expr(&nft_imm_type);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 0000000..85da5bd
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(limit_lock);
+
+struct nft_limit {
+ u64 tokens;
+ u64 rate;
+ u64 unit;
+ unsigned long stamp;
+};
+
+static void nft_limit_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+
+ spin_lock_bh(&limit_lock);
+ if (time_after_eq(jiffies, priv->stamp)) {
+ priv->tokens = priv->rate;
+ priv->stamp = jiffies + priv->unit * HZ;
+ }
+
+ if (priv->tokens >= 1) {
+ priv->tokens--;
+ spin_unlock_bh(&limit_lock);
+ return;
+ }
+ spin_unlock_bh(&limit_lock);
+
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+ [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
+ [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
+};
+
+static int nft_limit_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_LIMIT_RATE] == NULL ||
+ tb[NFTA_LIMIT_UNIT] == NULL)
+ return -EINVAL;
+
+ priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+ priv->stamp = jiffies + priv->unit * HZ;
+ priv->tokens = priv->rate;
+ return 0;
+}
+
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_limit *priv = nft_expr_priv(expr);
+
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_limit_type;
+static const struct nft_expr_ops nft_limit_ops = {
+ .type = &nft_limit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+ .eval = nft_limit_eval,
+ .init = nft_limit_init,
+ .dump = nft_limit_dump,
+};
+
+static struct nft_expr_type nft_limit_type __read_mostly = {
+ .name = "limit",
+ .ops = &nft_limit_ops,
+ .policy = nft_limit_policy,
+ .maxattr = NFTA_LIMIT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_limit_module_init(void)
+{
+ return nft_register_expr(&nft_limit_type);
+}
+
+static void __exit nft_limit_module_exit(void)
+{
+ nft_unregister_expr(&nft_limit_type);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 0000000..57cad07
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+static const char *nft_log_null_prefix = "";
+
+struct nft_log {
+ struct nf_loginfo loginfo;
+ char *prefix;
+ int family;
+};
+
+static void nft_log_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_log *priv = nft_expr_priv(expr);
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
+ pkt->out, &priv->loginfo, "%s", priv->prefix);
+}
+
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+ [NFTA_LOG_GROUP] = { .type = NLA_U16 },
+ [NFTA_LOG_PREFIX] = { .type = NLA_STRING },
+ [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
+ [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
+};
+
+static int nft_log_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_log *priv = nft_expr_priv(expr);
+ struct nf_loginfo *li = &priv->loginfo;
+ const struct nlattr *nla;
+
+ priv->family = ctx->afi->family;
+
+ nla = tb[NFTA_LOG_PREFIX];
+ if (nla != NULL) {
+ priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+ if (priv->prefix == NULL)
+ return -ENOMEM;
+ nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
+ } else
+ priv->prefix = (char *)nft_log_null_prefix;
+
+ li->type = NF_LOG_TYPE_ULOG;
+ if (tb[NFTA_LOG_GROUP] != NULL)
+ li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+
+ if (tb[NFTA_LOG_SNAPLEN] != NULL)
+ li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+ if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+ li->u.ulog.qthreshold =
+ ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ }
+
+ return 0;
+}
+
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+ struct nft_log *priv = nft_expr_priv(expr);
+
+ if (priv->prefix != nft_log_null_prefix)
+ kfree(priv->prefix);
+}
+
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_log *priv = nft_expr_priv(expr);
+ const struct nf_loginfo *li = &priv->loginfo;
+
+ if (priv->prefix != nft_log_null_prefix)
+ if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+ goto nla_put_failure;
+ if (li->u.ulog.group)
+ if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+ goto nla_put_failure;
+ if (li->u.ulog.copy_len)
+ if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+ htonl(li->u.ulog.copy_len)))
+ goto nla_put_failure;
+ if (li->u.ulog.qthreshold)
+ if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+ htons(li->u.ulog.qthreshold)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_log_type;
+static const struct nft_expr_ops nft_log_ops = {
+ .type = &nft_log_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_log)),
+ .eval = nft_log_eval,
+ .init = nft_log_init,
+ .destroy = nft_log_destroy,
+ .dump = nft_log_dump,
+};
+
+static struct nft_expr_type nft_log_type __read_mostly = {
+ .name = "log",
+ .ops = &nft_log_ops,
+ .policy = nft_log_policy,
+ .maxattr = NFTA_LOG_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_log_module_init(void)
+{
+ return nft_register_expr(&nft_log_type);
+}
+
+static void __exit nft_log_module_exit(void)
+{
+ nft_unregister_expr(&nft_log_type);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
new file mode 100644
index 0000000..8a6116b
--- /dev/null
+++ b/net/netfilter/nft_lookup.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_lookup {
+ struct nft_set *set;
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ struct nft_set_binding binding;
+};
+
+static void nft_lookup_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+ const struct nft_set *set = priv->set;
+
+ if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+ return;
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
+ [NFTA_LOOKUP_SET] = { .type = NLA_STRING },
+ [NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
+};
+
+static int nft_lookup_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+ struct nft_set *set;
+ int err;
+
+ if (tb[NFTA_LOOKUP_SET] == NULL ||
+ tb[NFTA_LOOKUP_SREG] == NULL)
+ return -EINVAL;
+
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_LOOKUP_DREG] != NULL) {
+ if (!(set->flags & NFT_SET_MAP))
+ return -EINVAL;
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ if (set->dtype != NFT_DATA_VERDICT)
+ return -EINVAL;
+ } else if (set->dtype == NFT_DATA_VERDICT)
+ return -EINVAL;
+ } else if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ return err;
+
+ priv->set = set;
+ return 0;
+}
+
+static void nft_lookup_destroy(const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ nf_tables_unbind_set(NULL, priv->set, &priv->binding);
+}
+
+static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+ goto nla_put_failure;
+ if (priv->set->flags & NFT_SET_MAP)
+ if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_lookup_type;
+static const struct nft_expr_ops nft_lookup_ops = {
+ .type = &nft_lookup_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
+ .eval = nft_lookup_eval,
+ .init = nft_lookup_init,
+ .destroy = nft_lookup_destroy,
+ .dump = nft_lookup_dump,
+};
+
+static struct nft_expr_type nft_lookup_type __read_mostly = {
+ .name = "lookup",
+ .ops = &nft_lookup_ops,
+ .policy = nft_lookup_policy,
+ .maxattr = NFTA_LOOKUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_lookup_module_init(void)
+{
+ return nft_register_expr(&nft_lookup_type);
+}
+
+void nft_lookup_module_exit(void)
+{
+ nft_unregister_expr(&nft_lookup_type);
+}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 0000000..8c28220
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+ enum nft_meta_keys key:8;
+ enum nft_registers dreg:8;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ const struct net_device *in = pkt->in, *out = pkt->out;
+ struct nft_data *dest = &data[priv->dreg];
+
+ switch (priv->key) {
+ case NFT_META_LEN:
+ dest->data[0] = skb->len;
+ break;
+ case NFT_META_PROTOCOL:
+ *(__be16 *)dest->data = skb->protocol;
+ break;
+ case NFT_META_PRIORITY:
+ dest->data[0] = skb->priority;
+ break;
+ case NFT_META_MARK:
+ dest->data[0] = skb->mark;
+ break;
+ case NFT_META_IIF:
+ if (in == NULL)
+ goto err;
+ dest->data[0] = in->ifindex;
+ break;
+ case NFT_META_OIF:
+ if (out == NULL)
+ goto err;
+ dest->data[0] = out->ifindex;
+ break;
+ case NFT_META_IIFNAME:
+ if (in == NULL)
+ goto err;
+ strncpy((char *)dest->data, in->name, sizeof(dest->data));
+ break;
+ case NFT_META_OIFNAME:
+ if (out == NULL)
+ goto err;
+ strncpy((char *)dest->data, out->name, sizeof(dest->data));
+ break;
+ case NFT_META_IIFTYPE:
+ if (in == NULL)
+ goto err;
+ *(u16 *)dest->data = in->type;
+ break;
+ case NFT_META_OIFTYPE:
+ if (out == NULL)
+ goto err;
+ *(u16 *)dest->data = out->type;
+ break;
+ case NFT_META_SKUID:
+ if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ goto err;
+
+ read_lock_bh(&skb->sk->sk_callback_lock);
+ if (skb->sk->sk_socket == NULL ||
+ skb->sk->sk_socket->file == NULL) {
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ goto err;
+ }
+
+ dest->data[0] =
+ from_kuid_munged(&init_user_ns,
+ skb->sk->sk_socket->file->f_cred->fsuid);
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ break;
+ case NFT_META_SKGID:
+ if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ goto err;
+
+ read_lock_bh(&skb->sk->sk_callback_lock);
+ if (skb->sk->sk_socket == NULL ||
+ skb->sk->sk_socket->file == NULL) {
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ goto err;
+ }
+ dest->data[0] =
+ from_kgid_munged(&init_user_ns,
+ skb->sk->sk_socket->file->f_cred->fsgid);
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ break;
+#ifdef CONFIG_NET_CLS_ROUTE
+ case NFT_META_RTCLASSID: {
+ const struct dst_entry *dst = skb_dst(skb);
+
+ if (dst == NULL)
+ goto err;
+ dest->data[0] = dst->tclassid;
+ break;
+ }
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+ dest->data[0] = skb->secmark;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ return;
+
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+ [NFTA_META_DREG] = { .type = NLA_U32 },
+ [NFTA_META_KEY] = { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_META_DREG] == NULL ||
+ tb[NFTA_META_KEY] == NULL)
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_LEN:
+ case NFT_META_PROTOCOL:
+ case NFT_META_PRIORITY:
+ case NFT_META_MARK:
+ case NFT_META_IIF:
+ case NFT_META_OIF:
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ case NFT_META_SKUID:
+ case NFT_META_SKGID:
+#ifdef CONFIG_NET_CLS_ROUTE
+ case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+#endif
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+ return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_meta_type;
+static const struct nft_expr_ops nft_meta_ops = {
+ .type = &nft_meta_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_eval,
+ .init = nft_meta_init,
+ .dump = nft_meta_dump,
+};
+
+static struct nft_expr_type nft_meta_type __read_mostly = {
+ .name = "meta",
+ .ops = &nft_meta_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_meta_module_init(void)
+{
+ return nft_register_expr(&nft_meta_type);
+}
+
+static void __exit nft_meta_module_exit(void)
+{
+ nft_unregister_expr(&nft_meta_type);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 0000000..71177df
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+ enum nft_meta_keys key;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+ struct nft_data *nfres,
+ struct nft_data *data,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *meta = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 val = data->data[0];
+
+ switch (meta->key) {
+ case NFT_META_MARK:
+ skb->mark = val;
+ break;
+ case NFT_META_PRIORITY:
+ skb->priority = val;
+ break;
+ case NFT_META_NFTRACE:
+ skb->nf_trace = val;
+ break;
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+ skb->secmark = val;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ }
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+ [NFTA_META_KEY] = { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+ struct nft_meta *meta = nft_expr_priv(expr);
+
+ if (tb[NFTA_META_KEY] == NULL)
+ return -EINVAL;
+
+ meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (meta->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+ case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_meta *meta = nft_expr_priv(expr);
+
+ NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_ops meta_target __read_mostly = {
+ .name = "meta",
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .owner = THIS_MODULE,
+ .eval = nft_meta_eval,
+ .init = nft_meta_init,
+ .dump = nft_meta_dump,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+};
+
+static int __init nft_meta_target_init(void)
+{
+ return nft_register_expr(&meta_target);
+}
+
+static void __exit nft_meta_target_exit(void)
+{
+ nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
new file mode 100644
index 0000000..b0b87b2
--- /dev/null
+++ b/net/netfilter/nft_nat.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/string.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+struct nft_nat {
+ enum nft_registers sreg_addr_min:8;
+ enum nft_registers sreg_addr_max:8;
+ enum nft_registers sreg_proto_min:8;
+ enum nft_registers sreg_proto_max:8;
+ int family;
+ enum nf_nat_manip_type type;
+};
+
+static void nft_nat_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+ struct nf_nat_range range;
+
+ memset(&range, 0, sizeof(range));
+ if (priv->sreg_addr_min) {
+ if (priv->family == AF_INET) {
+ range.min_addr.ip = data[priv->sreg_addr_min].data[0];
+ range.max_addr.ip = data[priv->sreg_addr_max].data[0];
+
+ } else {
+ memcpy(range.min_addr.ip6,
+ data[priv->sreg_addr_min].data,
+ sizeof(struct nft_data));
+ memcpy(range.max_addr.ip6,
+ data[priv->sreg_addr_max].data,
+ sizeof(struct nft_data));
+ }
+ range.flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = data[priv->sreg_proto_min].data[0];
+ range.max_proto.all = data[priv->sreg_proto_max].data[0];
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ data[NFT_REG_VERDICT].verdict =
+ nf_nat_setup_info(ct, &range, priv->type);
+}
+
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+ [NFTA_NAT_TYPE] = { .type = NLA_U32 },
+ [NFTA_NAT_FAMILY] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+};
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_nat *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_NAT_TYPE] == NULL)
+ return -EINVAL;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+ case NFT_NAT_SNAT:
+ priv->type = NF_NAT_MANIP_SRC;
+ break;
+ case NFT_NAT_DNAT:
+ priv->type = NF_NAT_MANIP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_NAT_FAMILY] == NULL)
+ return -EINVAL;
+
+ priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+ if (priv->family != AF_INET && priv->family != AF_INET6)
+ return -EINVAL;
+
+ if (tb[NFTA_NAT_REG_ADDR_MIN]) {
+ priv->sreg_addr_min = ntohl(nla_get_be32(
+ tb[NFTA_NAT_REG_ADDR_MIN]));
+ err = nft_validate_input_register(priv->sreg_addr_min);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+ priv->sreg_addr_max = ntohl(nla_get_be32(
+ tb[NFTA_NAT_REG_ADDR_MAX]));
+ err = nft_validate_input_register(priv->sreg_addr_max);
+ if (err < 0)
+ return err;
+ } else
+ priv->sreg_addr_max = priv->sreg_addr_min;
+
+ if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+ priv->sreg_proto_min = ntohl(nla_get_be32(
+ tb[NFTA_NAT_REG_PROTO_MIN]));
+ err = nft_validate_input_register(priv->sreg_proto_min);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+ priv->sreg_proto_max = ntohl(nla_get_be32(
+ tb[NFTA_NAT_REG_PROTO_MAX]));
+ err = nft_validate_input_register(priv->sreg_proto_max);
+ if (err < 0)
+ return err;
+ } else
+ priv->sreg_proto_max = priv->sreg_proto_min;
+
+ return 0;
+}
+
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NF_NAT_MANIP_SRC:
+ if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+ goto nla_put_failure;
+ break;
+ case NF_NAT_MANIP_DST:
+ if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb,
+ NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb,
+ NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb,
+ NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb,
+ NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_nat_type;
+static const struct nft_expr_ops nft_nat_ops = {
+ .type = &nft_nat_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+ .eval = nft_nat_eval,
+ .init = nft_nat_init,
+ .dump = nft_nat_dump,
+};
+
+static struct nft_expr_type nft_nat_type __read_mostly = {
+ .name = "nat",
+ .ops = &nft_nat_ops,
+ .policy = nft_nat_policy,
+ .maxattr = NFTA_NAT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_nat_module_init(void)
+{
+ int err;
+
+ err = nft_register_expr(&nft_nat_type);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_nat_module_exit(void)
+{
+ nft_unregister_expr(&nft_nat_type);
+}
+
+module_init(nft_nat_module_init);
+module_exit(nft_nat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 0000000..a2aeb31
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+static void nft_payload_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ struct nft_data *dest = &data[priv->dreg];
+ int offset;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb))
+ goto err;
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ offset = pkt->xt.thoff;
+ break;
+ default:
+ BUG();
+ }
+ offset += priv->offset;
+
+ if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+ goto err;
+ return;
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+};
+
+static int nft_payload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+ return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_payload_type;
+static const struct nft_expr_ops nft_payload_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+ .eval = nft_payload_eval,
+ .init = nft_payload_init,
+ .dump = nft_payload_dump,
+};
+
+const struct nft_expr_ops nft_payload_fast_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+ .eval = nft_payload_eval,
+ .init = nft_payload_init,
+ .dump = nft_payload_dump,
+};
+
+static const struct nft_expr_ops *
+nft_payload_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_payload_bases base;
+ unsigned int offset, len;
+
+ if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+ tb[NFTA_PAYLOAD_BASE] == NULL ||
+ tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+ tb[NFTA_PAYLOAD_LEN] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ switch (base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
+ return ERR_PTR(-EINVAL);
+
+ if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER)
+ return &nft_payload_fast_ops;
+ else
+ return &nft_payload_ops;
+}
+
+static struct nft_expr_type nft_payload_type __read_mostly = {
+ .name = "payload",
+ .select_ops = nft_payload_select_ops,
+ .policy = nft_payload_policy,
+ .maxattr = NFTA_PAYLOAD_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_payload_module_init(void)
+{
+ return nft_register_expr(&nft_payload_type);
+}
+
+void nft_payload_module_exit(void)
+{
+ nft_unregister_expr(&nft_payload_type);
+}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
new file mode 100644
index 0000000..ca0c1b2
--- /dev/null
+++ b/net/netfilter/nft_rbtree.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_rbtree {
+ struct rb_root root;
+};
+
+struct nft_rbtree_elem {
+ struct rb_node node;
+ u16 flags;
+ struct nft_data key;
+ struct nft_data data[];
+};
+
+static bool nft_rbtree_lookup(const struct nft_set *set,
+ const struct nft_data *key,
+ struct nft_data *data)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct nft_rbtree_elem *rbe, *interval = NULL;
+ const struct rb_node *parent = priv->root.rb_node;
+ int d;
+
+ while (parent != NULL) {
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ d = nft_data_cmp(&rbe->key, key, set->klen);
+ if (d < 0) {
+ parent = parent->rb_left;
+ interval = rbe;
+ } else if (d > 0)
+ parent = parent->rb_right;
+ else {
+found:
+ if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+ goto out;
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(data, rbe->data);
+ return true;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
+ rbe = interval;
+ goto found;
+ }
+out:
+ return false;
+}
+
+static void nft_rbtree_elem_destroy(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe)
+{
+ nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(rbe->data, set->dtype);
+ kfree(rbe);
+}
+
+static int __nft_rbtree_insert(const struct nft_set *set,
+ struct nft_rbtree_elem *new)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *parent, **p;
+ int d;
+
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, &priv->root);
+ return 0;
+}
+
+static int nft_rbtree_insert(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe;
+ unsigned int size;
+ int err;
+
+ size = sizeof(*rbe);
+ if (set->flags & NFT_SET_MAP)
+ size += sizeof(rbe->data[0]);
+
+ rbe = kzalloc(size, GFP_KERNEL);
+ if (rbe == NULL)
+ return -ENOMEM;
+
+ rbe->flags = elem->flags;
+ nft_data_copy(&rbe->key, &elem->key);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(rbe->data, &elem->data);
+
+ err = __nft_rbtree_insert(set, rbe);
+ if (err < 0)
+ kfree(rbe);
+ return err;
+}
+
+static void nft_rbtree_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe = elem->cookie;
+
+ rb_erase(&rbe->node, &priv->root);
+ kfree(rbe);
+}
+
+static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent = priv->root.rb_node;
+ struct nft_rbtree_elem *rbe;
+ int d;
+
+ while (parent != NULL) {
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+ if (d < 0)
+ parent = parent->rb_left;
+ else if (d > 0)
+ parent = parent->rb_right;
+ else {
+ elem->cookie = rbe;
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&elem->data, rbe->data);
+ elem->flags = rbe->flags;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct nft_rbtree_elem *rbe;
+ struct nft_set_elem elem;
+ struct rb_node *node;
+
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ if (iter->count < iter->skip)
+ goto cont;
+
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ nft_data_copy(&elem.key, &rbe->key);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&elem.data, rbe->data);
+ elem.flags = rbe->flags;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+}
+
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_rbtree);
+}
+
+static int nft_rbtree_init(const struct nft_set *set,
+ const struct nlattr * const nla[])
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->root = RB_ROOT;
+ return 0;
+}
+
+static void nft_rbtree_destroy(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *node;
+
+ while ((node = priv->root.rb_node) != NULL) {
+ rb_erase(node, &priv->root);
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ nft_rbtree_elem_destroy(set, rbe);
+ }
+}
+
+static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+ .privsize = nft_rbtree_privsize,
+ .init = nft_rbtree_init,
+ .destroy = nft_rbtree_destroy,
+ .insert = nft_rbtree_insert,
+ .remove = nft_rbtree_remove,
+ .get = nft_rbtree_get,
+ .lookup = nft_rbtree_lookup,
+ .walk = nft_rbtree_walk,
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_rbtree_module_init(void)
+{
+ return nft_register_set(&nft_rbtree_ops);
+}
+
+static void __exit nft_rbtree_module_exit(void)
+{
+ nft_unregister_set(&nft_rbtree_ops);
+}
+
+module_init(nft_rbtree_module_init);
+module_exit(nft_rbtree_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 568c769..3f224d7 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4668,7 +4668,7 @@
return NF_ACCEPT;
}
-static unsigned int selinux_ipv4_forward(unsigned int hooknum,
+static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -4678,7 +4678,7 @@
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_forward(unsigned int hooknum,
+static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -4710,7 +4710,7 @@
return NF_ACCEPT;
}
-static unsigned int selinux_ipv4_output(unsigned int hooknum,
+static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -4837,7 +4837,7 @@
return NF_ACCEPT;
}
-static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -4847,7 +4847,7 @@
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,