netfilter: nf_tables: switch registers to 32 bit addressing

Switch the nf_tables registers from 128 bit addressing to 32 bit
addressing to support so called concatenations, where multiple values
can be concatenated over multiple registers for O(1) exact matches of
multiple dimensions using sets.

The old register values are mapped to areas of 128 bits for compatibility.
When dumping register numbers, values are expressed using the old values
if they refer to the beginning of a 128 bit area for compatibility.

To support concatenations, register loads of less than a full 32 bit
value need to be padded. This mainly affects the payload and exthdr
expressions, which both unconditionally zero the last word before
copying the data.

Userspace fully passes the testsuite using both old and new register
addressing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a25fd19..03faf76 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3201,8 +3201,7 @@
 
 void *nft_set_elem_init(const struct nft_set *set,
 			const struct nft_set_ext_tmpl *tmpl,
-			const struct nft_data *key,
-			const struct nft_data *data,
+			const u32 *key, const u32 *data,
 			u64 timeout, gfp_t gfp)
 {
 	struct nft_set_ext *ext;
@@ -3357,7 +3356,7 @@
 	}
 
 	err = -ENOMEM;
-	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.data, data.data,
 				      timeout, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err3;
@@ -4122,14 +4121,47 @@
 	return 0;
 }
 
+/**
+ *	nft_parse_register - parse a register value from a netlink attribute
+ *
+ *	@attr: netlink attribute
+ *
+ *	Parse and translate a register value from a netlink attribute.
+ *	Registers used to be 128 bit wide, these register numbers will be
+ *	mapped to the corresponding 32 bit register numbers.
+ */
 unsigned int nft_parse_register(const struct nlattr *attr)
 {
-	return ntohl(nla_get_be32(attr));
+	unsigned int reg;
+
+	reg = ntohl(nla_get_be32(attr));
+	switch (reg) {
+	case NFT_REG_VERDICT...NFT_REG_4:
+		return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+	default:
+		return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+	}
 }
 EXPORT_SYMBOL_GPL(nft_parse_register);
 
+/**
+ *	nft_dump_register - dump a register value to a netlink attribute
+ *
+ *	@skb: socket buffer
+ *	@attr: attribute number
+ *	@reg: register number
+ *
+ *	Construct a netlink attribute containing the register number. For
+ *	compatibility reasons, register numbers being a multiple of 4 are
+ *	translated to the corresponding 128 bit register numbers.
+ */
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
 {
+	if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+		reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+	else
+		reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
 	return nla_put_be32(skb, attr, htonl(reg));
 }
 EXPORT_SYMBOL_GPL(nft_dump_register);
@@ -4145,14 +4177,13 @@
  */
 int nft_validate_register_load(enum nft_registers reg, unsigned int len)
 {
-	if (reg <= NFT_REG_VERDICT)
+	if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
 		return -EINVAL;
-	if (reg > NFT_REG_MAX)
-		return -ERANGE;
 	if (len == 0)
 		return -EINVAL;
-	if (len > FIELD_SIZEOF(struct nft_data, data))
+	if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
 		return -ERANGE;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nft_validate_register_load);
@@ -4200,13 +4231,12 @@
 
 		return 0;
 	default:
-		if (reg < NFT_REG_1)
+		if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
 			return -EINVAL;
-		if (reg > NFT_REG_MAX)
-			return -ERANGE;
 		if (len == 0)
 			return -EINVAL;
-		if (len > FIELD_SIZEOF(struct nft_data, data))
+		if (reg * NFT_REG32_SIZE + len >
+		    FIELD_SIZEOF(struct nft_regs, data))
 			return -ERANGE;
 
 		if (data != NULL && type != NFT_DATA_VALUE)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 5ef07d1..f153b07 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -70,7 +70,7 @@
 	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
 	u32 mask = nft_cmp_fast_mask(priv->len);
 
-	if ((regs->data[priv->sreg].data[0] & mask) == priv->data)
+	if ((regs->data[priv->sreg] & mask) == priv->data)
 		return;
 	regs->verdict.code = NFT_BREAK;
 }
@@ -81,7 +81,7 @@
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	unsigned char *ptr;
 
 	if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
@@ -94,6 +94,7 @@
 	if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
 		return false;
 
+	*dest = 0;
 	if (priv->len == 2)
 		*(u16 *)dest = *(u16 *)ptr;
 	else if (priv->len == 4)
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index aa11470..f1a9be2 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,8 +30,8 @@
 			     const struct nft_pktinfo *pkt)
 {
 	const struct nft_bitwise *priv = nft_expr_priv(expr);
-	const u32  *src = &regs->data[priv->sreg].data[0];
-	u32 *dst = &regs->data[priv->dreg].data[0];
+	const u32 *src = &regs->data[priv->sreg];
+	u32 *dst = &regs->data[priv->dreg];
 	unsigned int i;
 
 	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 2ee3e57..fde5145 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,8 +30,8 @@
 			       const struct nft_pktinfo *pkt)
 {
 	const struct nft_byteorder *priv = nft_expr_priv(expr);
-	u32 *src = &regs->data[priv->sreg].data[0];
-	u32 *dst = &regs->data[priv->dreg].data[0];
+	u32 *src = &regs->data[priv->sreg];
+	u32 *dst = &regs->data[priv->dreg];
 	union { u32 u32; u16 u16; } *s, *d;
 	unsigned int i;
 
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index fab8e75..8cbca34 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -35,7 +35,7 @@
 			    const struct nft_pktinfo *pkt)
 {
 	const struct nft_ct *priv = nft_expr_priv(expr);
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	const struct nf_conn_help *help;
@@ -156,7 +156,7 @@
 	const struct nft_ct *priv = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	u32 value = regs->data[priv->sreg].data[0];
+	u32 value = regs->data[priv->sreg];
 #endif
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 098ffee..ba7aed1 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -30,7 +30,7 @@
 			    const struct nft_pktinfo *pkt)
 {
 	struct nft_exthdr *priv = nft_expr_priv(expr);
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	unsigned int offset = 0;
 	int err;
 
@@ -39,6 +39,7 @@
 		goto err;
 	offset += priv->offset;
 
+	dest[priv->len / NFT_REG32_SIZE] = 0;
 	if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
 		goto err;
 	return;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 0682f60..1e8e412 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -29,7 +29,7 @@
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
-	nft_data_copy(&regs->data[priv->dreg], &priv->data);
+	nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
 }
 
 static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index fc7afff..ba14662 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -36,7 +36,7 @@
 	if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
 		if (set->flags & NFT_SET_MAP)
 			nft_data_copy(&regs->data[priv->dreg],
-				      nft_set_ext_data(ext));
+				      nft_set_ext_data(ext), set->dlen);
 		return;
 	}
 	regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5f744eb..52561e1 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -31,13 +31,14 @@
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
 	const struct net_device *in = pkt->in, *out = pkt->out;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 
 	switch (priv->key) {
 	case NFT_META_LEN:
 		*dest = skb->len;
 		break;
 	case NFT_META_PROTOCOL:
+		*dest = 0;
 		*(__be16 *)dest = skb->protocol;
 		break;
 	case NFT_META_NFPROTO:
@@ -75,11 +76,13 @@
 	case NFT_META_IIFTYPE:
 		if (in == NULL)
 			goto err;
+		*dest = 0;
 		*(u16 *)dest = in->type;
 		break;
 	case NFT_META_OIFTYPE:
 		if (out == NULL)
 			goto err;
+		*dest = 0;
 		*(u16 *)dest = out->type;
 		break;
 	case NFT_META_SKUID:
@@ -185,7 +188,7 @@
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
-	u32 value = regs->data[meta->sreg].data[0];
+	u32 value = regs->data[meta->sreg];
 
 	switch (meta->key) {
 	case NFT_META_MARK:
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 065cbda..ee2d717 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -49,26 +49,26 @@
 	if (priv->sreg_addr_min) {
 		if (priv->family == AF_INET) {
 			range.min_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_min].data[0];
+					regs->data[priv->sreg_addr_min];
 			range.max_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_max].data[0];
+					regs->data[priv->sreg_addr_max];
 
 		} else {
 			memcpy(range.min_addr.ip6,
-			       &regs->data[priv->sreg_addr_min].data,
-			       sizeof(struct nft_data));
+			       &regs->data[priv->sreg_addr_min],
+			       sizeof(range.min_addr.ip6));
 			memcpy(range.max_addr.ip6,
-			       &regs->data[priv->sreg_addr_max].data,
-			       sizeof(struct nft_data));
+			       &regs->data[priv->sreg_addr_max],
+			       sizeof(range.max_addr.ip6));
 		}
 		range.flags |= NF_NAT_RANGE_MAP_IPS;
 	}
 
 	if (priv->sreg_proto_min) {
 		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_min];
 		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_max];
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 5fa9973..94fb3b2 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -23,7 +23,7 @@
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	int offset;
 
 	switch (priv->base) {
@@ -43,6 +43,7 @@
 	}
 	offset += priv->offset;
 
+	dest[priv->len / NFT_REG32_SIZE] = 0;
 	if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
 		goto err;
 	return;