packet: tpacket_snd gso and checksum offload
Support socket option PACKET_VNET_HDR together with PACKET_TX_RING.
When enabled, a struct virtio_net_hdr is expected to precede the data
in the ring. The vnet option must be set before the ring is created.
The implementation reuses the existing skb_copy_bits code that is used
when dev->hard_header_len is non-zero. Move this ll_header check to
before the skb alloc and combine it with a test for vnet_hdr->hdr_len.
Allocate and copy the max of the two.
Verified with test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_txring_vnet.c
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 89377bf..b7e7851 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2495,7 +2495,7 @@
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, void *data, int tp_len,
- __be16 proto, unsigned char *addr, int hlen)
+ __be16 proto, unsigned char *addr, int hlen, int copylen)
{
union tpacket_uhdr ph;
int to_write, offset, len, nr_frags, len_max;
@@ -2522,20 +2522,17 @@
NULL, tp_len);
if (unlikely(err < 0))
return -EINVAL;
- } else if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len))
- return -EINVAL;
-
+ } else if (copylen) {
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ skb_put(skb, copylen - dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, copylen);
if (unlikely(err))
return err;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += dev->hard_header_len;
- to_write -= dev->hard_header_len;
+ data += copylen;
+ to_write -= copylen;
}
offset = offset_in_page(data);
@@ -2631,6 +2628,7 @@
{
struct sk_buff *skb;
struct net_device *dev;
+ struct virtio_net_hdr *vnet_hdr = NULL;
__be16 proto;
int err, reserve = 0;
void *ph;
@@ -2641,7 +2639,7 @@
void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
- int hlen, tlen;
+ int hlen, tlen, copylen = 0;
mutex_lock(&po->pg_vec_lock);
@@ -2674,7 +2672,7 @@
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if (size_max > dev->mtu + reserve + VLAN_HLEN)
+ if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;
do {
@@ -2694,8 +2692,28 @@
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
+ if (po->has_vnet_hdr) {
+ vnet_hdr = data;
+ data += sizeof(*vnet_hdr);
+ tp_len -= sizeof(*vnet_hdr);
+ if (tp_len < 0 ||
+ __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ copylen = __virtio16_to_cpu(vio_le(),
+ vnet_hdr->hdr_len);
+ }
+ if (dev->hard_header_len) {
+ if (ll_header_truncated(dev, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ copylen = max_t(int, copylen, dev->hard_header_len);
+ }
skb = sock_alloc_send_skb(&po->sk,
- hlen + tlen + sizeof(struct sockaddr_ll),
+ hlen + tlen + sizeof(struct sockaddr_ll) +
+ (copylen - dev->hard_header_len),
!need_wait, &err);
if (unlikely(skb == NULL)) {
@@ -2705,9 +2723,10 @@
goto out_status;
}
tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
- addr, hlen);
+ addr, hlen, copylen);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
+ !po->has_vnet_hdr &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
@@ -2726,6 +2745,11 @@
}
}
+ if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+
packet_pick_tx_queue(dev, skb);
skb->destructor = tpacket_destruct_skb;
@@ -3616,9 +3640,6 @@
}
if (optlen < len)
return -EINVAL;
- if (pkt_sk(sk)->has_vnet_hdr &&
- optname == PACKET_TX_RING)
- return -EINVAL;
if (copy_from_user(&req_u.req, optval, len))
return -EFAULT;
return packet_set_ring(sk, &req_u, 0,