net: add a noref bit on skb dst
Use low order bit of skb->_skb_dst to tell dst is not refcounted.
Change _skb_dst to _skb_refdst to make sure all uses are catched.
skb_dst() returns the dst, regardless of noref bit set or not, but
with a lockdep check to make sure a noref dst is not given if current
user is not rcu protected.
New skb_dst_set_noref() helper to set an notrefcounted dst on a skb.
(with lockdep check)
skb_dst_drop() drops a reference only if skb dst was refcounted.
skb_dst_force() helper is used to force a refcount on dst, when skb
is queued and not anymore RCU protected.
Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if
!IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in
sock_queue_rcv_skb(), in __nf_queue().
Use skb_dst_force() in dev_requeue_skb().
Note: dst_use_noref() still dirties dst, we might transform it
later to do one dirtying per jiffies.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/core/dev.c b/net/core/dev.c
index cdcb9cb..6c82065 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2052,6 +2052,8 @@
* waiting to be sent out; and the qdisc is not running -
* xmit the skb directly.
*/
+ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+ skb_dst_force(skb);
__qdisc_update_bstats(q, skb->len);
if (sch_direct_xmit(skb, q, dev, txq, root_lock))
__qdisc_run(q);
@@ -2060,6 +2062,7 @@
rc = NET_XMIT_SUCCESS;
} else {
+ skb_dst_force(skb);
rc = qdisc_enqueue_root(skb, q);
qdisc_run(q);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a9b0e1f..c543dd2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -520,7 +520,7 @@
new->transport_header = old->transport_header;
new->network_header = old->network_header;
new->mac_header = old->mac_header;
- skb_dst_set(new, dst_clone(skb_dst(old)));
+ skb_dst_copy(new, old);
new->rxhash = old->rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
diff --git a/net/core/sock.c b/net/core/sock.c
index 63530a0..bf88a16 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,6 +307,11 @@
*/
skb_len = skb->len;
+ /* we escape from rcu protected region, make sure we dont leak
+ * a norefcounted dst
+ */
+ skb_dst_force(skb);
+
spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops);
__skb_queue_tail(list, skb);
@@ -1536,6 +1541,7 @@
do {
struct sk_buff *next = skb->next;
+ WARN_ON_ONCE(skb_dst_is_noref(skb));
skb->next = NULL;
sk_backlog_rcv(sk, skb);