tipc: rate limit broadcast retransmissions
As cluster sizes grow, so does the amount of identical or overlapping
broadcast NACKs generated by the packet receivers. This often leads to
'NACK crunches' resulting in huge numbers of redundant retransmissions
of the same packet ranges.
In this commit, we introduce rate control of broadcast retransmissions,
so that a retransmitted range cannot be retransmitted again until after
at least 10 ms. This reduces the frequency of duplicate, redundant
retransmissions by an order of magnitude, while having a significant
positive impact on overall throughput and scalability.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 136316f..58bb44d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -181,7 +181,10 @@
u16 acked;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
- int nack_state;
+ unsigned long prev_retr;
+ u16 prev_from;
+ u16 prev_to;
+ u8 nack_state;
bool bc_peer_is_up;
/* Statistics */
@@ -202,6 +205,8 @@
BC_NACK_SND_SUPPRESS,
};
+#define TIPC_BC_RETR_LIMIT 10 /* [ms] */
+
/*
* Interval between NACKs when packets arrive out of order
*/
@@ -1590,11 +1595,48 @@
l->rcv_nxt = peers_snd_nxt;
}
+/* link_bc_retr eval()- check if the indicated range can be retransmitted now
+ * - Adjust permitted range if there is overlap with previous retransmission
+ */
+static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
+{
+ unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
+
+ if (less(*to, *from))
+ return false;
+
+ /* New retransmission request */
+ if ((elapsed > TIPC_BC_RETR_LIMIT) ||
+ less(*to, l->prev_from) || more(*from, l->prev_to)) {
+ l->prev_from = *from;
+ l->prev_to = *to;
+ l->prev_retr = jiffies;
+ return true;
+ }
+
+ /* Inside range of previous retransmit */
+ if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
+ return false;
+
+ /* Fully or partially outside previous range => exclude overlap */
+ if (less(*from, l->prev_from)) {
+ *to = l->prev_from - 1;
+ l->prev_from = *from;
+ }
+ if (more(*to, l->prev_to)) {
+ *from = l->prev_to + 1;
+ l->prev_to = *to;
+ }
+ l->prev_retr = jiffies;
+ return true;
+}
+
/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
*/
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq)
{
+ struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
u16 from = msg_bcast_ack(hdr) + 1;
u16 to = from + msg_bc_gap(hdr) - 1;
@@ -1613,14 +1655,14 @@
if (!l->bc_peer_is_up)
return rc;
+ l->stats.recv_nacks++;
+
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- if (!less(to, from)) {
- rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
- l->stats.recv_nacks++;
- }
+ if (link_bc_retr_eval(snd_l, &from, &to))
+ rc = tipc_link_retrans(snd_l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))