[ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer

With this we're very close to getting all of the current TCP
refactorings in my dccp-2.6 tree merged, next changeset will export
some functions needed by the current DCCP code and then dccp-2.6.git
will be born!

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 800930f..6200968 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -270,7 +270,7 @@
 	__u8	frto_counter;	/* Number of new acks after RTO */
 
 	__u8	nonagle;	/* Disable Nagle algorithm?             */
-	__u8	defer_accept;	/* User waits for some data after accept() */
+	/* ONE BYTE HOLE, TRY TO PACK */
 
 /* RTT measurement */
 	__u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index a50f4a4b..692825f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -239,4 +239,6 @@
 	reqsk_free(req);
 }
 
+extern void inet_csk_listen_stop(struct sock *sk);
+
 #endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b7c7eec..447d287 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -97,6 +97,7 @@
  *
  * @rskq_accept_head - FIFO head of established children
  * @rskq_accept_tail - FIFO tail of established children
+ * @rskq_defer_accept - User waits for some data after accept()
  * @syn_wait_lock - serializer
  *
  * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -112,6 +113,8 @@
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
+	u8			rskq_defer_accept;
+	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 };
 
@@ -255,4 +258,8 @@
 	write_unlock(&queue->syn_wait_lock);
 }
 
+extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
+			      const unsigned long interval, const unsigned long timeout,
+			      const unsigned long max_rto, int max_retries);
+
 #endif /* _REQUEST_SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 68f1ec1..2423f05 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -423,7 +423,8 @@
 					    size_t len, int nonblock, 
 					    int flags, int *addr_len);
 
-extern int			tcp_listen_start(struct sock *sk);
+extern int			inet_csk_listen_start(struct sock *sk,
+						      const int nr_table_entries);
 
 extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 98f0fc9..b8203de 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -52,6 +52,7 @@
 	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
 	rwlock_init(&queue->syn_wait_lock);
 	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+	queue->rskq_defer_accept = 0;
 	lopt->nr_table_entries = nr_table_entries;
 
 	write_lock_bh(&queue->syn_wait_lock);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f691058..52f5ecc 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -99,6 +99,7 @@
 #include <net/arp.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/inet_connection_sock.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a1f8121..a4e9eec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -495,7 +495,7 @@
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
  */
-static void inet_csk_listen_stop(struct sock *sk)
+void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock *acc_req;
@@ -1947,15 +1947,15 @@
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		tp->defer_accept = 0;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
 		if (val > 0) {
 			/* Translate value in seconds to number of
 			 * retransmits */
-			while (tp->defer_accept < 32 &&
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
 			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				       tp->defer_accept))
-				tp->defer_accept++;
-			tp->defer_accept++;
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2058,6 +2058,7 @@
 int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		   int __user *optlen)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int val, len;
 
@@ -2095,7 +2096,7 @@
 		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
 		break;
 	case TCP_SYNCNT:
-		val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+		val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 		break;
 	case TCP_LINGER2:
 		val = tp->linger2;
@@ -2103,8 +2104,8 @@
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) <<
-					       (tp->defer_accept - 1));
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
@@ -2125,7 +2126,7 @@
 		return 0;
 	}
 	case TCP_QUICKACK:
-		val = !inet_csk(sk)->icsk_ack.pingpong;
+		val = !icsk->icsk_ack.pingpong;
 		break;
 
 	case TCP_CONGESTION:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b35badf..71d4561 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3831,6 +3831,7 @@
 	tcp_parse_options(skb, &tp->rx_opt, 0);
 
 	if (th->ack) {
+		struct inet_connection_sock *icsk;
 		/* rfc793:
 		 * "If the state is SYN-SENT then
 		 *    first check the ACK bit
@@ -3956,7 +3957,11 @@
 			sk_wake_async(sk, 0, POLL_OUT);
 		}
 
-		if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) {
+		icsk = inet_csk(sk);
+
+		if (sk->sk_write_pending ||
+		    icsk->icsk_accept_queue.rskq_defer_accept ||
+		    icsk->icsk_ack.pingpong) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
@@ -3965,8 +3970,8 @@
 			 * to stand against the temptation 8)     --ANK
 			 */
 			inet_csk_schedule_ack(sk);
-			inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp;
-			inet_csk(sk)->icsk_ack.ato	 = TCP_ATO_MIN;
+			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+			icsk->icsk_ack.ato	 = TCP_ATO_MIN;
 			tcp_incr_quickack(sk);
 			tcp_enter_quickack_mode(sk);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4cfbe1d..2d95afe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -787,9 +787,10 @@
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   If tp->defer_accept, we silently drop this bare ACK.  Otherwise,
-	   we create an established connection.  Both ends (listening sockets)
-	   accept the new incoming connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+	   bare ACK.  Otherwise, we create an established connection.  Both
+	   ends (listening sockets) accept the new incoming connection and try
+	   to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -856,7 +857,8 @@
 			return NULL;
 
 		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-		if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 			inet_rsk(req)->acked = 1;
 			return NULL;
 		}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c03930c..b614ad4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -424,16 +424,12 @@
 	sock_put(sk);
 }
 
-/*
- *	Timer for listening sockets
- */
-
-static void tcp_synack_timer(struct sock *sk)
+void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
+		       const unsigned long interval, const unsigned long timeout,
+		       const unsigned long max_rto, int max_retries)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct listen_sock *lopt = queue->listen_opt;
 	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
@@ -470,10 +466,10 @@
 		}
 	}
 
-	if (tp->defer_accept)
-		max_retries = tp->defer_accept;
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
 
-	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
+	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
 	do {
@@ -482,20 +478,19 @@
 			if (time_after_eq(now, req->expires)) {
 				if ((req->retrans < thresh ||
 				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
+				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
 						lopt->qlen_young--;
-					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
-						    TCP_RTO_MAX);
+					timeo = min((timeout << req->retrans), max_rto);
 					req->expires = now + timeo;
 					reqp = &req->dl_next;
 					continue;
 				}
 
 				/* Drop this request */
-				inet_csk_reqsk_queue_unlink(sk, req, reqp);
+				inet_csk_reqsk_queue_unlink(parent, req, reqp);
 				reqsk_queue_removed(&icsk->icsk_accept_queue, req);
 				reqsk_free(req);
 				continue;
@@ -503,14 +498,29 @@
 			reqp = &req->dl_next;
 		}
 
-		i = (i+1)&(TCP_SYNQ_HSIZE-1);
+		i = (i + 1) & (lopt->nr_table_entries - 1);
 
 	} while (--budget > 0);
 
 	lopt->clock_hand = i;
 
 	if (lopt->qlen)
-		inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
+		inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(reqsk_queue_prune);
+
+/*
+ *	Timer for listening sockets
+ */
+
+static void tcp_synack_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+
+	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
+			  TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)