[TCP]: MTU probing

Implementation of packetization layer path mtu discovery for TCP, based on
the internet-draft currently found at
<http://www.ietf.org/internet-drafts/draft-ietf-pmtud-method-05.txt>.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e9a54ae7..0ac388e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1891,6 +1891,34 @@
 	}
 }
 
+static void tcp_mtup_probe_failed(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
+	icsk->icsk_mtup.probe_size = 0;
+}
+
+static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	/* FIXME: breaks with very large cwnd */
+	tp->prior_ssthresh = tcp_current_ssthresh(sk);
+	tp->snd_cwnd = tp->snd_cwnd *
+		       tcp_mss_to_mtu(sk, tp->mss_cache) /
+		       icsk->icsk_mtup.probe_size;
+	tp->snd_cwnd_cnt = 0;
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+
+	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
+	icsk->icsk_mtup.probe_size = 0;
+	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+}
+
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2023,6 +2051,17 @@
 			return;
 		}
 
+		/* MTU probe failure: don't reduce cwnd */
+		if (icsk->icsk_ca_state < TCP_CA_CWR &&
+		    icsk->icsk_mtup.probe_size &&
+		    tp->snd_una == icsk->icsk_mtup.probe_seq_start) {
+			tcp_mtup_probe_failed(sk);
+			/* Restores the reduction we did in tcp_mtup_probe() */
+			tp->snd_cwnd++;
+			tcp_simple_retransmit(sk);
+			return;
+		}
+
 		/* Otherwise enter Recovery state */
 
 		if (IsReno(tp))
@@ -2243,6 +2282,13 @@
 			tp->retrans_stamp = 0;
 		}
 
+		/* MTU probing checks */
+		if (icsk->icsk_mtup.probe_size) {
+			if (!after(icsk->icsk_mtup.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) {
+				tcp_mtup_probe_success(sk, skb);
+			}
+		}
+
 		if (sacked) {
 			if (sacked & TCPCB_RETRANS) {
 				if(sacked & TCPCB_SACKED_RETRANS)
@@ -4101,6 +4147,7 @@
 		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
 			tp->rx_opt.sack_ok |= 2;
 
+		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
@@ -4211,6 +4258,7 @@
 		if (tp->ecn_flags&TCP_ECN_OK)
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
 
+		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
@@ -4399,6 +4447,7 @@
 				 */
 				tp->lsndtime = tcp_time_stamp;
 
+				tcp_mtup_init(sk);
 				tcp_initialize_rcv_mss(sk);
 				tcp_init_buffer_space(sk);
 				tcp_fast_path_on(tp);