tcp: provide SYN headers for passive connections

This patch allows a server application to get the TCP SYN headers for
its passive connections.  This is useful if the server is doing
fingerprinting of clients based on SYN packet contents.

Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN.

The first is used on a socket to enable saving the SYN headers
for child connections. This can be set before or after the listen()
call.

The latter is used to retrieve the SYN headers for passive connections,
if the parent listener has enabled TCP_SAVE_SYN.

TCP_SAVED_SYN is read once, it frees the saved SYN headers.

The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP
headers.

Original patch was written by Tom Herbert, I changed it to not hold
a full skb (and associated dst and conntracking reference).

We have used such patch for about 3 years at Google.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3b29115..e6fb5df 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -199,6 +199,7 @@
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		save_syn:1,	/* Save headers of SYN packet */
 		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
@@ -326,6 +327,7 @@
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
 	struct request_sock *fastopen_rsk;
+	u32	*saved_syn;
 };
 
 enum tsq_flags {
@@ -393,4 +395,10 @@
 	return 0;
 }
 
+static inline void tcp_saved_syn_free(struct tcp_sock *tp)
+{
+	kfree(tp->saved_syn);
+	tp->saved_syn = NULL;
+}
+
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 9f4265c..87935ca 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -64,6 +64,7 @@
 	struct timer_list		rsk_timer;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
+	u32				*saved_syn;
 	u32				secid;
 	u32				peer_secid;
 };
@@ -77,7 +78,7 @@
 		req->rsk_ops = ops;
 		sock_hold(sk_listener);
 		req->rsk_listener = sk_listener;
-
+		req->saved_syn = NULL;
 		/* Following is temporary. It is coupled with debugging
 		 * helpers in reqsk_put() & reqsk_free()
 		 */
@@ -104,6 +105,7 @@
 	req->rsk_ops->destructor(req);
 	if (req->rsk_listener)
 		sock_put(req->rsk_listener);
+	kfree(req->saved_syn);
 	kmem_cache_free(req->rsk_ops->slab, req);
 }
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index faa72f4..51ebedb 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -113,6 +113,8 @@
 #define TCP_TIMESTAMP		24
 #define TCP_NOTSENT_LOWAT	25	/* limit number of unsent bytes in write queue */
 #define TCP_CC_INFO		26	/* Get Congestion Control (optional) info */
+#define TCP_SAVE_SYN		27	/* Record SYN headers for new connections */
+#define TCP_SAVED_SYN		28	/* Get SYN headers recorded for connection */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46efa03..ecccfdc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2482,6 +2482,13 @@
 			icsk->icsk_syn_retries = val;
 		break;
 
+	case TCP_SAVE_SYN:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->save_syn = val;
+		break;
+
 	case TCP_LINGER2:
 		if (val < 0)
 			tp->linger2 = -1;
@@ -2818,6 +2825,34 @@
 	case TCP_NOTSENT_LOWAT:
 		val = tp->notsent_lowat;
 		break;
+	case TCP_SAVE_SYN:
+		val = tp->save_syn;
+		break;
+	case TCP_SAVED_SYN: {
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		if (tp->saved_syn) {
+			len = min_t(unsigned int, tp->saved_syn[0], len);
+			if (put_user(len, optlen)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			tcp_saved_syn_free(tp);
+			release_sock(sk);
+		} else {
+			release_sock(sk);
+			len = 0;
+			if (put_user(len, optlen))
+				return -EFAULT;
+		}
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 09bdc4a..df2ca61 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6060,6 +6060,23 @@
 	return want_cookie;
 }
 
+static void tcp_reqsk_record_syn(const struct sock *sk,
+				 struct request_sock *req,
+				 const struct sk_buff *skb)
+{
+	if (tcp_sk(sk)->save_syn) {
+		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+		u32 *copy;
+
+		copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+		if (copy) {
+			copy[0] = len;
+			memcpy(&copy[1], skb_network_header(skb), len);
+			req->saved_syn = copy;
+		}
+	}
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6192,6 +6209,7 @@
 		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
+	tcp_reqsk_record_syn(sk, req, skb);
 
 	return 0;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc1c658..91cb476 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1802,6 +1802,7 @@
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
+	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
 	sock_release_memcg(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e5d7649..ebe2ab2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -536,6 +536,9 @@
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
 
+		newtp->saved_syn = req->saved_syn;
+		req->saved_syn = NULL;
+
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
 	return newsk;