tools/testing/selftests/netfilter/nft_flowtable.sh - linux - Git at Google

 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 #
 # This tests basic flowtable functionality.
 # Creates following topology:
 #
 # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
 # Router1 is the one doing flow offloading, Router2 has no special
 # purpose other than having a link that is smaller than either Originator
 # and responder, i.e. TCPMSS announced values are too large and will still
 # result in fragmentation and/or PMTU discovery.

 # Exit status the kselftest framework treats as "test skipped".
 ksft_skip=4
 # Overall result of the script, handed to exit at the very end.
 ret=0

 # Paths of the transfer files; empty until mktemp fills them in.
 ns1in=""
 ns2in=""
 ns1out=""
 ns2out=""

 # Saved so that cleanup can put the original setting back.
 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

 # Each required tool is probed once; a missing one means SKIP, not FAIL.
 if ! nft --version > /dev/null 2>&1; then
 	echo "SKIP: Could not run test without nft tool"
 	exit $ksft_skip
 fi

 if ! ip -Version > /dev/null 2>&1; then
 	echo "SKIP: Could not run test without ip tool"
 	exit $ksft_skip
 fi

 if ! which nc > /dev/null 2>&1; then
 	echo "SKIP: Could not run test without nc (netcat)"
 	exit $ksft_skip
 fi

 # The first namespace doubles as the "can we create namespaces" probe.
 if ! ip netns add nsr1; then
 	echo "SKIP: Could not create net namespace"
 	exit $ksft_skip
 fi

 # Remaining namespaces: the two end hosts and the second router.
 ip netns add ns1
 ip netns add ns2

 ip netns add nsr2

 # Tear down everything the test created.
 # Globals: ns1in, ns1out, ns2in, ns2out (read) - temp files to remove
 #          log_netns (read) - nf_log_all_netns value saved at startup
 cleanup() {
 	local i

 	for i in 1 2; do
 		ip netns del "ns$i"
 		ip netns del "nsr$i"
 	done

 	rm -f "$ns1in" "$ns1out"
 	rm -f "$ns2in" "$ns2out"

 	# The script sets nf_log_all_netns to 1, so only a saved value of 0
 	# needs restoring.  A quoted string compare keeps '[' from failing with
 	# "unary operator expected" when the saved value is empty.
 	if [ "$log_netns" = "0" ]; then
 		sysctl -q net.netfilter.nf_log_all_netns=0
 	fi
 }

 # From here on, run cleanup no matter how the script exits.
 trap cleanup EXIT

 # NOTE(review): presumably lets the "log" rule of the ruleset loaded below
 # report from inside nsr1 rather than only from the initial namespace - confirm.
 sysctl -q net.netfilter.nf_log_all_netns=1

 # Chain of veth pairs:
 #   ns1:eth0 <-> nsr1:veth0, nsr1:veth1 <-> nsr2:veth0, nsr2:veth1 <-> ns2:eth0
 ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
 ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

 ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

 # Bring up every interface of both routers.
 for dev in lo veth0 veth1; do
   for i in 1 2; do
     ip -net nsr$i link set $dev up
   done
 done

 # Router addresses on the links facing the end hosts (the .1 / ::1 gateways).
 ip -net nsr1 addr add 10.0.1.1/24 dev veth0
 ip -net nsr1 addr add dead:1::1/64 dev veth0

 ip -net nsr2 addr add 10.0.2.1/24 dev veth1
 ip -net nsr2 addr add dead:2::1/64 dev veth1

 # set different MTUs so we need to push packets coming from ns1 (large MTU)
 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
 # or to do PMTU discovery (send ICMP error back to originator).
 # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
 # is NOT the lowest link mtu.

 ip -net nsr1 link set veth0 mtu 9000
 ip -net ns1 link set eth0 mtu 9000

 ip -net nsr2 link set veth1 mtu 2000
 ip -net ns2 link set eth0 mtu 2000

 # transfer-net between nsr1 and nsr2.
 # these addresses are not used for connections.
 ip -net nsr1 addr add 192.168.10.1/24 dev veth1
 ip -net nsr1 addr add fee1:2::1/64 dev veth1

 ip -net nsr2 addr add 192.168.10.2/24 dev veth0
 ip -net nsr2 addr add fee1:2::2/64 dev veth0

 # Per index i: enable IPv4 forwarding on router nsr$i, then configure end
 # host ns$i (address .99 / ::99, default routes via the router's .1 / ::1).
 for i in 1 2; do
   ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
   ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

   ip -net ns$i link set lo up
   ip -net ns$i link set eth0 up
   ip -net ns$i addr add 10.0.$i.99/24 dev eth0
   ip -net ns$i route add default via 10.0.$i.1
   ip -net ns$i addr add dead:$i::99/64 dev eth0
   ip -net ns$i route add default via dead:$i::1
   ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null

   # don't set ip DF bit for first two tests
   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
 done

 # The routers reach everything else through each other's transfer-net address.
 ip -net nsr1 route add default via 192.168.10.2
 ip -net nsr2 route add default via 192.168.10.1

 # Load the ruleset under test into nsr1: flowtable f1 on veth0/veth1 and a
 # forward chain with policy drop.  The rule carrying the comment
 # "something-to-grep-for" is looked up by that comment and deleted later on.
 ip netns exec nsr1 nft -f - <<EOF
 table inet filter {
   flowtable f1 {
      hook ingress priority 0
      devices = { veth0, veth1 }
    }

    chain forward {
       type filter hook forward priority 0; policy drop;

       # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
       meta oif "veth1" tcp dport 12345 flow offload @f1 counter

       # use packet size to trigger 'should be offloaded by now'.
       # otherwise, if 'flow offload' expression never offloads, the
       # test will pass.
       tcp dport 12345 meta length gt 200 ct mark set 1 counter

       # this turns off flow offloading internally, so expect packets again
       tcp flags fin,rst ct mark set 0 accept

       # this allows large packets from responder, we need this as long
       # as PMTUd is off.
       # This rule is deleted for the last test, when we expect PMTUd
       # to kick in and ensure all packets meet mtu requirements.
       meta length gt 1500 accept comment something-to-grep-for

       # next line blocks connection w.o. working offload.
       # we only do this for reverse dir, because we expect packets to
       # enter slow path due to MTU mismatch of veth0 and veth1.
       tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

       ct state established,related accept

       # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
       meta length lt 200 oif "veth1" tcp dport 12345 counter accept

       meta nfproto ipv4 meta l4proto icmp accept
       meta nfproto ipv6 meta l4proto icmpv6 accept
    }
 }
 EOF

 # A ruleset that nft refuses to load is reported as SKIP rather than FAIL.
 if [ $? -ne 0 ]; then
 	echo "SKIP: Could not load nft ruleset"
 	exit $ksft_skip
 fi

 # test basic connectivity
 # One ping in each direction.  A failure aborts the run right away; no
 # interactive shell is started here, so unattended runs cannot hang.
 if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
   echo "ERROR: ns1 cannot reach ns2" 1>&2
   exit 1
 fi

 if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
   echo "ERROR: ns2 cannot reach ns1" 1>&2
   exit 1
 fi

 if [ $ret -eq 0 ];then
 	echo "PASS: netns routing/connectivity: ns1 can reach ns2"
 fi

 # Transfer files: nsXin is what nsX sends, nsXout is what nsX receives.
 ns1in=$(mktemp)
 ns1out=$(mktemp)
 ns2in=$(mktemp)
 ns2out=$(mktemp)

 # Fill a file with a random amount of random data.
 # Arguments: $1 - path of the file to (over)write
 #            $2 - label passed by callers; not used
 # Globals:   SIZE, TSIZE (written) - TSIZE ends up as the total byte count
 make_file()
 {
 	local name=$1

 	# Bulk part: 0..8191 blocks of 1 KiB.
 	SIZE=$((RANDOM % (1024 * 8)))
 	TSIZE=$((SIZE * 1024))

 	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null

 	# Tail part: 128..1151 single bytes.
 	SIZE=$((RANDOM % 1024))
 	SIZE=$((SIZE + 128))
 	TSIZE=$((TSIZE + SIZE))
 	# conv=notrunc keeps the bulk data, oflag=append puts the tail behind it.
 	# (The operand is "conv"; a misspelt one makes dd write nothing at all.)
 	dd if=/dev/urandom conv=notrunc oflag=append of="$name" bs=1 count=$SIZE 2> /dev/null
 }

 # Compare a sent file with its received counterpart.
 # Arguments: $1 - file that was sent
 #            $2 - file that was received
 #            $3 - label used in the failure message
 # Returns:   0 when both files are byte-identical, 1 otherwise
 check_transfer()
 {
 	in=$1
 	out=$2
 	what=$3

 	# identical: nothing to report
 	if cmp "$in" "$out" > /dev/null 2>&1; then
 		return 0
 	fi

 	# mismatch: name the direction and show both files
 	echo "FAIL: file mismatch for $what" 1>&2
 	ls -l "$in"
 	ls -l "$out"
 	return 1
 }

 # Send $ns1in from an nc client in one namespace to an nc listener in
 # another while the listener sends $ns2in back, then verify both directions.
 # Arguments: $1 - namespace running the nc client
 #            $2 - namespace running the nc listener
 #            $3 - address the client connects to
 #            $4 - port the client connects to
 # Globals:   ns1in, ns2in (read); ns1out, ns2out (files overwritten)
 # Returns:   0 if both received files match what was sent, 1 otherwise
 test_tcp_forwarding_ip()
 {
 	local nsa=$1
 	local nsb=$2
 	local dstip=$3
 	local dstport=$4
 	local lret=0
 	local lpid cpid

 	# The listener always binds port 12345, whatever $dstport is.
 	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
 	lpid=$!

 	# give the listener time to start before connecting
 	sleep 1
 	ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
 	cpid=$!

 	sleep 3

 	# Either nc may already have exited by now; that is not an error, so
 	# kill's "No such process" complaint is discarded.
 	kill "$lpid" "$cpid" 2> /dev/null
 	wait

 	check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" || lret=1
 	check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" || lret=1

 	return $lret
 }

 # Run one transfer against 10.0.2.99 port 12345.
 # Arguments: $1 - client namespace
 #            $2 - listener namespace
 # Returns:   status of test_tcp_forwarding_ip
 test_tcp_forwarding()
 {
 	local client=$1 server=$2

 	# last command's status is the function's status
 	test_tcp_forwarding_ip "$client" "$server" 10.0.2.99 12345
 }

 # Run the transfer twice: first against 10.0.2.99:12345, then, only if that
 # worked, against 10.6.6.6:1666.
 # Arguments: $1 - client namespace
 #            $2 - listener namespace
 # Returns:   status of the first transfer if it failed, else of the second
 test_tcp_forwarding_nat()
 {
 	local client=$1 server=$2

 	# && skips the second transfer when the first one failed
 	test_tcp_forwarding_ip "$client" "$server" 10.0.2.99 12345 &&
 		test_tcp_forwarding_ip "$client" "$server" 10.6.6.6 1666
 }

 # Random payloads, one per direction.
 make_file "$ns1in" "ns1"
 make_file "$ns2in" "ns2"

 # First test:
 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
 if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
 	# dump the counters to show which rules the packets hit
 	ip netns exec nsr1 nft list ruleset
 	ret=1
 fi

 # ns2 loses its default routes, so its replies only get back if nsr1
 # masquerades ns1.  The single route left covers 192.168.10.1, the address
 # ns1 is expected to appear as.
 ip -net ns2 route del default via 10.0.2.1
 ip -net ns2 route del default via dead:2::1
 ip -net ns2 route add 192.168.10.1 via 10.0.2.1

 # Second test:
 # Same transfer with NAT enabled on nsr1: DNAT of 10.6.6.6:1666 to
 # 10.0.2.99:12345 for traffic entering on veth0, masquerade for traffic
 # leaving through veth1.
 ip netns exec nsr1 nft -f - <<EOF
 table ip nat {
    chain prerouting {
       type nat hook prerouting priority 0; policy accept;
       meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
    }

    chain postrouting {
       type nat hook postrouting priority 0; policy accept;
       meta oifname "veth1" counter masquerade
    }
 }
 EOF

 if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
 	ip netns exec nsr1 nft list ruleset
 	ret=1
 fi

 # Third test:
 # Same as second test, but with PMTU discovery enabled.
 # Find the large-packet accept rule through its comment.  cut keeps the
 # text that "nft -a" prints after '#', expected to be "handle <N>".
 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

 # $handle stays unquoted on purpose: it has to split into two words.
 if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
 	echo "FAIL: Could not delete large-packet accept rule"
 	exit 1
 fi

 # Let both end hosts do PMTU discovery again.
 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

 if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
 	ip netns exec nsr1 nft list ruleset
 	# a reported FAIL must also show up in the exit status
 	ret=1
 fi

 # Throw-away ESP keys: "0x" followed by the sha1sum / md5sum hex digest of
 # the current process listing.
 KEY_SHA="0x$(ps -xaf | sha1sum | cut -d " " -f 1)"
 KEY_AES="0x$(ps -xaf | md5sum | cut -d " " -f 1)"

 # Two random SPIs; bump the second one if both draws happen to be equal.
 SPI1=$RANDOM
 SPI2=$RANDOM

 if [ "$SPI1" -eq "$SPI2" ]; then
 	SPI2=$((SPI2 + 1))
 fi

 # Install ESP tunnel-mode xfrm states and policies in one namespace.
 # Arguments: $1 - namespace to configure
 #            $2 - tunnel endpoint address of this namespace
 #            $3 - tunnel endpoint address of the peer
 #            $4 - network on this side of the tunnel
 #            $5 - network on the peer's side of the tunnel
 #            $6 - SPI for outgoing traffic
 #            $7 - SPI for incoming traffic
 # Globals:   KEY_AES, KEY_SHA (read)
 do_esp() {
     local netns=$1
     local local_ep=$2
     local peer_ep=$3
     local local_net=$4
     local peer_net=$5
     local out_spi=$6
     local in_spi=$7

     # inbound state (peer -> us), then outbound state (us -> peer)
     ip -net "$netns" xfrm state add src "$peer_ep" dst "$local_ep" \
         proto esp spi "$in_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
         mode tunnel sel src "$peer_net" dst "$local_net"
     ip -net "$netns" xfrm state add src "$local_ep" dst "$peer_ep" \
         proto esp spi "$out_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
         mode tunnel sel src "$local_net" dst "$peer_net"

     # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
     ip -net "$netns" xfrm policy add src "$local_net" dst "$peer_net" dir out \
         tmpl src "$local_ep" dst "$peer_ep" proto esp mode tunnel \
         priority 1 action allow
     # to fwd decrypted packets after esp processing:
     ip -net "$netns" xfrm policy add src "$peer_net" dst "$local_net" dir fwd \
         tmpl src "$peer_ep" dst "$local_ep" proto esp mode tunnel \
         priority 1 action allow
 }

 # Mirror-image ESP setup on both routers: the SPI one side sends with is the
 # SPI the other side receives with.
 do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

 do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

 # This test runs without the NAT table.
 ip netns exec nsr1 nft delete table ip nat

 # restore default routes
 ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 ip -net ns2 route add default via 10.0.2.1
 ip -net ns2 route add default via dead:2::1

 if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
 	ip netns exec nsr1 nft list ruleset 1>&2
 	ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
 	# a reported FAIL must also show up in the exit status
 	ret=1
 fi

 exit $ret
	#!/bin/bash
	# SPDX-License-Identifier: GPL-2.0
	#
	# This tests basic flowtable functionality.
	# Creates following topology:
	#
	# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
	# Router1 is the one doing flow offloading, Router2 has no special
	# purpose other than having a link that is smaller than either Originator
	# and responder, i.e. TCPMSS announced values are too large and will still
	# result in fragmentation and/or PMTU discovery.

	# Exit status the kselftest framework treats as "test skipped".
	ksft_skip=4
	# Overall result of the script, handed to exit at the very end.
	ret=0

	# Paths of the transfer files; empty until mktemp fills them in.
	ns1in=""
	ns2in=""
	ns1out=""
	ns2out=""

	# Saved so that cleanup can put the original setting back.
	log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

	# Each required tool is probed once; a missing one means SKIP, not FAIL.
	if ! nft --version > /dev/null 2>&1; then
		echo "SKIP: Could not run test without nft tool"
		exit $ksft_skip
	fi

	if ! ip -Version > /dev/null 2>&1; then
		echo "SKIP: Could not run test without ip tool"
		exit $ksft_skip
	fi

	if ! which nc > /dev/null 2>&1; then
		echo "SKIP: Could not run test without nc (netcat)"
		exit $ksft_skip
	fi

	# The first namespace doubles as the "can we create namespaces" probe.
	if ! ip netns add nsr1; then
		echo "SKIP: Could not create net namespace"
		exit $ksft_skip
	fi

	# Remaining namespaces: the two end hosts and the second router.
	ip netns add ns1
	ip netns add ns2

	ip netns add nsr2

	# Tear down everything the test created.
	# Globals: ns1in, ns1out, ns2in, ns2out (read) - temp files to remove
	#          log_netns (read) - nf_log_all_netns value saved at startup
	cleanup() {
		local i

		for i in 1 2; do
			ip netns del "ns$i"
			ip netns del "nsr$i"
		done

		rm -f "$ns1in" "$ns1out"
		rm -f "$ns2in" "$ns2out"

		# The script sets nf_log_all_netns to 1, so only a saved value of 0
		# needs restoring.  A quoted string compare keeps '[' from failing
		# with "unary operator expected" when the saved value is empty.
		if [ "$log_netns" = "0" ]; then
			sysctl -q net.netfilter.nf_log_all_netns=0
		fi
	}

	# From here on, run cleanup no matter how the script exits.
	trap cleanup EXIT

	# NOTE(review): presumably lets the "log" rule of the ruleset loaded below
	# report from inside nsr1 rather than only from the initial namespace - confirm.
	sysctl -q net.netfilter.nf_log_all_netns=1

	# Chain of veth pairs:
	#   ns1:eth0 <-> nsr1:veth0, nsr1:veth1 <-> nsr2:veth0, nsr2:veth1 <-> ns2:eth0
	ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
	ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

	ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

	# Bring up every interface of both routers.
	for dev in lo veth0 veth1; do
	for i in 1 2; do
	ip -net nsr$i link set $dev up
	done
	done

	# Router addresses on the links facing the end hosts (the .1 / ::1 gateways).
	ip -net nsr1 addr add 10.0.1.1/24 dev veth0
	ip -net nsr1 addr add dead:1::1/64 dev veth0

	ip -net nsr2 addr add 10.0.2.1/24 dev veth1
	ip -net nsr2 addr add dead:2::1/64 dev veth1

	# set different MTUs so we need to push packets coming from ns1 (large MTU)
	# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
	# or to do PMTU discovery (send ICMP error back to originator).
	# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
	# is NOT the lowest link mtu.

	ip -net nsr1 link set veth0 mtu 9000
	ip -net ns1 link set eth0 mtu 9000

	ip -net nsr2 link set veth1 mtu 2000
	ip -net ns2 link set eth0 mtu 2000

	# transfer-net between nsr1 and nsr2.
	# these addresses are not used for connections.
	ip -net nsr1 addr add 192.168.10.1/24 dev veth1
	ip -net nsr1 addr add fee1:2::1/64 dev veth1

	ip -net nsr2 addr add 192.168.10.2/24 dev veth0
	ip -net nsr2 addr add fee1:2::2/64 dev veth0

	# Per index i: enable IPv4 forwarding on router nsr$i, then configure end
	# host ns$i (address .99 / ::99, default routes via the router's .1 / ::1).
	for i in 1 2; do
	ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
	ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

	ip -net ns$i link set lo up
	ip -net ns$i link set eth0 up
	ip -net ns$i addr add 10.0.$i.99/24 dev eth0
	ip -net ns$i route add default via 10.0.$i.1
	ip -net ns$i addr add dead:$i::99/64 dev eth0
	ip -net ns$i route add default via dead:$i::1
	ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null

	# don't set ip DF bit for first two tests
	ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
	done

	# The routers reach everything else through each other's transfer-net address.
	ip -net nsr1 route add default via 192.168.10.2
	ip -net nsr2 route add default via 192.168.10.1

	# Load the ruleset under test into nsr1: flowtable f1 on veth0/veth1 and a
	# forward chain with policy drop.  The rule carrying the comment
	# "something-to-grep-for" is looked up by that comment and deleted later on.
	# "<<-" is required here: the terminator below is tab-indented, and a plain
	# "<<EOF" only ends at an EOF that starts in the first column.
	ip netns exec nsr1 nft -f - <<-EOF
	table inet filter {
	flowtable f1 {
	hook ingress priority 0
	devices = { veth0, veth1 }
	}

	chain forward {
	type filter hook forward priority 0; policy drop;

	# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
	meta oif "veth1" tcp dport 12345 flow offload @f1 counter

	# use packet size to trigger 'should be offloaded by now'.
	# otherwise, if 'flow offload' expression never offloads, the
	# test will pass.
	tcp dport 12345 meta length gt 200 ct mark set 1 counter

	# this turns off flow offloading internally, so expect packets again
	tcp flags fin,rst ct mark set 0 accept

	# this allows large packets from responder, we need this as long
	# as PMTUd is off.
	# This rule is deleted for the last test, when we expect PMTUd
	# to kick in and ensure all packets meet mtu requirements.
	meta length gt 1500 accept comment something-to-grep-for

	# next line blocks connection w.o. working offload.
	# we only do this for reverse dir, because we expect packets to
	# enter slow path due to MTU mismatch of veth0 and veth1.
	tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

	ct state established,related accept

	# for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
	meta length lt 200 oif "veth1" tcp dport 12345 counter accept

	meta nfproto ipv4 meta l4proto icmp accept
	meta nfproto ipv6 meta l4proto icmpv6 accept
	}
	}
	EOF

	# A ruleset that nft refuses to load is reported as SKIP rather than FAIL.
	if [ $? -ne 0 ]; then
		echo "SKIP: Could not load nft ruleset"
		exit $ksft_skip
	fi

	# test basic connectivity
	# One ping in each direction.  A failure aborts the run right away; no
	# interactive shell is started here, so unattended runs cannot hang.
	if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
		echo "ERROR: ns1 cannot reach ns2" 1>&2
		exit 1
	fi

	if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
		echo "ERROR: ns2 cannot reach ns1" 1>&2
		exit 1
	fi

	if [ $ret -eq 0 ];then
		echo "PASS: netns routing/connectivity: ns1 can reach ns2"
	fi

	# Transfer files: nsXin is what nsX sends, nsXout is what nsX receives.
	ns1in=$(mktemp)
	ns1out=$(mktemp)
	ns2in=$(mktemp)
	ns2out=$(mktemp)

	# Fill a file with a random amount of random data.
	# Arguments: $1 - path of the file to (over)write
	#            $2 - label passed by callers; not used
	# Globals:   SIZE, TSIZE (written) - TSIZE ends up as the total byte count
	make_file()
	{
		local name=$1

		# Bulk part: 0..8191 blocks of 1 KiB.
		SIZE=$((RANDOM % (1024 * 8)))
		TSIZE=$((SIZE * 1024))

		dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null

		# Tail part: 128..1151 single bytes.
		SIZE=$((RANDOM % 1024))
		SIZE=$((SIZE + 128))
		TSIZE=$((TSIZE + SIZE))
		# conv=notrunc keeps the bulk data, oflag=append puts the tail behind
		# it.  (The operand is "conv"; a misspelt one makes dd write nothing.)
		dd if=/dev/urandom conv=notrunc oflag=append of="$name" bs=1 count=$SIZE 2> /dev/null
	}

	# Compare a sent file with its received counterpart.
	# Arguments: $1 - file that was sent
	#            $2 - file that was received
	#            $3 - label used in the failure message
	# Returns:   0 when both files are byte-identical, 1 otherwise
	check_transfer()
	{
		in=$1
		out=$2
		what=$3

		# identical: nothing to report
		if cmp "$in" "$out" > /dev/null 2>&1; then
			return 0
		fi

		# mismatch: name the direction and show both files
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$in"
		ls -l "$out"
		return 1
	}

	# Send $ns1in from an nc client in one namespace to an nc listener in
	# another while the listener sends $ns2in back, then verify both directions.
	# Arguments: $1 - namespace running the nc client
	#            $2 - namespace running the nc listener
	#            $3 - address the client connects to
	#            $4 - port the client connects to
	# Globals:   ns1in, ns2in (read); ns1out, ns2out (files overwritten)
	# Returns:   0 if both received files match what was sent, 1 otherwise
	test_tcp_forwarding_ip()
	{
		local nsa=$1
		local nsb=$2
		local dstip=$3
		local dstport=$4
		local lret=0
		local lpid cpid

		# The listener always binds port 12345, whatever $dstport is.
		ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
		lpid=$!

		# give the listener time to start before connecting
		sleep 1
		ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
		cpid=$!

		sleep 3

		# Either nc may already have exited by now; that is not an error,
		# so kill's "No such process" complaint is discarded.
		kill "$lpid" "$cpid" 2> /dev/null
		wait

		check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" || lret=1
		check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" || lret=1

		return $lret
	}

	# Run one transfer against 10.0.2.99 port 12345.
	# Arguments: $1 - client namespace
	#            $2 - listener namespace
	# Returns:   status of test_tcp_forwarding_ip
	test_tcp_forwarding()
	{
		local client=$1 server=$2

		# last command's status is the function's status
		test_tcp_forwarding_ip "$client" "$server" 10.0.2.99 12345
	}

	# Run the transfer twice: first against 10.0.2.99:12345, then, only if
	# that worked, against 10.6.6.6:1666.
	# Arguments: $1 - client namespace
	#            $2 - listener namespace
	# Returns:   status of the first transfer if it failed, else of the second
	test_tcp_forwarding_nat()
	{
		local client=$1 server=$2

		# && skips the second transfer when the first one failed
		test_tcp_forwarding_ip "$client" "$server" 10.0.2.99 12345 &&
			test_tcp_forwarding_ip "$client" "$server" 10.6.6.6 1666
	}

	# Random payloads, one per direction.
	make_file "$ns1in" "ns1"
	make_file "$ns2in" "ns2"

	# First test:
	# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
	if test_tcp_forwarding ns1 ns2; then
		echo "PASS: flow offloaded for ns1/ns2"
	else
		echo "FAIL: flow offload for ns1/ns2:" 1>&2
		# dump the counters to show which rules the packets hit
		ip netns exec nsr1 nft list ruleset
		ret=1
	fi

	# ns2 loses its default routes, so its replies only get back if nsr1
	# masquerades ns1.  The single route left covers 192.168.10.1, the address
	# ns1 is expected to appear as.
	ip -net ns2 route del default via 10.0.2.1
	ip -net ns2 route del default via dead:2::1
	ip -net ns2 route add 192.168.10.1 via 10.0.2.1

	# Second test:
	# Same transfer with NAT enabled on nsr1: DNAT of 10.6.6.6:1666 to
	# 10.0.2.99:12345 for traffic entering on veth0, masquerade for traffic
	# leaving through veth1.
	# "<<-" is required here: the terminator below is tab-indented, and a plain
	# "<<EOF" only ends at an EOF that starts in the first column.
	ip netns exec nsr1 nft -f - <<-EOF
	table ip nat {
	chain prerouting {
	type nat hook prerouting priority 0; policy accept;
	meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
	}

	chain postrouting {
	type nat hook postrouting priority 0; policy accept;
	meta oifname "veth1" counter masquerade
	}
	}
	EOF

	if test_tcp_forwarding_nat ns1 ns2; then
		echo "PASS: flow offloaded for ns1/ns2 with NAT"
	else
		echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
		ip netns exec nsr1 nft list ruleset
		ret=1
	fi

	# Third test:
	# Same as second test, but with PMTU discovery enabled.
	# Find the large-packet accept rule through its comment.  cut keeps the
	# text that "nft -a" prints after '#', expected to be "handle <N>".
	# The pipes must be real pipes: an escaped "\|" would be handed to nft as
	# a literal argument.
	handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

	# $handle stays unquoted on purpose: it has to split into two words.
	if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
		echo "FAIL: Could not delete large-packet accept rule"
		exit 1
	fi

	# Let both end hosts do PMTU discovery again.
	ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
	ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

	if test_tcp_forwarding_nat ns1 ns2; then
		echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
	else
		echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
		ip netns exec nsr1 nft list ruleset
		# a reported FAIL must also show up in the exit status
		ret=1
	fi

	# Throw-away ESP keys: "0x" followed by the sha1sum / md5sum hex digest of
	# the current process listing.  The pipes must be real pipes; with an
	# escaped "\|" ps receives them as arguments and both keys end up as a
	# bare "0x".
	KEY_SHA="0x$(ps -xaf | sha1sum | cut -d " " -f 1)"
	KEY_AES="0x$(ps -xaf | md5sum | cut -d " " -f 1)"

	# Two random SPIs; bump the second one if both draws happen to be equal.
	SPI1=$RANDOM
	SPI2=$RANDOM

	if [ "$SPI1" -eq "$SPI2" ]; then
		SPI2=$((SPI2 + 1))
	fi

	# Install ESP tunnel-mode xfrm states and policies in one namespace.
	# Arguments: $1 - namespace to configure
	#            $2 - tunnel endpoint address of this namespace
	#            $3 - tunnel endpoint address of the peer
	#            $4 - network on this side of the tunnel
	#            $5 - network on the peer's side of the tunnel
	#            $6 - SPI for outgoing traffic
	#            $7 - SPI for incoming traffic
	# Globals:   KEY_AES, KEY_SHA (read)
	do_esp() {
		local netns=$1
		local local_ep=$2
		local peer_ep=$3
		local local_net=$4
		local peer_net=$5
		local out_spi=$6
		local in_spi=$7

		# inbound state (peer -> us), then outbound state (us -> peer)
		ip -net "$netns" xfrm state add src "$peer_ep" dst "$local_ep" \
			proto esp spi "$in_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
			mode tunnel sel src "$peer_net" dst "$local_net"
		ip -net "$netns" xfrm state add src "$local_ep" dst "$peer_ep" \
			proto esp spi "$out_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
			mode tunnel sel src "$local_net" dst "$peer_net"

		# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
		ip -net "$netns" xfrm policy add src "$local_net" dst "$peer_net" dir out \
			tmpl src "$local_ep" dst "$peer_ep" proto esp mode tunnel \
			priority 1 action allow
		# to fwd decrypted packets after esp processing:
		ip -net "$netns" xfrm policy add src "$peer_net" dst "$local_net" dir fwd \
			tmpl src "$peer_ep" dst "$local_ep" proto esp mode tunnel \
			priority 1 action allow
	}

	# Mirror-image ESP setup on both routers: the SPI one side sends with is
	# the SPI the other side receives with.
	do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

	do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

	# This test runs without the NAT table.
	ip netns exec nsr1 nft delete table ip nat

	# restore default routes
	ip -net ns2 route del 192.168.10.1 via 10.0.2.1
	ip -net ns2 route add default via 10.0.2.1
	ip -net ns2 route add default via dead:2::1

	if test_tcp_forwarding ns1 ns2; then
		echo "PASS: ipsec tunnel mode for ns1/ns2"
	else
		echo "FAIL: ipsec tunnel mode for ns1/ns2"
		ip netns exec nsr1 nft list ruleset 1>&2
		ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
		# a reported FAIL must also show up in the exit status
		ret=1
	fi

	exit $ret