#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

# Print the help text for this script and terminate the script.
# Globals:   name (read) - script name shown on the USAGE line
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never; calls `exit` (status of the preceding echo)
Usage() {
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    --edt             use fq's Earliest Departure Time (requires fq)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use
EOF
  echo " "
  exit
}

#set -x

# Default settings. processArgs overrides most of them from the command line.
debug_flag=0     # set to 1 by --debug
args="$*"        # command-line arguments flattened to one string for processArgs
name="$0"        # script name shown by Usage
netem=0          # netem delay in ms; 0 means no netem qdisc is installed
cc=x             # congestion control; "x" means leave the system setting alone
dir="-o"         # direction option passed to ./hbm
dir_name="out"   # direction name used in hbm.<id>.<dir_name> file names
dur=5            # test duration in seconds
flows=1          # number of concurrent flows
id=1             # cgroup id
prog=""          # BPF program file name given with -b/--bpf
port=5201        # iperf3 port
rate=1000        # rate limit in Mbps passed to ./hbm
multi_iperf=0    # set to 1 by -P (one iperf3 instance per flow)
flow_cnt=1       # flow loop counter
use_netperf=0    # set to 1 by -N
rr=0             # set to 1 by -R (netperf TCP_RR)
ecn=0            # set to 1 by -E
details=0        # set to 1 by -D
server=""        # netperf server host name given with -s/--server
qdisc=""         # qdisc given with -q (or "fq" when --edt is used)
flags=""         # extra options accumulated for ./hbm
dbg=""           # expanded on the ./hbm command line; initialised here so a
                 # value inherited from the environment cannot leak into it
do_stats=0       # set to 1 by -S/--stats

BPFFS=/sys/fs/bpf

# Make sure a bpf filesystem is mounted at $BPFFS, mounting one if needed.
# Globals:   BPFFS (read)
# Arguments: none
# Outputs:   a one-line status message on stdout; an error on stderr if the
#            mount attempt fails
# Returns:   0 if bpffs is (now) mounted, 1 if mounting failed
function config_bpffs () {
  # Match the exact "<src> on <mountpoint> type bpf " field of mount's
  # listing; a bare substring match would also accept e.g. /sys/fs/bpf_old.
  if mount | grep -qF -- " on ${BPFFS} type bpf "; then
    echo "bpffs already mounted"
  else
    echo "bpffs not mounted. Mounting..."
    if ! mount -t bpf none "$BPFFS"; then
      echo "ERROR: failed to mount bpffs on $BPFFS" >&2
      return 1
    fi
  fi
}

# Launch ./hbm in the background and report its PID.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Arguments: none
# Outputs:   PID of the background ./hbm process on stdout; hbm.out is
#            recreated with the command line, a separator line, and then
#            everything ./hbm writes to stdout/stderr
function start_hbm () {
  rm -f hbm.out
  printf '%s\n' "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  # The expansions are left unquoted on purpose: $flags holds several
  # options, and empty values ($dbg, $prog) must disappear from the
  # command line instead of becoming empty arguments.
  # shellcheck disable=SC2086
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}

# Parse the command line and update the global settings accordingly.
# Globals:   args (read, used when no arguments are passed);
#            dir, dir_name, prog, cc, flags, debug_flag, netem, details, ecn,
#            qdisc, flows, id, use_netperf, port, multi_iperf, rate, rr,
#            server, do_stats, dur (written)
# Arguments: optional - the arguments to parse. When called without
#            arguments, the whitespace-separated words of $args are parsed.
# Outputs:   "Unknown arg:<arg>" followed by the help text for an
#            unrecognised argument; the help text alone for -h
# Returns:   does not return after -h or an unknown argument (Usage exits)
processArgs () {
  local arg
  if [ "$#" -eq 0 ]; then
    # Deliberate word splitting: $args is the flattened command line.
    # shellcheck disable=SC2086
    set -- $args
  fi
  for arg in "$@"; do
    case "$arg" in
      # Support for upcoming ingress rate limiting
      #in)
      #  dir="-i"
      #  dir_name="in"
      #  ;;
      out)
        dir="-o"
        dir_name="out"
        ;;
      -b=*|--bpf=*)
        prog="${arg#*=}"
        ;;
      -c=*|--cc=*)
        cc="${arg#*=}"
        ;;
      --no_cn)
        flags="$flags --no_cn"
        ;;
      --debug)
        flags="$flags -d"
        debug_flag=1
        ;;
      -d=*|--delay=*)
        netem="${arg#*=}"
        ;;
      -D)
        details=1
        ;;
      -E)
        ecn=1
        ;;
      --edt)
        flags="$flags --edt"
        qdisc="fq"
        ;;
      -f=*|--flows=*)
        flows="${arg#*=}"
        ;;
      -h)
        # Documented in the help text; show it without an error message.
        Usage
        ;;
      -i=*|--id=*)
        id="${arg#*=}"
        ;;
      -l)
        flags="$flags -l"
        ;;
      -N)
        use_netperf=1
        ;;
      -p=*|--port=*)
        port="${arg#*=}"
        ;;
      -P)
        multi_iperf=1
        ;;
      -q=*)
        qdisc="${arg#*=}"
        ;;
      -r=*|--rate=*)
        rate="${arg#*=}"
        ;;
      -R)
        rr=1
        ;;
      -s=*|--server=*)
        server="${arg#*=}"
        ;;
      -S|--stats)
        flags="$flags -s"
        do_stats=1
        ;;
      -t=*|--time=*)
        dur="${arg#*=}"
        ;;
      -w)
        flags="$flags -w"
        ;;
      cubic)
        cc=cubic
        ;;
      dctcp)
        cc=dctcp
        ;;
      *)
        echo "Unknown arg:$arg"
        Usage
        ;;
    esac
  done
}

# Print the per-cgroup details header and, when stats were requested (-S)
# and the stats file exists, the contents of hbm.<id>.<dir_name>.
# Globals: id, do_stats, dir_name (read)
print_hbm_details() {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ]; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the final result: with -D the average ping delay taken from
# ping.out plus the aggregate goodput, otherwise only the goodput number.
# Globals: details, rate (read)
print_summary() {
  local delay
  if [ "$details" -ne 0 ]; then
    delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo "$rate"
  fi
}

processArgs
config_bpffs

if [ "$debug_flag" -eq 1 ]; then
  rm -f hbm_out.log
fi

# start_hbm launches ./hbm in the background and prints its PID.
hbm_pid=$(start_hbm)
usleep 100000

host=$(hostname)
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Add this shell to the test cgroup.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"
if [ "$ecn" -ne 0 ]; then
  # Remember the current setting so it can be restored during cleanup.
  cur_ecn=$(sysctl -n net.ipv4.tcp_ecn)
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ "$use_netperf" -eq 0 ]; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ]; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

if [ "$netem" -ne "0" ]; then
  if [ "$qdisc" != "" ]; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ "$qdisc" != "" ]; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  # $qdisc is left unquoted so a qdisc name followed by options still works.
  # shellcheck disable=SC2086
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

# Ping the peer for the duration of the test (5 probes per second at the
# 0.2 s interval) to measure the delay reported with -D.
m=$((dur * 5))
hn="::1"
if [ "$use_netperf" -ne 0 ]; then
  if [ "$server" != "" ]; then
    hn=$server
  fi
fi

( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &

if [ "$use_netperf" -ne 0 ]; then
  # Start a local netserver only if none is running and no remote server
  # was given; remember whether one was already there for the cleanup.
  begNetserverPid=$(ps ax | grep netserver | grep --invert-match "grep" |
    awk '{ print $1 }')
  if [ "$begNetserverPid" == "" ]; then
    if [ "$server" == "" ]; then
      ( ./netserver > /dev/null 2>&1 ) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ]; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ]; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
  replySize=1
  # $np_cc is expanded unquoted below: it is either empty or two words.
  # shellcheck disable=SC2086
  while [ "$flow_cnt" -le "$flows" ]; do
    if [ "$rr" -ne 0 ]; then
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ]; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ]; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      if [ "$dir" == "-i" ]; then
        ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
            -r 1,10M $np_cc \
            -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
            > "netperf.$id.$flow_cnt" ) &
      else
        ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
            $np_cc \
            -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
            > "netperf.$id.$flow_cnt" ) &
      fi
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur + 2))
  sleep "$n"

  # force graceful termination of netperf
  pids=$(pgrep netperf)
  for p in $pids; do
    kill -SIGALRM "$p"
  done

  flow_cnt=1
  rate=0
  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi
  while [ "$flow_cnt" -le "$flows" ]; do
    if [ "$dir" == "-i" ]; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" |
        grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" |
        grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
    if [ "$details" -ne 0 ]; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
    flow_cnt=$((flow_cnt + 1))
  done
  if [ "$details" -ne 0 ]; then
    echo ""
  fi
  print_summary
elif [ "$multi_iperf" -eq 0 ]; then
  # One iperf3 client running all flows in parallel (-P).
  ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
  usleep 100000
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  rates=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # $rates is echoed unquoted on purpose: that joins the per-line values on
  # one line so the trailing-number match picks the last one.
  # shellcheck disable=SC2086
  rate=$(echo $rates | grep -o "[0-9]*$")

  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi
  print_summary
else
  # One iperf3 server/client pair per flow, each on its own port.
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ]; do
    ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" |
        grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" |
        grep -o "[0-9]*$" > "iperf3.$id.$flow_cnt" ) &
    port=$((port + 1))
    flow_cnt=$((flow_cnt + 1))
  done
  n=$((dur + 1))
  sleep "$n"
  flow_cnt=1
  rate=0
  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi

  while [ "$flow_cnt" -le "$flows" ]; do
    r=$(cat "iperf3.$id.$flow_cnt")
    if [ "$details" -ne 0 ]; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
    flow_cnt=$((flow_cnt + 1))
  done
  print_summary
fi

# ---- cleanup: undo the system changes made above ----
if [ "$use_netperf" -eq 0 ]; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
if [ "$ecn" -ne 0 ]; then
  # Restore the value saved before the test; fall back to 0 if it could
  # not be read.
  sysctl -w -q -n net.ipv4.tcp_ecn="${cur_ecn:-0}"
fi
if [ "$netem" -ne "0" ]; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ "$qdisc" != "" ]; then
  # Only remove the eth0 qdisc when this script installed it, i.e. when
  # the netem path above was not taken.
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill hbm only if the PID we started still belongs to an hbm process.
# Matching our PID within the list keeps this working when several hbm
# instances are running at the same time.
if [ -n "$hbm_pid" ] &&
    ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }' |
    grep -qx -- "$hbm_pid"; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
rm -rf "${BPFFS:?}"/hbm*

if [ "$use_netperf" -ne 0 ]; then
  if [ "$server" == "" ]; then
    # Stop netserver only if it was not already running before the test.
    if [ "$begNetserverPid" == "" ]; then
      netserverPid=$(ps ax | grep netserver | grep --invert-match "grep" |
        awk '{ print $1 }')
      if [ "$netserverPid" != "" ]; then
        # Unquoted: may hold more than one PID.
        # shellcheck disable=SC2086
        kill $netserverPid
      fi
    fi
  fi
fi
exit