#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]"
  echo "             [-d=<delay>|--delay=<delay>] [--debug] [-E]"
  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "             [-l] [-N] [-p=<port>|--port=<port>] [-P]"
  echo "             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo "             [-s=<server>|--server=<server>] [-S|--stats]"
  echo "             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo " Where:"
  echo "  out               egress (default)"
  echo "  -b or --bpf       BPF program filename to load and attach."
  echo "                    Default is hbm_out_kern.o for egress."
  echo "  -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "  --debug           print BPF trace buffer"
  echo "  -d or --delay     add a delay in ms using netem"
  echo "  -D                In addition to the goodput in Mbps, it also outputs"
  echo "                    other detailed information. This information is"
  echo "                    test dependent (i.e. iperf3 or netperf)."
  echo "  -E                enable ECN (not required for dctcp)"
  echo "  -f or --flows     number of concurrent flows (default=1)"
  echo "  -i or --id        cgroup id (an integer, default is 1)"
  echo "  -N                use netperf instead of iperf3"
  echo "  -l                do not limit flows using loopback"
  echo "  -h                Help"
  echo "  -p or --port      iperf3 port (default is 5201)"
  echo "  -P                use an iperf3 instance for each flow"
  echo "  -q                use the specified qdisc"
  echo "  -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "  -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                    size of 10KB, rest of 1MB. Reply in all"
  echo "                    cases is 1 byte."
  echo "                    More detailed output for each flow can be found"
  echo "                    in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                    cgroup id as specified with the -i flag, and <flow>"
  echo "                    is the flow id starting at 1 and increasing by 1 for"
  echo "                    each flow (as specified by -f)."
  echo "  -s or --server    hostname of netperf server. Used to create netperf"
  echo "                    test traffic between two hosts (default is within"
  echo "                    host). netserver must be running on the server host."
  echo "  -S or --stats     update hbm stats (default is no update)."
  echo "  -t or --time      duration of iperf3 in seconds (default=5)"
  echo "  -w                Work conserving flag. cgroup can increase its"
  echo "                    bandwidth beyond the rate limit specified"
  echo "                    while there is available bandwidth. Current"
  echo "                    implementation assumes there is only one NIC"
  echo "                    (eth0), but can be extended to support multiple"
  echo "                    NICs."
  echo "  cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
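
# Illustrative invocations (assumes this script is saved as do_hbm_test.sh and
# run from a directory containing ./hbm and the BPF object files, e.g. samples/bpf):
#   ./do_hbm_test.sh -r=500 -t=10        # 500 Mbps egress limit, 10s iperf3 run
#   ./do_hbm_test.sh -N -f=4 dctcp       # 4 netperf flows using dctcp
#   ./do_hbm_test.sh -P -f=2 -D          # one iperf3 per flow, detailed output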

#set -x

debug_flag=0
args="$@"
name="$0"
netem=0
cc=x
dir="-o"
dir_name="out"
dur=5
flows=1
id=1
prog=""
port=5201
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
server=""
qdisc=""
flags=""
do_stats=0

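# start_hbm: launch ./hbm in the background with the configured direction,
# cgroup id, rate, and duration, logging to hbm.out, and echo the PID of the
# background job ($dbg is never set in this script, so it expands to nothing).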
function start_hbm () {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}

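# processArgs: walk the saved command line ($args) and set the globals above;
# option values use the -x=value form and are extracted with the ${i#*=}
# expansion (strip everything through the first '=').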
processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    #  prog="hbm_out_edt_kern.o"
    #  qdisc="fq"
    #  ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}

processArgs

if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

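# Start the hbm rate limiter in the background and give it 100ms to load the
# BPF program and create the test cgroup before this shell joins it below.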
hbm_pid=$(start_hbm)
usleep 100000

host=`hostname`
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

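# Add this shell to the test cgroup so the load generators started below
# (iperf3/netperf and their children) inherit it and get rate limited.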
echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ $use_netperf -eq 0 ] ; then
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

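# Shape loopback traffic if requested: a netem delay (-d) replaces the root
# qdisc and takes precedence over -q; otherwise install the qdisc given by -q.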
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q option because -d option was used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi

n=0
m=$[$dur * 5]
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

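# Sample the RTT in the background for the whole run (5 pings per second for
# the test duration); ping.out is parsed later for PING AVG DELAY.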
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

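# netperf mode: start a local netserver if none is running and no remote
# server was given, then launch one netperf per flow in the background, each
# writing its results to netperf.$id.$flow_cnt.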
if [ $use_netperf -ne 0 ] ; then
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
                   awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
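  # With -R (TCP_RR) the first flow uses a 10KB request and the rest 1MB,
  # with 1-byte replies; for ingress (-i) request and reply sizes are swapped
  # so the bulk of the data flows in the receive direction.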
  replySize=1
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$[flow_cnt+1]
  done

  # sleep for duration of test (plus some buffer)
  n=$[dur+2]
  sleep $n

  # force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$[rate+r]
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
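  # Default mode: one iperf3 server/client pair carries all flows as parallel
  # streams (-P $flows); the server's -1 (one-off) flag makes it exit after
  # serving a single test.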
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  usleep 100000
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
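  # -P mode: one iperf3 server/client pair per flow, each on its own port;
  # each client pipeline extracts the receiver goodput in Mbps directly into
  # iperf3.$id.$flow_cnt.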
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$[port+1]
    flow_cnt=$[flow_cnt+1]
  done
  n=$[dur+1]
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi

  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
#    echo "rate for flow $flow_cnt: $r"
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$[rate+r]
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi

if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi

sleep 2

hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered
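# "bpftool cgroup tree" output (after the header) has the form
#   <cgroup path>  <prog id>  <attach type> ...
# so after whitespace-splitting, v tracks the expected column: a path under
# the test dir (v -> 0), then the program id (v -> 1), then the attach type,
# at which point the program is detached from that cgroup.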
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x ; v=0
  else
    if [ $v -eq 0 ] ; then
      id=$x ; v=1
    else
      if [ $v -eq 1 ] ; then
        type=$x ; bpftool cgroup detach $cg $type id $id
        v=0
      fi
    fi
  fi
done

if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit
436exit