| // SPDX-License-Identifier: GPL-2.0 |
| /* Copyright (c) 2019 Facebook */ |
| |
| #include <sys/socket.h> |
| #include <sys/epoll.h> |
| #include <netinet/in.h> |
| #include <arpa/inet.h> |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <errno.h> |
| |
| #include <bpf/bpf.h> |
| #include <bpf/libbpf.h> |
| |
| #include "cgroup_helpers.h" |
| #include "bpf_rlimit.h" |
| |
/*
 * Keys of the address map. test() stores srv_sa6 under ADDR_SRV_IDX and
 * cli_sa6 under ADDR_CLI_IDX.
 */
enum bpf_addr_array_idx {
	ADDR_SRV_IDX = 0,
	ADDR_CLI_IDX = 1,
	__NR_BPF_ADDR_ARRAY_IDX,	/* number of keys; must stay last */
};
| |
/*
 * Keys of the two result maps (socket snapshots and tcp-socket snapshots).
 * check_result() reads one entry per key from each map.
 */
enum bpf_result_array_idx {
	EGRESS_SRV_IDX = 0,	/* read into srv_sk / srv_tp */
	EGRESS_CLI_IDX = 1,	/* read into cli_sk / cli_tp */
	INGRESS_LISTEN_IDX = 2,	/* read into listen_sk / listen_tp */
	__NR_BPF_RESULT_ARRAY_IDX,	/* number of keys; must stay last */
};
| |
/*
 * Keys of the linum map. The values are only echoed in failure messages by
 * check_result(); presumably they are line numbers recorded by the BPF
 * program (verify against the kernel-side source).
 */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,
	INGRESS_LINUM_IDX = 1,
	__NR_BPF_LINUM_ARRAY_IDX,	/* number of keys; must stay last */
};
| |
| struct bpf_spinlock_cnt { |
| struct bpf_spin_lock lock; |
| __u32 cnt; |
| }; |
| |
/*
 * CHECK() - terminate the test when @condition is true.
 *
 * @condition: expression evaluated exactly once; non-zero means failure.
 * @tag:       short string naming the operation that failed.
 * @format:    printf-style format and arguments describing the failure.
 *
 * On failure prints "<func>(<line>):FAIL:<tag> <message>\n" to stdout and
 * calls exit(-1). Nothing happens when @condition is false.
 *
 * Callers routinely pass errno as a format argument. Those arguments are
 * only evaluated by the second printf(), so errno is saved before the first
 * printf() and restored afterwards; otherwise stdio could overwrite the
 * value the caller wants reported.
 */
#define CHECK(condition, tag, format...) ({				\
	int __ret = !!(condition);					\
	if (__ret) {							\
		int __saved_errno = errno;				\
		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
		errno = __saved_errno;					\
		printf(format);						\
		printf("\n");						\
		exit(-1);						\
	}								\
})
| |
/* Cgroup that main() creates, joins and attaches the BPF programs to. */
#define TEST_CGROUP "/test-bpf-sock-fields"

/* Payload sent by test(); DATA_LEN counts the terminating NUL as well. */
#define DATA "Hello BPF!"
#define DATA_LEN (sizeof(DATA))
| |
/* Server and client addresses; filled in by test() via getsockname(). */
static struct sockaddr_in6 srv_sa6;
static struct sockaddr_in6 cli_sa6;

/* Map file descriptors; main() assigns them before test() runs. */
static int sk_pkt_out_cnt10_fd;
static int sk_pkt_out_cnt_fd;
static int linum_map_fd;
static int addr_map_fd;
static int tp_map_fd;
static int sk_map_fd;
| |
| static __u32 addr_srv_idx = ADDR_SRV_IDX; |
| static __u32 addr_cli_idx = ADDR_CLI_IDX; |
| |
| static __u32 egress_srv_idx = EGRESS_SRV_IDX; |
| static __u32 egress_cli_idx = EGRESS_CLI_IDX; |
| static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; |
| |
| static __u32 egress_linum_idx = EGRESS_LINUM_IDX; |
| static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; |
| |
/*
 * Reset @sa6 to the IPv6 loopback address (::1) with every other field,
 * including the port, zeroed.
 */
static void init_loopback6(struct sockaddr_in6 *sa6)
{
	memset(sa6, 0, sizeof(struct sockaddr_in6));
	memcpy(&sa6->sin6_addr, &in6addr_loopback, sizeof(sa6->sin6_addr));
	sa6->sin6_family = AF_INET6;
}
| |
| static void print_sk(const struct bpf_sock *sk) |
| { |
| char src_ip4[24], dst_ip4[24]; |
| char src_ip6[64], dst_ip6[64]; |
| |
| inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); |
| inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); |
| inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); |
| inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); |
| |
| printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " |
| "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " |
| "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", |
| sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, |
| sk->mark, sk->priority, |
| sk->src_ip4, src_ip4, |
| sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], |
| src_ip6, sk->src_port, |
| sk->dst_ip4, dst_ip4, |
| sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], |
| dst_ip6, ntohs(sk->dst_port)); |
| } |
| |
| static void print_tp(const struct bpf_tcp_sock *tp) |
| { |
| printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " |
| "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " |
| "rate_delivered:%u rate_interval_us:%u packets_out:%u " |
| "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " |
| "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " |
| "bytes_received:%llu bytes_acked:%llu\n", |
| tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, |
| tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, |
| tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, |
| tp->packets_out, tp->retrans_out, tp->total_retrans, |
| tp->segs_in, tp->data_segs_in, tp->segs_out, |
| tp->data_segs_out, tp->lost_out, tp->sacked_out, |
| tp->bytes_received, tp->bytes_acked); |
| } |
| |
| static void check_result(void) |
| { |
| struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; |
| struct bpf_sock srv_sk, cli_sk, listen_sk; |
| __u32 ingress_linum, egress_linum; |
| int err; |
| |
| err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, |
| &egress_linum); |
| CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", |
| "err:%d errno:%d", err, errno); |
| |
| err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, |
| &ingress_linum); |
| CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", |
| "err:%d errno:%d", err, errno); |
| |
| err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); |
| CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", |
| "err:%d errno:%d", err, errno); |
| err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); |
| CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", |
| "err:%d errno:%d", err, errno); |
| |
| err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); |
| CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", |
| "err:%d errno:%d", err, errno); |
| err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); |
| CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", |
| "err:%d errno:%d", err, errno); |
| |
| err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); |
| CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", |
| "err:%d errno:%d", err, errno); |
| err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); |
| CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", |
| "err:%d errno:%d", err, errno); |
| |
| printf("listen_sk: "); |
| print_sk(&listen_sk); |
| printf("\n"); |
| |
| printf("srv_sk: "); |
| print_sk(&srv_sk); |
| printf("\n"); |
| |
| printf("cli_sk: "); |
| print_sk(&cli_sk); |
| printf("\n"); |
| |
| printf("listen_tp: "); |
| print_tp(&listen_tp); |
| printf("\n"); |
| |
| printf("srv_tp: "); |
| print_tp(&srv_tp); |
| printf("\n"); |
| |
| printf("cli_tp: "); |
| print_tp(&cli_tp); |
| printf("\n"); |
| |
| CHECK(listen_sk.state != 10 || |
| listen_sk.family != AF_INET6 || |
| listen_sk.protocol != IPPROTO_TCP || |
| memcmp(listen_sk.src_ip6, &in6addr_loopback, |
| sizeof(listen_sk.src_ip6)) || |
| listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || |
| listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || |
| listen_sk.src_port != ntohs(srv_sa6.sin6_port) || |
| listen_sk.dst_port, |
| "Unexpected listen_sk", |
| "Check listen_sk output. ingress_linum:%u", |
| ingress_linum); |
| |
| CHECK(srv_sk.state == 10 || |
| !srv_sk.state || |
| srv_sk.family != AF_INET6 || |
| srv_sk.protocol != IPPROTO_TCP || |
| memcmp(srv_sk.src_ip6, &in6addr_loopback, |
| sizeof(srv_sk.src_ip6)) || |
| memcmp(srv_sk.dst_ip6, &in6addr_loopback, |
| sizeof(srv_sk.dst_ip6)) || |
| srv_sk.src_port != ntohs(srv_sa6.sin6_port) || |
| srv_sk.dst_port != cli_sa6.sin6_port, |
| "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", |
| egress_linum); |
| |
| CHECK(cli_sk.state == 10 || |
| !cli_sk.state || |
| cli_sk.family != AF_INET6 || |
| cli_sk.protocol != IPPROTO_TCP || |
| memcmp(cli_sk.src_ip6, &in6addr_loopback, |
| sizeof(cli_sk.src_ip6)) || |
| memcmp(cli_sk.dst_ip6, &in6addr_loopback, |
| sizeof(cli_sk.dst_ip6)) || |
| cli_sk.src_port != ntohs(cli_sa6.sin6_port) || |
| cli_sk.dst_port != srv_sa6.sin6_port, |
| "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", |
| egress_linum); |
| |
| CHECK(listen_tp.data_segs_out || |
| listen_tp.data_segs_in || |
| listen_tp.total_retrans || |
| listen_tp.bytes_acked, |
| "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", |
| ingress_linum); |
| |
| CHECK(srv_tp.data_segs_out != 2 || |
| srv_tp.data_segs_in || |
| srv_tp.snd_cwnd != 10 || |
| srv_tp.total_retrans || |
| srv_tp.bytes_acked != 2 * DATA_LEN, |
| "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", |
| egress_linum); |
| |
| CHECK(cli_tp.data_segs_out || |
| cli_tp.data_segs_in != 2 || |
| cli_tp.snd_cwnd != 10 || |
| cli_tp.total_retrans || |
| cli_tp.bytes_received != 2 * DATA_LEN, |
| "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", |
| egress_linum); |
| } |
| |
| static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) |
| { |
| struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; |
| int err; |
| |
| pkt_out_cnt.cnt = ~0; |
| pkt_out_cnt10.cnt = ~0; |
| err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); |
| if (!err) |
| err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, |
| &pkt_out_cnt10); |
| |
| /* The bpf prog only counts for fullsock and |
| * passive conneciton did not become fullsock until 3WHS |
| * had been finished. |
| * The bpf prog only counted two data packet out but we |
| * specially init accept_fd's pkt_out_cnt by 2 in |
| * init_sk_storage(). Hence, 4 here. |
| */ |
| CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, |
| "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", |
| "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", |
| err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); |
| |
| pkt_out_cnt.cnt = ~0; |
| pkt_out_cnt10.cnt = ~0; |
| err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); |
| if (!err) |
| err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, |
| &pkt_out_cnt10); |
| /* Active connection is fullsock from the beginning. |
| * 1 SYN and 1 ACK during 3WHS |
| * 2 Acks on data packet. |
| * |
| * The bpf_prog initialized it to 0xeB9F. |
| */ |
| CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || |
| pkt_out_cnt10.cnt != 0xeB9F + 40, |
| "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", |
| "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", |
| err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); |
| } |
| |
| static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) |
| { |
| struct bpf_spinlock_cnt scnt = {}; |
| int err; |
| |
| scnt.cnt = pkt_out_cnt; |
| err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, |
| BPF_NOEXIST); |
| CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", |
| "err:%d errno:%d", err, errno); |
| |
| scnt.cnt *= 10; |
| err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, |
| BPF_NOEXIST); |
| CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", |
| "err:%d errno:%d", err, errno); |
| } |
| |
| static void test(void) |
| { |
| int listen_fd, cli_fd, accept_fd, epfd, err; |
| struct epoll_event ev; |
| socklen_t addrlen; |
| int i; |
| |
| addrlen = sizeof(struct sockaddr_in6); |
| ev.events = EPOLLIN; |
| |
| epfd = epoll_create(1); |
| CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); |
| |
| /* Prepare listen_fd */ |
| listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); |
| CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", |
| listen_fd, errno); |
| |
| init_loopback6(&srv_sa6); |
| err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); |
| CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); |
| |
| err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); |
| CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); |
| |
| err = listen(listen_fd, 1); |
| CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); |
| |
| /* Prepare cli_fd */ |
| cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); |
| CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); |
| |
| init_loopback6(&cli_sa6); |
| err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); |
| CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); |
| |
| err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); |
| CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", |
| err, errno); |
| |
| /* Update addr_map with srv_sa6 and cli_sa6 */ |
| err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); |
| CHECK(err, "map_update", "err:%d errno:%d", err, errno); |
| |
| err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); |
| CHECK(err, "map_update", "err:%d errno:%d", err, errno); |
| |
| /* Connect from cli_sa6 to srv_sa6 */ |
| err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); |
| printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", |
| ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); |
| CHECK(err && errno != EINPROGRESS, |
| "connect(cli_fd)", "err:%d errno:%d", err, errno); |
| |
| ev.data.fd = listen_fd; |
| err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); |
| CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", |
| err, errno); |
| |
| /* Accept the connection */ |
| /* Have some timeout in accept(listen_fd). Just in case. */ |
| err = epoll_wait(epfd, &ev, 1, 1000); |
| CHECK(err != 1 || ev.data.fd != listen_fd, |
| "epoll_wait(listen_fd)", |
| "err:%d errno:%d ev.data.fd:%d listen_fd:%d", |
| err, errno, ev.data.fd, listen_fd); |
| |
| accept_fd = accept(listen_fd, NULL, NULL); |
| CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", |
| accept_fd, errno); |
| close(listen_fd); |
| |
| ev.data.fd = cli_fd; |
| err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); |
| CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", |
| err, errno); |
| |
| init_sk_storage(accept_fd, 2); |
| |
| for (i = 0; i < 2; i++) { |
| /* Send some data from accept_fd to cli_fd */ |
| err = send(accept_fd, DATA, DATA_LEN, 0); |
| CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", |
| err, errno); |
| |
| /* Have some timeout in recv(cli_fd). Just in case. */ |
| err = epoll_wait(epfd, &ev, 1, 1000); |
| CHECK(err != 1 || ev.data.fd != cli_fd, |
| "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", |
| err, errno, ev.data.fd, cli_fd); |
| |
| err = recv(cli_fd, NULL, 0, MSG_TRUNC); |
| CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); |
| } |
| |
| check_sk_pkt_out_cnt(accept_fd, cli_fd); |
| |
| close(epfd); |
| close(accept_fd); |
| close(cli_fd); |
| |
| check_result(); |
| } |
| |
| int main(int argc, char **argv) |
| { |
| struct bpf_prog_load_attr attr = { |
| .file = "test_sock_fields_kern.o", |
| .prog_type = BPF_PROG_TYPE_CGROUP_SKB, |
| .prog_flags = BPF_F_TEST_RND_HI32, |
| }; |
| int cgroup_fd, egress_fd, ingress_fd, err; |
| struct bpf_program *ingress_prog; |
| struct bpf_object *obj; |
| struct bpf_map *map; |
| |
| err = setup_cgroup_environment(); |
| CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", |
| err, errno); |
| |
| atexit(cleanup_cgroup_environment); |
| |
| /* Create a cgroup, get fd, and join it */ |
| cgroup_fd = create_and_get_cgroup(TEST_CGROUP); |
| CHECK(cgroup_fd == -1, "create_and_get_cgroup()", |
| "cgroup_fd:%d errno:%d", cgroup_fd, errno); |
| |
| err = join_cgroup(TEST_CGROUP); |
| CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); |
| |
| err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); |
| CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); |
| |
| ingress_prog = bpf_object__find_program_by_title(obj, |
| "cgroup_skb/ingress"); |
| CHECK(!ingress_prog, |
| "bpf_object__find_program_by_title(cgroup_skb/ingress)", |
| "not found"); |
| ingress_fd = bpf_program__fd(ingress_prog); |
| |
| err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); |
| CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", |
| "err:%d errno%d", err, errno); |
| |
| err = bpf_prog_attach(ingress_fd, cgroup_fd, |
| BPF_CGROUP_INET_INGRESS, 0); |
| CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", |
| "err:%d errno%d", err, errno); |
| close(cgroup_fd); |
| |
| map = bpf_object__find_map_by_name(obj, "addr_map"); |
| CHECK(!map, "cannot find addr_map", "(null)"); |
| addr_map_fd = bpf_map__fd(map); |
| |
| map = bpf_object__find_map_by_name(obj, "sock_result_map"); |
| CHECK(!map, "cannot find sock_result_map", "(null)"); |
| sk_map_fd = bpf_map__fd(map); |
| |
| map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); |
| CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); |
| tp_map_fd = bpf_map__fd(map); |
| |
| map = bpf_object__find_map_by_name(obj, "linum_map"); |
| CHECK(!map, "cannot find linum_map", "(null)"); |
| linum_map_fd = bpf_map__fd(map); |
| |
| map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); |
| CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); |
| sk_pkt_out_cnt_fd = bpf_map__fd(map); |
| |
| map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); |
| CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); |
| sk_pkt_out_cnt10_fd = bpf_map__fd(map); |
| |
| test(); |
| |
| bpf_object__close(obj); |
| cleanup_cgroup_environment(); |
| |
| printf("PASS\n"); |
| |
| return 0; |
| } |