| /* SPDX-License-Identifier: GPL-2.0 |
| * |
| * Copyright (c) 2019 Facebook |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of version 2 of the GNU General Public |
| * License as published by the Free Software Foundation. |
| * |
| * Include file for sample Host Bandwidth Manager (HBM) BPF programs |
| */ |
| #define KBUILD_MODNAME "foo" |
| #include <stddef.h> |
| #include <stdbool.h> |
| #include <uapi/linux/bpf.h> |
| #include <uapi/linux/if_ether.h> |
| #include <uapi/linux/if_packet.h> |
| #include <uapi/linux/ip.h> |
| #include <uapi/linux/ipv6.h> |
| #include <uapi/linux/in.h> |
| #include <uapi/linux/tcp.h> |
| #include <uapi/linux/filter.h> |
| #include <uapi/linux/pkt_cls.h> |
| #include <net/ipv6.h> |
| #include <net/inet_ecn.h> |
| #include "bpf_endian.h" |
| #include "bpf_helpers.h" |
| #include "hbm.h" |
| |
/*
 * Packet verdict values. DROP_PKT and ALLOW_PKT are the base verdicts that
 * hbm_update_stats() tallies; CWR (2) has the same value as the extra bit
 * that function ORs into its verdict when cwr_flag is set.
 */
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2

/* Without HBM_DEBUG every bpf_printk() call expands to nothing. */
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
| |
/*
 * Credit-based accounting constants. Every threshold is a multiple of
 * MAX_BYTES_PER_PACKET; the resulting value is given next to each one.
 */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)	/* 60000 */
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)	/* 600000 */
/* DROP_THRESH minus 15 full-size packets: 577500 */
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
/* Span between the mark and large-packet drop thresholds: 517500 */
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)	/* 150000 */
/* Starting credit assigned by hbm_init_vqueue(): 150000 */
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
| |
// Time base accounting for fq's EDT (all values in nanoseconds)
#define BURST_SIZE_NS		100000	// 100us
#define MARK_THRESH_NS		50000	// 50us
#define DROP_THRESH_NS		500000	// 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS	(DROP_THRESH_NS - 20000)  // 480us
// Span between the mark and large-packet drop thresholds: 430us
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
| |
// rate in bytes per ns << 20
// CREDIT_PER_NS / BYTES_PER_NS: (delta * rate) >> 20, computed in 64 bits.
#define CREDIT_PER_NS(delta, rate)	((((u64)(delta)) * (rate)) >> 20)
// Same conversion as CREDIT_PER_NS, kept under its own name for callers.
#define BYTES_PER_NS(delta, rate)	CREDIT_PER_NS(delta, rate)
// Inverse conversion: (bytes << 20) / rate using a 64-bit division helper.
#define BYTES_TO_NS(bytes, rate) \
	div64_u64(((u64)(bytes)) << 20, (u64)(rate))
| |
| struct bpf_map_def SEC("maps") queue_state = { |
| .type = BPF_MAP_TYPE_CGROUP_STORAGE, |
| .key_size = sizeof(struct bpf_cgroup_storage_key), |
| .value_size = sizeof(struct hbm_vqueue), |
| }; |
| BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key, |
| struct hbm_vqueue); |
| |
| struct bpf_map_def SEC("maps") queue_stats = { |
| .type = BPF_MAP_TYPE_ARRAY, |
| .key_size = sizeof(u32), |
| .value_size = sizeof(struct hbm_queue_stats), |
| .max_entries = 1, |
| }; |
| BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); |
| |
/*
 * Per-packet information gathered by hbm_get_pkt_info() and, for TCP
 * packets, get_tcp_info().
 */
struct hbm_pkt_info {
	int	cwnd;		/* tp->snd_cwnd, or 0 without TCP socket info */
	int	rtt;		/* tp->srtt_us >> 3, or 0 without TCP socket info */
	int	packets_out;	/* tp->packets_out, or 0 without TCP socket info */
	bool	is_ip;		/* header version nibble was 4 or 6 */
	bool	is_tcp;		/* IP protocol / next header was 6 (TCP) */
	short	ecn;		/* ECN bits masked with INET_ECN_MASK */
};
| |
| static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) |
| { |
| struct bpf_sock *sk; |
| struct bpf_tcp_sock *tp; |
| |
| sk = skb->sk; |
| if (sk) { |
| sk = bpf_sk_fullsock(sk); |
| if (sk) { |
| if (sk->protocol == IPPROTO_TCP) { |
| tp = bpf_tcp_sock(sk); |
| if (tp) { |
| pkti->cwnd = tp->snd_cwnd; |
| pkti->rtt = tp->srtt_us >> 3; |
| pkti->packets_out = tp->packets_out; |
| return 0; |
| } |
| } |
| } |
| } |
| pkti->cwnd = 0; |
| pkti->rtt = 0; |
| pkti->packets_out = 0; |
| return 1; |
| } |
| |
| static void hbm_get_pkt_info(struct __sk_buff *skb, |
| struct hbm_pkt_info *pkti) |
| { |
| struct iphdr iph; |
| struct ipv6hdr *ip6h; |
| |
| pkti->cwnd = 0; |
| pkti->rtt = 0; |
| bpf_skb_load_bytes(skb, 0, &iph, 12); |
| if (iph.version == 6) { |
| ip6h = (struct ipv6hdr *)&iph; |
| pkti->is_ip = true; |
| pkti->is_tcp = (ip6h->nexthdr == 6); |
| pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; |
| } else if (iph.version == 4) { |
| pkti->is_ip = true; |
| pkti->is_tcp = (iph.protocol == 6); |
| pkti->ecn = iph.tos & INET_ECN_MASK; |
| } else { |
| pkti->is_ip = false; |
| pkti->is_tcp = false; |
| pkti->ecn = 0; |
| } |
| if (pkti->is_tcp) |
| get_tcp_info(skb, pkti); |
| } |
| |
| static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) |
| { |
| bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); |
| qdp->lasttime = bpf_ktime_get_ns(); |
| qdp->credit = INIT_CREDIT; |
| qdp->rate = rate * 128; |
| } |
| |
| static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, |
| int rate) |
| { |
| unsigned long long curtime; |
| |
| curtime = bpf_ktime_get_ns(); |
| bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); |
| qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst |
| qdp->credit = 0; // not used |
| qdp->rate = rate * 128; |
| } |
| |
| static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, |
| int len, |
| unsigned long long curtime, |
| bool congestion_flag, |
| bool drop_flag, |
| bool cwr_flag, |
| bool ecn_ce_flag, |
| struct hbm_pkt_info *pkti, |
| int credit) |
| { |
| int rv = ALLOW_PKT; |
| |
| if (qsp != NULL) { |
| // Following is needed for work conserving |
| __sync_add_and_fetch(&(qsp->bytes_total), len); |
| if (qsp->stats) { |
| // Optionally update statistics |
| if (qsp->firstPacketTime == 0) |
| qsp->firstPacketTime = curtime; |
| qsp->lastPacketTime = curtime; |
| __sync_add_and_fetch(&(qsp->pkts_total), 1); |
| if (congestion_flag) { |
| __sync_add_and_fetch(&(qsp->pkts_marked), 1); |
| __sync_add_and_fetch(&(qsp->bytes_marked), len); |
| } |
| if (drop_flag) { |
| __sync_add_and_fetch(&(qsp->pkts_dropped), 1); |
| __sync_add_and_fetch(&(qsp->bytes_dropped), |
| len); |
| } |
| if (ecn_ce_flag) |
| __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); |
| if (pkti->cwnd) { |
| __sync_add_and_fetch(&(qsp->sum_cwnd), |
| pkti->cwnd); |
| __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); |
| } |
| if (pkti->rtt) |
| __sync_add_and_fetch(&(qsp->sum_rtt), |
| pkti->rtt); |
| __sync_add_and_fetch(&(qsp->sum_credit), credit); |
| |
| if (drop_flag) |
| rv = DROP_PKT; |
| if (cwr_flag) |
| rv |= 2; |
| if (rv == DROP_PKT) |
| __sync_add_and_fetch(&(qsp->returnValCount[0]), |
| 1); |
| else if (rv == ALLOW_PKT) |
| __sync_add_and_fetch(&(qsp->returnValCount[1]), |
| 1); |
| else if (rv == 2) |
| __sync_add_and_fetch(&(qsp->returnValCount[2]), |
| 1); |
| else if (rv == 3) |
| __sync_add_and_fetch(&(qsp->returnValCount[3]), |
| 1); |
| } |
| } |
| } |