| /* SPDX-License-Identifier: GPL-2.0 |
| * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. |
| */ |
| static const char *__doc__= |
| "XDP monitor tool, based on tracepoints\n" |
| ; |
| |
| static const char *__doc_err_only__= |
| " NOTICE: Only tracking XDP redirect errors\n" |
| " Enable TX success stats via '--stats'\n" |
| " (which comes with a per packet processing overhead)\n" |
| ; |
| |
| #include <errno.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include <unistd.h> |
| #include <locale.h> |
| |
| #include <sys/resource.h> |
| #include <getopt.h> |
| #include <net/if.h> |
| #include <time.h> |
| |
| #include <bpf/bpf.h> |
| #include "bpf_load.h" |
| #include "bpf_util.h" |
| |
| static int verbose = 1; |
| static bool debug = false; |
| |
| static const struct option long_options[] = { |
| {"help", no_argument, NULL, 'h' }, |
| {"debug", no_argument, NULL, 'D' }, |
| {"stats", no_argument, NULL, 'S' }, |
| {"sec", required_argument, NULL, 's' }, |
| {0, 0, NULL, 0 } |
| }; |
| |
| /* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ |
| #define EXIT_FAIL_MEM 5 |
| |
| static void usage(char *argv[]) |
| { |
| int i; |
| printf("\nDOCUMENTATION:\n%s\n", __doc__); |
| printf("\n"); |
| printf(" Usage: %s (options-see-below)\n", |
| argv[0]); |
| printf(" Listing options:\n"); |
| for (i = 0; long_options[i].name != 0; i++) { |
| printf(" --%-15s", long_options[i].name); |
| if (long_options[i].flag != NULL) |
| printf(" flag (internal value:%d)", |
| *long_options[i].flag); |
| else |
| printf("short-option: -%c", |
| long_options[i].val); |
| printf("\n"); |
| } |
| printf("\n"); |
| } |
| |
| #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ |
| static __u64 gettime(void) |
| { |
| struct timespec t; |
| int res; |
| |
| res = clock_gettime(CLOCK_MONOTONIC, &t); |
| if (res < 0) { |
| fprintf(stderr, "Error with gettimeofday! (%i)\n", res); |
| exit(EXIT_FAILURE); |
| } |
| return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; |
| } |
| |
| enum { |
| REDIR_SUCCESS = 0, |
| REDIR_ERROR = 1, |
| }; |
| #define REDIR_RES_MAX 2 |
| static const char *redir_names[REDIR_RES_MAX] = { |
| [REDIR_SUCCESS] = "Success", |
| [REDIR_ERROR] = "Error", |
| }; |
| static const char *err2str(int err) |
| { |
| if (err < REDIR_RES_MAX) |
| return redir_names[err]; |
| return NULL; |
| } |
| /* enum xdp_action */ |
| #define XDP_UNKNOWN XDP_REDIRECT + 1 |
| #define XDP_ACTION_MAX (XDP_UNKNOWN + 1) |
| static const char *xdp_action_names[XDP_ACTION_MAX] = { |
| [XDP_ABORTED] = "XDP_ABORTED", |
| [XDP_DROP] = "XDP_DROP", |
| [XDP_PASS] = "XDP_PASS", |
| [XDP_TX] = "XDP_TX", |
| [XDP_REDIRECT] = "XDP_REDIRECT", |
| [XDP_UNKNOWN] = "XDP_UNKNOWN", |
| }; |
| static const char *action2str(int action) |
| { |
| if (action < XDP_ACTION_MAX) |
| return xdp_action_names[action]; |
| return NULL; |
| } |
| |
| /* Common stats data record shared with _kern.c */ |
| struct datarec { |
| __u64 processed; |
| __u64 dropped; |
| __u64 info; |
| }; |
| #define MAX_CPUS 64 |
| |
| /* Userspace structs for collection of stats from maps */ |
| struct record { |
| __u64 timestamp; |
| struct datarec total; |
| struct datarec *cpu; |
| }; |
| struct u64rec { |
| __u64 processed; |
| }; |
| struct record_u64 { |
| /* record for _kern side __u64 values */ |
| __u64 timestamp; |
| struct u64rec total; |
| struct u64rec *cpu; |
| }; |
| |
| struct stats_record { |
| struct record_u64 xdp_redirect[REDIR_RES_MAX]; |
| struct record_u64 xdp_exception[XDP_ACTION_MAX]; |
| struct record xdp_cpumap_kthread; |
| struct record xdp_cpumap_enqueue[MAX_CPUS]; |
| }; |
| |
| static bool map_collect_record(int fd, __u32 key, struct record *rec) |
| { |
| /* For percpu maps, userspace gets a value per possible CPU */ |
| unsigned int nr_cpus = bpf_num_possible_cpus(); |
| struct datarec values[nr_cpus]; |
| __u64 sum_processed = 0; |
| __u64 sum_dropped = 0; |
| __u64 sum_info = 0; |
| int i; |
| |
| if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { |
| fprintf(stderr, |
| "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); |
| return false; |
| } |
| /* Get time as close as possible to reading map contents */ |
| rec->timestamp = gettime(); |
| |
| /* Record and sum values from each CPU */ |
| for (i = 0; i < nr_cpus; i++) { |
| rec->cpu[i].processed = values[i].processed; |
| sum_processed += values[i].processed; |
| rec->cpu[i].dropped = values[i].dropped; |
| sum_dropped += values[i].dropped; |
| rec->cpu[i].info = values[i].info; |
| sum_info += values[i].info; |
| } |
| rec->total.processed = sum_processed; |
| rec->total.dropped = sum_dropped; |
| rec->total.info = sum_info; |
| return true; |
| } |
| |
| static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) |
| { |
| /* For percpu maps, userspace gets a value per possible CPU */ |
| unsigned int nr_cpus = bpf_num_possible_cpus(); |
| struct u64rec values[nr_cpus]; |
| __u64 sum_total = 0; |
| int i; |
| |
| if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { |
| fprintf(stderr, |
| "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); |
| return false; |
| } |
| /* Get time as close as possible to reading map contents */ |
| rec->timestamp = gettime(); |
| |
| /* Record and sum values from each CPU */ |
| for (i = 0; i < nr_cpus; i++) { |
| rec->cpu[i].processed = values[i].processed; |
| sum_total += values[i].processed; |
| } |
| rec->total.processed = sum_total; |
| return true; |
| } |
| |
| static double calc_period(struct record *r, struct record *p) |
| { |
| double period_ = 0; |
| __u64 period = 0; |
| |
| period = r->timestamp - p->timestamp; |
| if (period > 0) |
| period_ = ((double) period / NANOSEC_PER_SEC); |
| |
| return period_; |
| } |
| |
| static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) |
| { |
| double period_ = 0; |
| __u64 period = 0; |
| |
| period = r->timestamp - p->timestamp; |
| if (period > 0) |
| period_ = ((double) period / NANOSEC_PER_SEC); |
| |
| return period_; |
| } |
| |
| static double calc_pps(struct datarec *r, struct datarec *p, double period) |
| { |
| __u64 packets = 0; |
| double pps = 0; |
| |
| if (period > 0) { |
| packets = r->processed - p->processed; |
| pps = packets / period; |
| } |
| return pps; |
| } |
| |
| static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) |
| { |
| __u64 packets = 0; |
| double pps = 0; |
| |
| if (period > 0) { |
| packets = r->processed - p->processed; |
| pps = packets / period; |
| } |
| return pps; |
| } |
| |
| static double calc_drop(struct datarec *r, struct datarec *p, double period) |
| { |
| __u64 packets = 0; |
| double pps = 0; |
| |
| if (period > 0) { |
| packets = r->dropped - p->dropped; |
| pps = packets / period; |
| } |
| return pps; |
| } |
| |
| static double calc_info(struct datarec *r, struct datarec *p, double period) |
| { |
| __u64 packets = 0; |
| double pps = 0; |
| |
| if (period > 0) { |
| packets = r->info - p->info; |
| pps = packets / period; |
| } |
| return pps; |
| } |
| |
| static void stats_print(struct stats_record *stats_rec, |
| struct stats_record *stats_prev, |
| bool err_only) |
| { |
| unsigned int nr_cpus = bpf_num_possible_cpus(); |
| int rec_i = 0, i, to_cpu; |
| double t = 0, pps = 0; |
| |
| /* Header */ |
| printf("%-15s %-7s %-12s %-12s %-9s\n", |
| "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); |
| |
| /* tracepoint: xdp:xdp_redirect_* */ |
| if (err_only) |
| rec_i = REDIR_ERROR; |
| |
| for (; rec_i < REDIR_RES_MAX; rec_i++) { |
| struct record_u64 *rec, *prev; |
| char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; |
| char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; |
| |
| rec = &stats_rec->xdp_redirect[rec_i]; |
| prev = &stats_prev->xdp_redirect[rec_i]; |
| t = calc_period_u64(rec, prev); |
| |
| for (i = 0; i < nr_cpus; i++) { |
| struct u64rec *r = &rec->cpu[i]; |
| struct u64rec *p = &prev->cpu[i]; |
| |
| pps = calc_pps_u64(r, p, t); |
| if (pps > 0) |
| printf(fmt1, "XDP_REDIRECT", i, |
| rec_i ? 0.0: pps, rec_i ? pps : 0.0, |
| err2str(rec_i)); |
| } |
| pps = calc_pps_u64(&rec->total, &prev->total, t); |
| printf(fmt2, "XDP_REDIRECT", "total", |
| rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i)); |
| } |
| |
| /* tracepoint: xdp:xdp_exception */ |
| for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { |
| struct record_u64 *rec, *prev; |
| char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; |
| char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; |
| |
| rec = &stats_rec->xdp_exception[rec_i]; |
| prev = &stats_prev->xdp_exception[rec_i]; |
| t = calc_period_u64(rec, prev); |
| |
| for (i = 0; i < nr_cpus; i++) { |
| struct u64rec *r = &rec->cpu[i]; |
| struct u64rec *p = &prev->cpu[i]; |
| |
| pps = calc_pps_u64(r, p, t); |
| if (pps > 0) |
| printf(fmt1, "Exception", i, |
| 0.0, pps, action2str(rec_i)); |
| } |
| pps = calc_pps_u64(&rec->total, &prev->total, t); |
| if (pps > 0) |
| printf(fmt2, "Exception", "total", |
| 0.0, pps, action2str(rec_i)); |
| } |
| |
| /* cpumap enqueue stats */ |
| for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { |
| char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; |
| char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; |
| struct record *rec, *prev; |
| char *info_str = ""; |
| double drop, info; |
| |
| rec = &stats_rec->xdp_cpumap_enqueue[to_cpu]; |
| prev = &stats_prev->xdp_cpumap_enqueue[to_cpu]; |
| t = calc_period(rec, prev); |
| for (i = 0; i < nr_cpus; i++) { |
| struct datarec *r = &rec->cpu[i]; |
| struct datarec *p = &prev->cpu[i]; |
| |
| pps = calc_pps(r, p, t); |
| drop = calc_drop(r, p, t); |
| info = calc_info(r, p, t); |
| if (info > 0) { |
| info_str = "bulk-average"; |
| info = pps / info; /* calc average bulk size */ |
| } |
| if (pps > 0) |
| printf(fmt1, "cpumap-enqueue", |
| i, to_cpu, pps, drop, info, info_str); |
| } |
| pps = calc_pps(&rec->total, &prev->total, t); |
| if (pps > 0) { |
| drop = calc_drop(&rec->total, &prev->total, t); |
| info = calc_info(&rec->total, &prev->total, t); |
| if (info > 0) { |
| info_str = "bulk-average"; |
| info = pps / info; /* calc average bulk size */ |
| } |
| printf(fmt2, "cpumap-enqueue", |
| "sum", to_cpu, pps, drop, info, info_str); |
| } |
| } |
| |
| /* cpumap kthread stats */ |
| { |
| char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n"; |
| char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n"; |
| struct record *rec, *prev; |
| double drop, info; |
| char *i_str = ""; |
| |
| rec = &stats_rec->xdp_cpumap_kthread; |
| prev = &stats_prev->xdp_cpumap_kthread; |
| t = calc_period(rec, prev); |
| for (i = 0; i < nr_cpus; i++) { |
| struct datarec *r = &rec->cpu[i]; |
| struct datarec *p = &prev->cpu[i]; |
| |
| pps = calc_pps(r, p, t); |
| drop = calc_drop(r, p, t); |
| info = calc_info(r, p, t); |
| if (info > 0) |
| i_str = "sched"; |
| if (pps > 0) |
| printf(fmt1, "cpumap-kthread", |
| i, pps, drop, info, i_str); |
| } |
| pps = calc_pps(&rec->total, &prev->total, t); |
| drop = calc_drop(&rec->total, &prev->total, t); |
| info = calc_info(&rec->total, &prev->total, t); |
| if (info > 0) |
| i_str = "sched-sum"; |
| printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); |
| } |
| |
| printf("\n"); |
| } |
| |
| static bool stats_collect(struct stats_record *rec) |
| { |
| int fd; |
| int i; |
| |
| /* TODO: Detect if someone unloaded the perf event_fd's, as |
| * this can happen by someone running perf-record -e |
| */ |
| |
| fd = map_data[0].fd; /* map0: redirect_err_cnt */ |
| for (i = 0; i < REDIR_RES_MAX; i++) |
| map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); |
| |
| fd = map_data[1].fd; /* map1: exception_cnt */ |
| for (i = 0; i < XDP_ACTION_MAX; i++) { |
| map_collect_record_u64(fd, i, &rec->xdp_exception[i]); |
| } |
| |
| fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */ |
| for (i = 0; i < MAX_CPUS; i++) |
| map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); |
| |
| fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ |
| map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); |
| |
| return true; |
| } |
| |
| static void *alloc_rec_per_cpu(int record_size) |
| { |
| unsigned int nr_cpus = bpf_num_possible_cpus(); |
| void *array; |
| size_t size; |
| |
| size = record_size * nr_cpus; |
| array = malloc(size); |
| memset(array, 0, size); |
| if (!array) { |
| fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); |
| exit(EXIT_FAIL_MEM); |
| } |
| return array; |
| } |
| |
| static struct stats_record *alloc_stats_record(void) |
| { |
| struct stats_record *rec; |
| int rec_sz; |
| int i; |
| |
| /* Alloc main stats_record structure */ |
| rec = malloc(sizeof(*rec)); |
| memset(rec, 0, sizeof(*rec)); |
| if (!rec) { |
| fprintf(stderr, "Mem alloc error\n"); |
| exit(EXIT_FAIL_MEM); |
| } |
| |
| /* Alloc stats stored per CPU for each record */ |
| rec_sz = sizeof(struct u64rec); |
| for (i = 0; i < REDIR_RES_MAX; i++) |
| rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz); |
| |
| for (i = 0; i < XDP_ACTION_MAX; i++) |
| rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz); |
| |
| rec_sz = sizeof(struct datarec); |
| rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); |
| |
| for (i = 0; i < MAX_CPUS; i++) |
| rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); |
| |
| return rec; |
| } |
| |
| static void free_stats_record(struct stats_record *r) |
| { |
| int i; |
| |
| for (i = 0; i < REDIR_RES_MAX; i++) |
| free(r->xdp_redirect[i].cpu); |
| |
| for (i = 0; i < XDP_ACTION_MAX; i++) |
| free(r->xdp_exception[i].cpu); |
| |
| free(r->xdp_cpumap_kthread.cpu); |
| |
| for (i = 0; i < MAX_CPUS; i++) |
| free(r->xdp_cpumap_enqueue[i].cpu); |
| |
| free(r); |
| } |
| |
| /* Pointer swap trick */ |
| static inline void swap(struct stats_record **a, struct stats_record **b) |
| { |
| struct stats_record *tmp; |
| |
| tmp = *a; |
| *a = *b; |
| *b = tmp; |
| } |
| |
| static void stats_poll(int interval, bool err_only) |
| { |
| struct stats_record *rec, *prev; |
| |
| rec = alloc_stats_record(); |
| prev = alloc_stats_record(); |
| stats_collect(rec); |
| |
| if (err_only) |
| printf("\n%s\n", __doc_err_only__); |
| |
| /* Trick to pretty printf with thousands separators use %' */ |
| setlocale(LC_NUMERIC, "en_US"); |
| |
| /* Header */ |
| if (verbose) |
| printf("\n%s", __doc__); |
| |
| /* TODO Need more advanced stats on error types */ |
| if (verbose) { |
| printf(" - Stats map0: %s\n", map_data[0].name); |
| printf(" - Stats map1: %s\n", map_data[1].name); |
| printf("\n"); |
| } |
| fflush(stdout); |
| |
| while (1) { |
| swap(&prev, &rec); |
| stats_collect(rec); |
| stats_print(rec, prev, err_only); |
| fflush(stdout); |
| sleep(interval); |
| } |
| |
| free_stats_record(rec); |
| free_stats_record(prev); |
| } |
| |
| static void print_bpf_prog_info(void) |
| { |
| int i; |
| |
| /* Prog info */ |
| printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt); |
| for (i = 0; i < prog_cnt; i++) { |
| printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]); |
| } |
| |
| /* Maps info */ |
| printf("Loaded BPF prog have %d map(s)\n", map_data_count); |
| for (i = 0; i < map_data_count; i++) { |
| char *name = map_data[i].name; |
| int fd = map_data[i].fd; |
| |
| printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name); |
| } |
| |
| /* Event info */ |
| printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt); |
| for (i = 0; i < prog_cnt; i++) { |
| if (event_fd[i] != -1) |
| printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]); |
| } |
| } |
| |
| int main(int argc, char **argv) |
| { |
| struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; |
| int longindex = 0, opt; |
| int ret = EXIT_SUCCESS; |
| char bpf_obj_file[256]; |
| |
| /* Default settings: */ |
| bool errors_only = true; |
| int interval = 2; |
| |
| snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]); |
| |
| /* Parse commands line args */ |
| while ((opt = getopt_long(argc, argv, "hDSs:", |
| long_options, &longindex)) != -1) { |
| switch (opt) { |
| case 'D': |
| debug = true; |
| break; |
| case 'S': |
| errors_only = false; |
| break; |
| case 's': |
| interval = atoi(optarg); |
| break; |
| case 'h': |
| default: |
| usage(argv); |
| return EXIT_FAILURE; |
| } |
| } |
| |
| if (setrlimit(RLIMIT_MEMLOCK, &r)) { |
| perror("setrlimit(RLIMIT_MEMLOCK)"); |
| return EXIT_FAILURE; |
| } |
| |
| if (load_bpf_file(bpf_obj_file)) { |
| printf("ERROR - bpf_log_buf: %s", bpf_log_buf); |
| return EXIT_FAILURE; |
| } |
| if (!prog_fd[0]) { |
| printf("ERROR - load_bpf_file: %s\n", strerror(errno)); |
| return EXIT_FAILURE; |
| } |
| |
| if (debug) { |
| print_bpf_prog_info(); |
| } |
| |
| /* Unload/stop tracepoint event by closing fd's */ |
| if (errors_only) { |
| /* The prog_fd[i] and event_fd[i] depend on the |
| * order the functions was defined in _kern.c |
| */ |
| close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */ |
| close(prog_fd[2]); /* func: trace_xdp_redirect */ |
| close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */ |
| close(prog_fd[3]); /* func: trace_xdp_redirect_map */ |
| } |
| |
| stats_poll(interval, errors_only); |
| |
| return ret; |
| } |