| // SPDX-License-Identifier: GPL-2.0 |
| #define _GNU_SOURCE |
| #include <sched.h> |
| #include <sys/mount.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <linux/limits.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <linux/sched.h> |
| #include <fcntl.h> |
| #include <unistd.h> |
| #include <ftw.h> |
| |
| #include "cgroup_helpers.h" |
| #include "bpf_util.h" |
| |
| /* |
| * To avoid relying on the system setup, when setup_cgroup_env is called |
| * we create a new mount namespace, and cgroup namespace. The cgroupv2 |
| * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't |
| * have cgroupv2 enabled at this point in time. It's easier to create our |
| * own mount namespace and manage it ourselves. We assume /mnt exists. |
| * |
| * Related cgroupv1 helpers are named *classid*(), since we only use the |
| * net_cls controller for tagging net_cls.classid. We assume the default |
| * mount under /sys/fs/cgroup/net_cls, which should be the case for the |
| * vast majority of users. |
| */ |
| |
| #define WALK_FD_LIMIT 16 |
| |
| #define CGROUP_MOUNT_PATH "/mnt" |
| #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" |
| #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" |
| #define CGROUP_WORK_DIR "/cgroup-test-work-dir" |
| |
| #define format_cgroup_path_pid(buf, path, pid) \ |
| snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ |
| CGROUP_WORK_DIR, pid, path) |
| |
| #define format_cgroup_path(buf, path) \ |
| format_cgroup_path_pid(buf, path, getpid()) |
| |
| #define format_parent_cgroup_path(buf, path) \ |
| format_cgroup_path_pid(buf, path, getppid()) |
| |
| #define format_classid_path_pid(buf, pid) \ |
| snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ |
| CGROUP_WORK_DIR, pid) |
| |
| #define format_classid_path(buf) \ |
| format_classid_path_pid(buf, getpid()) |
| |
| static __thread bool cgroup_workdir_mounted; |
| |
| static void __cleanup_cgroup_environment(void); |
| |
| static int __enable_controllers(const char *cgroup_path, const char *controllers) |
| { |
| char path[PATH_MAX + 1]; |
| char enable[PATH_MAX + 1]; |
| char *c, *c2; |
| int fd, cfd; |
| ssize_t len; |
| |
| /* If not controllers are passed, enable all available controllers */ |
| if (!controllers) { |
| snprintf(path, sizeof(path), "%s/cgroup.controllers", |
| cgroup_path); |
| fd = open(path, O_RDONLY); |
| if (fd < 0) { |
| log_err("Opening cgroup.controllers: %s", path); |
| return 1; |
| } |
| len = read(fd, enable, sizeof(enable) - 1); |
| if (len < 0) { |
| close(fd); |
| log_err("Reading cgroup.controllers: %s", path); |
| return 1; |
| } else if (len == 0) { /* No controllers to enable */ |
| close(fd); |
| return 0; |
| } |
| enable[len] = 0; |
| close(fd); |
| } else { |
| bpf_strlcpy(enable, controllers, sizeof(enable)); |
| } |
| |
| snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); |
| cfd = open(path, O_RDWR); |
| if (cfd < 0) { |
| log_err("Opening cgroup.subtree_control: %s", path); |
| return 1; |
| } |
| |
| for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { |
| if (dprintf(cfd, "+%s\n", c) <= 0) { |
| log_err("Enabling controller %s: %s", c, path); |
| close(cfd); |
| return 1; |
| } |
| } |
| close(cfd); |
| return 0; |
| } |
| |
| /** |
| * enable_controllers() - Enable cgroup v2 controllers |
| * @relative_path: The cgroup path, relative to the workdir |
| * @controllers: List of controllers to enable in cgroup.controllers format |
| * |
| * |
| * Enable given cgroup v2 controllers, if @controllers is NULL, enable all |
| * available controllers. |
| * |
| * If successful, 0 is returned. |
| */ |
| int enable_controllers(const char *relative_path, const char *controllers) |
| { |
| char cgroup_path[PATH_MAX + 1]; |
| |
| format_cgroup_path(cgroup_path, relative_path); |
| return __enable_controllers(cgroup_path, controllers); |
| } |
| |
| static int __write_cgroup_file(const char *cgroup_path, const char *file, |
| const char *buf) |
| { |
| char file_path[PATH_MAX + 1]; |
| int fd; |
| |
| snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); |
| fd = open(file_path, O_RDWR); |
| if (fd < 0) { |
| log_err("Opening %s", file_path); |
| return 1; |
| } |
| |
| if (dprintf(fd, "%s", buf) <= 0) { |
| log_err("Writing to %s", file_path); |
| close(fd); |
| return 1; |
| } |
| close(fd); |
| return 0; |
| } |
| |
| /** |
| * write_cgroup_file() - Write to a cgroup file |
| * @relative_path: The cgroup path, relative to the workdir |
| * @file: The name of the file in cgroupfs to write to |
| * @buf: Buffer to write to the file |
| * |
| * Write to a file in the given cgroup's directory. |
| * |
| * If successful, 0 is returned. |
| */ |
| int write_cgroup_file(const char *relative_path, const char *file, |
| const char *buf) |
| { |
| char cgroup_path[PATH_MAX - 24]; |
| |
| format_cgroup_path(cgroup_path, relative_path); |
| return __write_cgroup_file(cgroup_path, file, buf); |
| } |
| |
| /** |
| * write_cgroup_file_parent() - Write to a cgroup file in the parent process |
| * workdir |
| * @relative_path: The cgroup path, relative to the parent process workdir |
| * @file: The name of the file in cgroupfs to write to |
| * @buf: Buffer to write to the file |
| * |
| * Write to a file in the given cgroup's directory under the parent process |
| * workdir. |
| * |
| * If successful, 0 is returned. |
| */ |
| int write_cgroup_file_parent(const char *relative_path, const char *file, |
| const char *buf) |
| { |
| char cgroup_path[PATH_MAX - 24]; |
| |
| format_parent_cgroup_path(cgroup_path, relative_path); |
| return __write_cgroup_file(cgroup_path, file, buf); |
| } |
| |
| /** |
| * setup_cgroup_environment() - Setup the cgroup environment |
| * |
| * After calling this function, cleanup_cgroup_environment should be called |
| * once testing is complete. |
| * |
| * This function will print an error to stderr and return 1 if it is unable |
| * to setup the cgroup environment. If setup is successful, 0 is returned. |
| */ |
| int setup_cgroup_environment(void) |
| { |
| char cgroup_workdir[PATH_MAX - 24]; |
| |
| format_cgroup_path(cgroup_workdir, ""); |
| |
| if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { |
| log_err("mkdir mount"); |
| return 1; |
| } |
| |
| if (unshare(CLONE_NEWNS)) { |
| log_err("unshare"); |
| return 1; |
| } |
| |
| if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { |
| log_err("mount fakeroot"); |
| return 1; |
| } |
| |
| if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { |
| log_err("mount cgroup2"); |
| return 1; |
| } |
| cgroup_workdir_mounted = true; |
| |
| /* Cleanup existing failed runs, now that the environment is setup */ |
| __cleanup_cgroup_environment(); |
| |
| if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { |
| log_err("mkdir cgroup work dir"); |
| return 1; |
| } |
| |
| /* Enable all available controllers to increase test coverage */ |
| if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || |
| __enable_controllers(cgroup_workdir, NULL)) |
| return 1; |
| |
| return 0; |
| } |
| |
| static int nftwfunc(const char *filename, const struct stat *statptr, |
| int fileflags, struct FTW *pfwt) |
| { |
| if ((fileflags & FTW_D) && rmdir(filename)) |
| log_err("Removing cgroup: %s", filename); |
| return 0; |
| } |
| |
| static int join_cgroup_from_top(const char *cgroup_path) |
| { |
| char cgroup_procs_path[PATH_MAX + 1]; |
| pid_t pid = getpid(); |
| int fd, rc = 0; |
| |
| snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), |
| "%s/cgroup.procs", cgroup_path); |
| |
| fd = open(cgroup_procs_path, O_WRONLY); |
| if (fd < 0) { |
| log_err("Opening Cgroup Procs: %s", cgroup_procs_path); |
| return 1; |
| } |
| |
| if (dprintf(fd, "%d\n", pid) < 0) { |
| log_err("Joining Cgroup"); |
| rc = 1; |
| } |
| |
| close(fd); |
| return rc; |
| } |
| |
| /** |
| * join_cgroup() - Join a cgroup |
| * @relative_path: The cgroup path, relative to the workdir, to join |
| * |
| * This function expects a cgroup to already be created, relative to the cgroup |
| * work dir, and it joins it. For example, passing "/my-cgroup" as the path |
| * would actually put the calling process into the cgroup |
| * "/cgroup-test-work-dir/my-cgroup" |
| * |
| * On success, it returns 0, otherwise on failure it returns 1. |
| */ |
| int join_cgroup(const char *relative_path) |
| { |
| char cgroup_path[PATH_MAX + 1]; |
| |
| format_cgroup_path(cgroup_path, relative_path); |
| return join_cgroup_from_top(cgroup_path); |
| } |
| |
| /** |
| * join_root_cgroup() - Join the root cgroup |
| * |
| * This function joins the root cgroup. |
| * |
| * On success, it returns 0, otherwise on failure it returns 1. |
| */ |
| int join_root_cgroup(void) |
| { |
| return join_cgroup_from_top(CGROUP_MOUNT_PATH); |
| } |
| |
| /** |
| * join_parent_cgroup() - Join a cgroup in the parent process workdir |
| * @relative_path: The cgroup path, relative to parent process workdir, to join |
| * |
| * See join_cgroup(). |
| * |
| * On success, it returns 0, otherwise on failure it returns 1. |
| */ |
| int join_parent_cgroup(const char *relative_path) |
| { |
| char cgroup_path[PATH_MAX + 1]; |
| |
| format_parent_cgroup_path(cgroup_path, relative_path); |
| return join_cgroup_from_top(cgroup_path); |
| } |
| |
| /** |
| * __cleanup_cgroup_environment() - Delete temporary cgroups |
| * |
| * This is a helper for cleanup_cgroup_environment() that is responsible for |
| * deletion of all temporary cgroups that have been created during the test. |
| */ |
| static void __cleanup_cgroup_environment(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_cgroup_path(cgroup_workdir, ""); |
| join_cgroup_from_top(CGROUP_MOUNT_PATH); |
| nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); |
| } |
| |
| /** |
| * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment |
| * |
| * This is an idempotent function to delete all temporary cgroups that |
| * have been created during the test and unmount the cgroup testing work |
| * directory. |
| * |
| * At call time, it moves the calling process to the root cgroup, and then |
| * runs the deletion process. It is idempotent, and should not fail, unless |
| * a process is lingering. |
| * |
| * On failure, it will print an error to stderr, and try to continue. |
| */ |
| void cleanup_cgroup_environment(void) |
| { |
| __cleanup_cgroup_environment(); |
| if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) |
| log_err("umount cgroup2"); |
| cgroup_workdir_mounted = false; |
| } |
| |
| /** |
| * get_root_cgroup() - Get the FD of the root cgroup |
| * |
| * On success, it returns the file descriptor. On failure, it returns -1. |
| * If there is a failure, it prints the error to stderr. |
| */ |
| int get_root_cgroup(void) |
| { |
| int fd; |
| |
| fd = open(CGROUP_MOUNT_PATH, O_RDONLY); |
| if (fd < 0) { |
| log_err("Opening root cgroup"); |
| return -1; |
| } |
| return fd; |
| } |
| |
| /* |
| * remove_cgroup() - Remove a cgroup |
| * @relative_path: The cgroup path, relative to the workdir, to remove |
| * |
| * This function expects a cgroup to already be created, relative to the cgroup |
| * work dir. It also expects the cgroup doesn't have any children or live |
| * processes and it removes the cgroup. |
| * |
| * On failure, it will print an error to stderr. |
| */ |
| void remove_cgroup(const char *relative_path) |
| { |
| char cgroup_path[PATH_MAX + 1]; |
| |
| format_cgroup_path(cgroup_path, relative_path); |
| if (rmdir(cgroup_path)) |
| log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); |
| } |
| |
| /** |
| * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD |
| * @relative_path: The cgroup path, relative to the workdir, to join |
| * |
| * This function creates a cgroup under the top level workdir and returns the |
| * file descriptor. It is idempotent. |
| * |
| * On success, it returns the file descriptor. On failure it returns -1. |
| * If there is a failure, it prints the error to stderr. |
| */ |
| int create_and_get_cgroup(const char *relative_path) |
| { |
| char cgroup_path[PATH_MAX + 1]; |
| int fd; |
| |
| format_cgroup_path(cgroup_path, relative_path); |
| if (mkdir(cgroup_path, 0777) && errno != EEXIST) { |
| log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); |
| return -1; |
| } |
| |
| fd = open(cgroup_path, O_RDONLY); |
| if (fd < 0) { |
| log_err("Opening Cgroup"); |
| return -1; |
| } |
| |
| return fd; |
| } |
| |
| /** |
| * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path |
| * @cgroup_workdir: The absolute cgroup path |
| * |
| * On success, it returns the cgroup id. On failure it returns 0, |
| * which is an invalid cgroup id. |
| * If there is a failure, it prints the error to stderr. |
| */ |
| static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) |
| { |
| int dirfd, err, flags, mount_id, fhsize; |
| union { |
| unsigned long long cgid; |
| unsigned char raw_bytes[8]; |
| } id; |
| struct file_handle *fhp, *fhp2; |
| unsigned long long ret = 0; |
| |
| dirfd = AT_FDCWD; |
| flags = 0; |
| fhsize = sizeof(*fhp); |
| fhp = calloc(1, fhsize); |
| if (!fhp) { |
| log_err("calloc"); |
| return 0; |
| } |
| err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); |
| if (err >= 0 || fhp->handle_bytes != 8) { |
| log_err("name_to_handle_at"); |
| goto free_mem; |
| } |
| |
| fhsize = sizeof(struct file_handle) + fhp->handle_bytes; |
| fhp2 = realloc(fhp, fhsize); |
| if (!fhp2) { |
| log_err("realloc"); |
| goto free_mem; |
| } |
| err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); |
| fhp = fhp2; |
| if (err < 0) { |
| log_err("name_to_handle_at"); |
| goto free_mem; |
| } |
| |
| memcpy(id.raw_bytes, fhp->f_handle, 8); |
| ret = id.cgid; |
| |
| free_mem: |
| free(fhp); |
| return ret; |
| } |
| |
| unsigned long long get_cgroup_id(const char *relative_path) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_cgroup_path(cgroup_workdir, relative_path); |
| return get_cgroup_id_from_path(cgroup_workdir); |
| } |
| |
| int cgroup_setup_and_join(const char *path) { |
| int cg_fd; |
| |
| if (setup_cgroup_environment()) { |
| fprintf(stderr, "Failed to setup cgroup environment\n"); |
| return -EINVAL; |
| } |
| |
| cg_fd = create_and_get_cgroup(path); |
| if (cg_fd < 0) { |
| fprintf(stderr, "Failed to create test cgroup\n"); |
| cleanup_cgroup_environment(); |
| return cg_fd; |
| } |
| |
| if (join_cgroup(path)) { |
| fprintf(stderr, "Failed to join cgroup\n"); |
| cleanup_cgroup_environment(); |
| return -EINVAL; |
| } |
| return cg_fd; |
| } |
| |
| /** |
| * setup_classid_environment() - Setup the cgroupv1 net_cls environment |
| * |
| * This function should only be called in a custom mount namespace, e.g. |
| * created by running setup_cgroup_environment. |
| * |
| * After calling this function, cleanup_classid_environment should be called |
| * once testing is complete. |
| * |
| * This function will print an error to stderr and return 1 if it is unable |
| * to setup the cgroup environment. If setup is successful, 0 is returned. |
| */ |
| int setup_classid_environment(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_classid_path(cgroup_workdir); |
| |
| if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && |
| errno != EBUSY) { |
| log_err("mount cgroup base"); |
| return 1; |
| } |
| |
| if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { |
| log_err("mkdir cgroup net_cls"); |
| return 1; |
| } |
| |
| if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { |
| if (errno != EBUSY) { |
| log_err("mount cgroup net_cls"); |
| return 1; |
| } |
| |
| if (rmdir(NETCLS_MOUNT_PATH)) { |
| log_err("rmdir cgroup net_cls"); |
| return 1; |
| } |
| if (umount(CGROUP_MOUNT_DFLT)) { |
| log_err("umount cgroup base"); |
| return 1; |
| } |
| } |
| |
| cleanup_classid_environment(); |
| |
| if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { |
| log_err("mkdir cgroup work dir"); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * set_classid() - Set a cgroupv1 net_cls classid |
| * |
| * Writes the classid into the cgroup work dir's net_cls.classid |
| * file in order to later on trigger socket tagging. |
| * |
| * We leverage the current pid as the classid, ensuring unique identification. |
| * |
| * On success, it returns 0, otherwise on failure it returns 1. If there |
| * is a failure, it prints the error to stderr. |
| */ |
| int set_classid(void) |
| { |
| char cgroup_workdir[PATH_MAX - 42]; |
| char cgroup_classid_path[PATH_MAX + 1]; |
| int fd, rc = 0; |
| |
| format_classid_path(cgroup_workdir); |
| snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), |
| "%s/net_cls.classid", cgroup_workdir); |
| |
| fd = open(cgroup_classid_path, O_WRONLY); |
| if (fd < 0) { |
| log_err("Opening cgroup classid: %s", cgroup_classid_path); |
| return 1; |
| } |
| |
| if (dprintf(fd, "%u\n", getpid()) < 0) { |
| log_err("Setting cgroup classid"); |
| rc = 1; |
| } |
| |
| close(fd); |
| return rc; |
| } |
| |
| /** |
| * join_classid() - Join a cgroupv1 net_cls classid |
| * |
| * This function expects the cgroup work dir to be already created, as we |
| * join it here. This causes the process sockets to be tagged with the given |
| * net_cls classid. |
| * |
| * On success, it returns 0, otherwise on failure it returns 1. |
| */ |
| int join_classid(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_classid_path(cgroup_workdir); |
| return join_cgroup_from_top(cgroup_workdir); |
| } |
| |
| /** |
| * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment |
| * |
| * At call time, it moves the calling process to the root cgroup, and then |
| * runs the deletion process. |
| * |
| * On failure, it will print an error to stderr, and try to continue. |
| */ |
| void cleanup_classid_environment(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_classid_path(cgroup_workdir); |
| join_cgroup_from_top(NETCLS_MOUNT_PATH); |
| nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); |
| } |
| |
| /** |
| * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup |
| */ |
| unsigned long long get_classid_cgroup_id(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_classid_path(cgroup_workdir); |
| return get_cgroup_id_from_path(cgroup_workdir); |
| } |
| |
| /** |
| * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. |
| * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be |
| * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like |
| * "net_cls,net_prio". |
| */ |
| int get_cgroup1_hierarchy_id(const char *subsys_name) |
| { |
| char *c, *c2, *c3, *c4; |
| bool found = false; |
| char line[1024]; |
| FILE *file; |
| int i, id; |
| |
| if (!subsys_name) |
| return -1; |
| |
| file = fopen("/proc/self/cgroup", "r"); |
| if (!file) { |
| log_err("fopen /proc/self/cgroup"); |
| return -1; |
| } |
| |
| while (fgets(line, 1024, file)) { |
| i = 0; |
| for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { |
| if (i == 0) { |
| id = strtol(c, NULL, 10); |
| } else if (i == 1) { |
| if (!strcmp(c, subsys_name)) { |
| found = true; |
| break; |
| } |
| |
| /* Multiple subsystems may share one single mount point */ |
| for (c3 = strtok_r(c, ",", &c4); c3; |
| c3 = strtok_r(NULL, ",", &c4)) { |
| if (!strcmp(c, subsys_name)) { |
| found = true; |
| break; |
| } |
| } |
| } |
| i++; |
| } |
| if (found) |
| break; |
| } |
| fclose(file); |
| return found ? id : -1; |
| } |
| |
| /** |
| * open_classid() - Open a cgroupv1 net_cls classid |
| * |
| * This function expects the cgroup work dir to be already created, as we |
| * open it here. |
| * |
| * On success, it returns the file descriptor. On failure it returns -1. |
| */ |
| int open_classid(void) |
| { |
| char cgroup_workdir[PATH_MAX + 1]; |
| |
| format_classid_path(cgroup_workdir); |
| return open(cgroup_workdir, O_RDONLY); |
| } |