| /* SPDX-License-Identifier: GPL-2.0 */ |
| |
| #define _GNU_SOURCE |
| |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <linux/limits.h> |
| #include <poll.h> |
| #include <signal.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/inotify.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <sys/wait.h> |
| #include <unistd.h> |
| |
| #include "cgroup_util.h" |
| #include "../clone3/clone3_selftests.h" |
| |
/*
 * Read the contents of @path into @buf as a NUL-terminated string.
 *
 * At most @max_len - 1 bytes are fetched with a single read() call, so one
 * byte is always left for the terminator.
 *
 * Returns the number of bytes read, or a negative value when the file
 * cannot be opened or read. A @max_len of 0 is rejected with errno set to
 * EINVAL because there would be no room for the terminator.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/* max_len - 1 would wrap around to SIZE_MAX for an empty buffer. */
	if (!max_len) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len;
}
| |
/*
 * Append @len bytes from @buf to the file at @path using a single write().
 *
 * Returns the number of bytes written, or a negative value when the file
 * cannot be opened or the write fails. The descriptor is closed in every
 * case.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	written = write(fd, buf, len);
	close(fd);

	return written;
}
| |
/*
 * Build the path "@root/@name" in a freshly allocated buffer.
 *
 * Returns the malloc()ed string, which the caller is expected to free(),
 * or NULL if the allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* Both components, the '/' separator and the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
| |
/*
 * Build the path "@root/@name_@index" in a freshly allocated buffer.
 *
 * The required size is measured with snprintf() first, so any int value of
 * @index (including negative ones) is formatted without truncation.
 *
 * Returns the malloc()ed string, which the caller is expected to free(),
 * or NULL if formatting or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
| |
/*
 * Build the path of the control file @control inside @cgroup, i.e.
 * "@cgroup/@control", in a freshly allocated buffer.
 *
 * Returns the malloc()ed string, which the caller is expected to free(),
 * or NULL if the allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* Both components, the '/' separator and the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
| |
/*
 * Read the control file "@cgroup/@control" into @buf as a NUL-terminated
 * string; @len is the size of @buf.
 *
 * Returns 0 when the file could be read and -1 otherwise.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char path[PATH_MAX];
	ssize_t nread;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	nread = read_text(path, buf, len);

	return nread < 0 ? -1 : 0;
}
| |
/*
 * Compare the start of the control file "@cgroup/@control" with @expected.
 *
 * The scratch buffer holds strlen(@expected) bytes plus the terminator, so
 * no more than strlen(@expected) bytes of the file take part in the
 * comparison.
 *
 * Returns the strcmp() result (0 on a match), or -1 when @expected is
 * NULL, the allocation fails or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	char *buf;
	size_t size;
	int ret = -1;

	/* A NULL expectation can never match. */
	if (!expected)
		return -1;

	size = strlen(expected) + 1;
	buf = malloc(size);
	if (!buf)
		return -1;

	if (!cg_read(cgroup, control, buf, size))
		ret = strcmp(expected, buf);

	free(buf);
	return ret;
}
| |
| int cg_read_strstr(const char *cgroup, const char *control, const char *needle) |
| { |
| char buf[PAGE_SIZE]; |
| |
| if (cg_read(cgroup, control, buf, sizeof(buf))) |
| return -1; |
| |
| return strstr(buf, needle) ? 0 : -1; |
| } |
| |
/*
 * Read the control file "@cgroup/@control" and parse its beginning as a
 * decimal number with atol().
 *
 * Returns the parsed value, or -1 when the file cannot be read. A stored
 * value of -1 is therefore indistinguishable from a read failure.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char buf[128];
	int err = cg_read(cgroup, control, buf, sizeof(buf));

	return err ? -1 : atol(buf);
}
| |
| long cg_read_key_long(const char *cgroup, const char *control, const char *key) |
| { |
| char buf[PAGE_SIZE]; |
| char *ptr; |
| |
| if (cg_read(cgroup, control, buf, sizeof(buf))) |
| return -1; |
| |
| ptr = strstr(buf, key); |
| if (!ptr) |
| return -1; |
| |
| return atol(ptr + strlen(key)); |
| } |
| |
| long cg_read_lc(const char *cgroup, const char *control) |
| { |
| char buf[PAGE_SIZE]; |
| const char delim[] = "\n"; |
| char *line; |
| long cnt = 0; |
| |
| if (cg_read(cgroup, control, buf, sizeof(buf))) |
| return -1; |
| |
| for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) |
| cnt++; |
| |
| return cnt; |
| } |
| |
/*
 * Write the NUL-terminated string @buf (without the terminator) to the
 * control file "@cgroup/@control".
 *
 * Returns 0 only when the whole string was written; -1 on a failed or
 * short write.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf);
	ssize_t written;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	written = write_text(path, buf, len);

	return written == len ? 0 : -1;
}
| |
| int cg_find_unified_root(char *root, size_t len) |
| { |
| char buf[10 * PAGE_SIZE]; |
| char *fs, *mount, *type; |
| const char delim[] = "\n\t "; |
| |
| if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) |
| return -1; |
| |
| /* |
| * Example: |
| * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 |
| */ |
| for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { |
| mount = strtok(NULL, delim); |
| type = strtok(NULL, delim); |
| strtok(NULL, delim); |
| strtok(NULL, delim); |
| strtok(NULL, delim); |
| |
| if (strcmp(type, "cgroup2") == 0) { |
| strncpy(root, mount, len); |
| return 0; |
| } |
| } |
| |
| return -1; |
| } |
| |
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the result of mkdir(): 0 on success, -1 with errno set on
 * failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
| |
| int cg_wait_for_proc_count(const char *cgroup, int count) |
| { |
| char buf[10 * PAGE_SIZE] = {0}; |
| int attempts; |
| char *ptr; |
| |
| for (attempts = 10; attempts >= 0; attempts--) { |
| int nr = 0; |
| |
| if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) |
| break; |
| |
| for (ptr = buf; *ptr; ptr++) |
| if (*ptr == '\n') |
| nr++; |
| |
| if (nr >= count) |
| return 0; |
| |
| usleep(100000); |
| } |
| |
| return -1; |
| } |
| |
| int cg_killall(const char *cgroup) |
| { |
| char buf[PAGE_SIZE]; |
| char *ptr = buf; |
| |
| /* If cgroup.kill exists use it. */ |
| if (!cg_write(cgroup, "cgroup.kill", "1")) |
| return 0; |
| |
| if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) |
| return -1; |
| |
| while (ptr < buf + sizeof(buf)) { |
| int pid = strtol(ptr, &ptr, 10); |
| |
| if (pid == 0) |
| break; |
| if (*ptr) |
| ptr++; |
| else |
| break; |
| if (kill(pid, SIGKILL)) |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() reports EBUSY, the processes in the cgroup are killed and
 * the removal is retried after a 100us pause; there is no upper bound on
 * the number of retries. A directory that does not exist counts as
 * success.
 *
 * Returns 0 on success, otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
| |
/*
 * Move the process @pid into @cgroup by writing its PID, formatted as a
 * decimal string, to "@cgroup/cgroup.procs".
 *
 * Returns 0 on success and -1 on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_str[64];

	snprintf(pid_str, sizeof(pid_str), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", pid_str);
}
| |
/*
 * Write "0" to "@cgroup/cgroup.procs".
 *
 * NOTE(review): presumably the kernel treats PID 0 as "the writing
 * process", which is what moves the caller into @cgroup; confirm against
 * the cgroup documentation.
 *
 * Returns 0 on success and -1 on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
| |
/*
 * Write "0" to "@cgroup/cgroup.threads".
 *
 * NOTE(review): presumably the kernel treats TID 0 as "the writing
 * thread", which is what moves the calling thread into @cgroup; confirm
 * against the cgroup documentation.
 *
 * Returns 0 on success and -1 on failure.
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
| |
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup,
 * otherwise with the value returned by @fn.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if no child could be created, and -1 if waiting failed or
 * the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid after a successful waitpid(); retry when a
	 * signal interrupts the wait so the child is still reaped.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
| |
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the clone3() result when the call was attempted and did not fail
 * with ENOSYS or E2BIG. In every other case (including builds where
 * CLONE_ARGS_SIZE_VER2 is not defined) errno is set to ENOSYS and -ENOSYS
 * is returned so that callers can treat the feature as unavailable.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/*
	 * Anything but these two errors is a genuine result:
	 *   ENOSYS -> clone3() not available
	 *   E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
| |
/*
 * Wait for a state change of child @pid with waitid() and report it.
 *
 * @options selects the state changes to wait for (WEXITED, WSTOPPED,
 * WCONTINUED, ...); __WALL and __WNOTHREAD are always added. The wait is
 * restarted when it is interrupted by a signal.
 *
 * waitid() reports the kind of event in si_code and the plain exit code or
 * signal number in si_status, so the result is decoded from si_code rather
 * than with the wait()-status macros.
 *
 * Returns:
 *   the exit code   if the child exited and WEXITED was requested,
 *   the stop signal if the child stopped and WSTOPPED was requested,
 *   0               if the child continued and WCONTINUED was requested,
 *   -1              on error or for any other outcome (e.g. the child was
 *                   killed by a signal, or nothing was reported).
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

again:
	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;
		return -1;
	}

	switch (info.si_code) {
	case CLD_EXITED:
		if (options & WEXITED)
			return info.si_status;
		break;
	case CLD_STOPPED:
		if (options & WSTOPPED)
			return info.si_status;
		break;
	case CLD_CONTINUED:
		if (options & WCONTINUED)
			return 0;
		break;
	default:
		break;
	}

	return -1;
}
| |
/*
 * Open the directory @dir as an O_PATH descriptor. Symbolic links are not
 * followed and the descriptor is marked close-on-exec.
 *
 * Returns the new descriptor, or -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
| |
/*
 * Close @fd if it is a valid (non-negative) descriptor while preserving
 * the current value of errno, so that an earlier error is not overwritten
 * by close().
 *
 * The body is wrapped in do { } while (0) so the macro behaves like a
 * single statement (safe in unbraced if/else), and @fd is evaluated
 * exactly once.
 */
#define close_prot_errno(fd)				\
	do {						\
		int _fd_ = (fd);			\
							\
		if (_fd_ >= 0) {			\
			int _e_ = errno;		\
							\
			close(_fd_);			\
			errno = _e_;			\
		}					\
	} while (0)
| |
/*
 * Spawn a child directly inside @cgroup via clone_into_cgroup() and let it
 * run @fn(@cgroup, @arg); the child exits with @fn's return value.
 *
 * The parent does not wait for the child. It returns the value obtained
 * from clone_into_cgroup() (the child's PID on success, negative on
 * failure), or -1 if @cgroup cannot be opened. errno from the clone
 * attempt is preserved across closing the directory descriptor.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	close_prot_errno(dirfd);

	/* Only the child gets 0 back; it never returns from here. */
	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
| |
/*
 * Start @fn(@cgroup, @arg) in a child process inside @cgroup without
 * waiting for it.
 *
 * The child is created with clone_into_cgroup_run_nowait() first. If that
 * reports ENOSYS, the function falls back to fork() and the child moves
 * itself into @cgroup by writing its PID to cgroup.procs (exiting with
 * EXIT_FAILURE if that write fails).
 *
 * Returns the child's PID, -1 if the clone attempt failed for a reason
 * other than ENOSYS, or the negative fork() result.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pid_str[64];
	int pid;

	pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: enter the cgroup by hand, then run the payload. */
	snprintf(pid_str, sizeof(pid_str), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pid_str))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
| |
/*
 * Create an unnamed temporary file in the current directory, opened for
 * reading and writing.
 *
 * open() requires a mode argument whenever O_TMPFILE is given; without it
 * the file's permission bits would be taken from an indeterminate value.
 * The file is created with mode 0600 (subject to the umask).
 *
 * Returns the new descriptor, or -1 with errno set on failure.
 */
int get_temp_fd(void)
{
	return open(".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
}
| |
| int alloc_pagecache(int fd, size_t size) |
| { |
| char buf[PAGE_SIZE]; |
| struct stat st; |
| int i; |
| |
| if (fstat(fd, &st)) |
| goto cleanup; |
| |
| size += st.st_size; |
| |
| if (ftruncate(fd, size)) |
| goto cleanup; |
| |
| for (i = 0; i < size; i += sizeof(buf)) |
| read(fd, buf, sizeof(buf)); |
| |
| return 0; |
| |
| cleanup: |
| return -1; |
| } |
| |
| int alloc_anon(const char *cgroup, void *arg) |
| { |
| size_t size = (unsigned long)arg; |
| char *buf, *ptr; |
| |
| buf = malloc(size); |
| for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) |
| *ptr = 0; |
| |
| free(buf); |
| return 0; |
| } |
| |
| int is_swap_enabled(void) |
| { |
| char buf[PAGE_SIZE]; |
| const char delim[] = "\n"; |
| int cnt = 0; |
| char *line; |
| |
| if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) |
| return -1; |
| |
| for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) |
| cnt++; |
| |
| return cnt > 1; |
| } |
| |
/*
 * Write @score, formatted as a decimal number, to
 * /proc/@pid/oom_score_adj.
 *
 * Returns 0 on success, or the negative result of open() or dprintf()
 * when the file cannot be opened or written.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);
	close(fd);

	return written < 0 ? written : 0;
}
| |
/*
 * Read the procfs file @item of a process into @buf (of @size bytes) as a
 * NUL-terminated string.
 *
 * With a non-zero @pid the file is /proc/@pid/@item. With @pid == 0 it is
 * /proc/thread-self/@item when @thread is true and /proc/self/@item
 * otherwise.
 *
 * Returns the number of bytes read, or a negative value on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
| |
| int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) |
| { |
| char buf[PAGE_SIZE]; |
| |
| if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) |
| return -1; |
| |
| return strstr(buf, needle) ? 0 : -1; |
| } |
| |
/*
 * Check that a process can be cloned directly into @cgroup: spawn a child
 * there that exits immediately with EXIT_SUCCESS, then reap it.
 *
 * Returns 0 if the child was created, -1 if @cgroup cannot be opened or
 * the clone fails. The outcome of reaping the child is ignored.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dirfd;

	dirfd = dirfd_open_opath(cgroup);
	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	close_prot_errno(dirfd);

	if (child == 0)
		exit(EXIT_SUCCESS);

	if (child < 0)
		return -1;

	/*
	 * Whether reaping succeeds is irrelevant here; only the initial
	 * clone matters.
	 */
	(void)clone_reap(child, WEXITED);

	return 0;
}
| |
/*
 * Create an inotify instance that watches the control file
 * "@cgroup/@filename" for modifications (IN_MODIFY).
 *
 * The path string obtained from cg_control() is only needed to register
 * the watch and is freed before returning.
 *
 * Returns the inotify descriptor, or -1 if the instance cannot be created,
 * the path cannot be allocated or the watch cannot be added.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	wd = inotify_add_watch(fd, path, IN_MODIFY);
	free(path);
	if (wd == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
| |
/*
 * Set up an inotify watch for modifications of "@cgroup/cgroup.events".
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
| |
/*
 * Set up an inotify watch for modifications of "@cgroup/memory.events".
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
| |
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout and restarted both when it
 * times out and when it is interrupted by a signal, so the wait itself is
 * unbounded.
 *
 * Returns 0 once @fd is readable. Returns -1 if poll() fails, or if it
 * reports an error condition on @fd (POLLERR, POLLHUP or POLLNVAL) without
 * POLLIN; retrying in that case would spin without ever succeeding.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, 10000);
		if (ret == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		/* Timed out: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only POLLIN was requested, so anything else reported here
		 * is an error condition that will not go away.
		 */
		return -1;
	}
}