| #if defined __amd64__ || defined __i386__ |
| /* |
| * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> |
| * |
| * Permission to use, copy, modify, and distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| */ |
| /* |
| * Create a process without mappings by unmapping everything at once and |
| * holding it with ptrace(2). See what happens to |
| * |
| * /proc/${pid}/maps |
| * /proc/${pid}/numa_maps |
| * /proc/${pid}/smaps |
| * /proc/${pid}/smaps_rollup |
| */ |
| #undef _GNU_SOURCE |
| #define _GNU_SOURCE |
| |
| #undef NDEBUG |
| #include <assert.h> |
| #include <errno.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <sys/mman.h> |
| #include <sys/ptrace.h> |
| #include <sys/resource.h> |
| #include <sys/syscall.h> |
| #include <sys/types.h> |
| #include <sys/wait.h> |
| #include <unistd.h> |
| |
| #ifdef __amd64__ |
| #define TEST_VSYSCALL |
| #endif |
| |
| #if defined __amd64__ |
| #ifndef SYS_pkey_alloc |
| #define SYS_pkey_alloc 330 |
| #endif |
| #ifndef SYS_pkey_free |
| #define SYS_pkey_free 331 |
| #endif |
| #elif defined __i386__ |
| #ifndef SYS_pkey_alloc |
| #define SYS_pkey_alloc 381 |
| #endif |
| #ifndef SYS_pkey_free |
| #define SYS_pkey_free 382 |
| #endif |
| #else |
| #error "SYS_pkey_alloc" |
| #endif |
| |
| static int g_protection_key_support; |
| |
/*
 * Probe whether protection keys are usable by allocating one.
 *
 * Returns 1 if pkey_alloc() hands out a key (which is freed again right
 * away), 0 if it fails with ENOSYS or EINVAL. Any other outcome is reported
 * on stderr and terminates the program with EXIT_FAILURE.
 */
static int protection_key_support(void)
{
	const long key = syscall(SYS_pkey_alloc, 0, 0);

	if (key > 0) {
		syscall(SYS_pkey_free, (int)key);
		return 1;
	}

	if (key == -1) {
		switch (errno) {
		case ENOSYS:
			return 0;
		case EINVAL:
			// ospke=n
			return 0;
		default:
			break;
		}
	}

	fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, key, errno);
	exit(EXIT_FAILURE);
}
| |
| /* |
| * 0: vsyscall VMA doesn't exist vsyscall=none |
| * 1: vsyscall VMA is --xp vsyscall=xonly |
| * 2: vsyscall VMA is r-xp vsyscall=emulate |
| */ |
| static volatile int g_vsyscall; |
| static const char *g_proc_pid_maps_vsyscall; |
| static const char *g_proc_pid_smaps_vsyscall; |
| |
| static const char proc_pid_maps_vsyscall_0[] = ""; |
| static const char proc_pid_maps_vsyscall_1[] = |
| "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; |
| static const char proc_pid_maps_vsyscall_2[] = |
| "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; |
| |
| static const char proc_pid_smaps_vsyscall_0[] = ""; |
| |
| static const char proc_pid_smaps_vsyscall_1[] = |
| "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" |
| "Size: 4 kB\n" |
| "KernelPageSize: 4 kB\n" |
| "MMUPageSize: 4 kB\n" |
| "Rss: 0 kB\n" |
| "Pss: 0 kB\n" |
| "Pss_Dirty: 0 kB\n" |
| "Shared_Clean: 0 kB\n" |
| "Shared_Dirty: 0 kB\n" |
| "Private_Clean: 0 kB\n" |
| "Private_Dirty: 0 kB\n" |
| "Referenced: 0 kB\n" |
| "Anonymous: 0 kB\n" |
| "KSM: 0 kB\n" |
| "LazyFree: 0 kB\n" |
| "AnonHugePages: 0 kB\n" |
| "ShmemPmdMapped: 0 kB\n" |
| "FilePmdMapped: 0 kB\n" |
| "Shared_Hugetlb: 0 kB\n" |
| "Private_Hugetlb: 0 kB\n" |
| "Swap: 0 kB\n" |
| "SwapPss: 0 kB\n" |
| "Locked: 0 kB\n" |
| "THPeligible: 0\n" |
| ; |
| |
| static const char proc_pid_smaps_vsyscall_2[] = |
| "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" |
| "Size: 4 kB\n" |
| "KernelPageSize: 4 kB\n" |
| "MMUPageSize: 4 kB\n" |
| "Rss: 0 kB\n" |
| "Pss: 0 kB\n" |
| "Pss_Dirty: 0 kB\n" |
| "Shared_Clean: 0 kB\n" |
| "Shared_Dirty: 0 kB\n" |
| "Private_Clean: 0 kB\n" |
| "Private_Dirty: 0 kB\n" |
| "Referenced: 0 kB\n" |
| "Anonymous: 0 kB\n" |
| "KSM: 0 kB\n" |
| "LazyFree: 0 kB\n" |
| "AnonHugePages: 0 kB\n" |
| "ShmemPmdMapped: 0 kB\n" |
| "FilePmdMapped: 0 kB\n" |
| "Shared_Hugetlb: 0 kB\n" |
| "Private_Hugetlb: 0 kB\n" |
| "Swap: 0 kB\n" |
| "SwapPss: 0 kB\n" |
| "Locked: 0 kB\n" |
| "THPeligible: 0\n" |
| ; |
| |
| static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) |
| { |
| _exit(EXIT_FAILURE); |
| } |
| |
| #ifdef TEST_VSYSCALL |
| static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) |
| { |
| _exit(g_vsyscall); |
| } |
| |
| /* |
| * vsyscall page can't be unmapped, probe it directly. |
| */ |
| static void vsyscall(void) |
| { |
| pid_t pid; |
| int wstatus; |
| |
| pid = fork(); |
| if (pid < 0) { |
| fprintf(stderr, "fork, errno %d\n", errno); |
| exit(1); |
| } |
| if (pid == 0) { |
| setrlimit(RLIMIT_CORE, &(struct rlimit){}); |
| |
| /* Hide "segfault at ffffffffff600000" messages. */ |
| struct sigaction act = {}; |
| act.sa_flags = SA_SIGINFO; |
| act.sa_sigaction = sigaction_SIGSEGV_vsyscall; |
| sigaction(SIGSEGV, &act, NULL); |
| |
| g_vsyscall = 0; |
| /* gettimeofday(NULL, NULL); */ |
| uint64_t rax = 0xffffffffff600000; |
| asm volatile ( |
| "call *%[rax]" |
| : [rax] "+a" (rax) |
| : "D" (NULL), "S" (NULL) |
| : "rcx", "r11" |
| ); |
| |
| g_vsyscall = 1; |
| *(volatile int *)0xffffffffff600000UL; |
| |
| g_vsyscall = 2; |
| exit(g_vsyscall); |
| } |
| waitpid(pid, &wstatus, 0); |
| if (WIFEXITED(wstatus)) { |
| g_vsyscall = WEXITSTATUS(wstatus); |
| } else { |
| fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); |
| exit(1); |
| } |
| } |
| #endif |
| |
| static int test_proc_pid_maps(pid_t pid) |
| { |
| char buf[4096]; |
| snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); |
| int fd = open(buf, O_RDONLY); |
| if (fd == -1) { |
| perror("open /proc/${pid}/maps"); |
| return EXIT_FAILURE; |
| } else { |
| ssize_t rv = read(fd, buf, sizeof(buf)); |
| close(fd); |
| if (g_vsyscall == 0) { |
| assert(rv == 0); |
| } else { |
| size_t len = strlen(g_proc_pid_maps_vsyscall); |
| assert(rv == len); |
| assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); |
| } |
| return EXIT_SUCCESS; |
| } |
| } |
| |
/*
 * Check that /proc/${pid}/numa_maps reads as empty (asserted).
 *
 * Returns EXIT_SUCCESS if the file is empty or doesn't exist at all,
 * EXIT_FAILURE if opening it fails for any reason other than ENOENT.
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);

	const int fd = open(buf, O_RDONLY);
	if (fd == -1 && errno == ENOENT) {
		/*
		 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
		 * it doesn't necessarily exist.
		 */
		return EXIT_SUCCESS;
	}
	if (fd == -1) {
		perror("open /proc/${pid}/numa_maps");
		return EXIT_FAILURE;
	}

	const ssize_t nread = read(fd, buf, sizeof(buf));
	close(fd);

	assert(nread == 0);
	return EXIT_SUCCESS;
}
| |
| static int test_proc_pid_smaps(pid_t pid) |
| { |
| char buf[4096]; |
| snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); |
| int fd = open(buf, O_RDONLY); |
| if (fd == -1) { |
| if (errno == ENOENT) { |
| /* |
| * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, |
| * it doesn't necessarily exist. |
| */ |
| return EXIT_SUCCESS; |
| } |
| perror("open /proc/${pid}/smaps"); |
| return EXIT_FAILURE; |
| } |
| ssize_t rv = read(fd, buf, sizeof(buf)); |
| close(fd); |
| |
| assert(0 <= rv); |
| assert(rv <= sizeof(buf)); |
| |
| if (g_vsyscall == 0) { |
| assert(rv == 0); |
| } else { |
| size_t len = strlen(g_proc_pid_smaps_vsyscall); |
| assert(rv > len); |
| assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); |
| |
| if (g_protection_key_support) { |
| #define PROTECTION_KEY "ProtectionKey: 0\n" |
| assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); |
| } |
| } |
| |
| return EXIT_SUCCESS; |
| } |
| |
| static const char g_smaps_rollup[] = |
| "00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" |
| "Rss: 0 kB\n" |
| "Pss: 0 kB\n" |
| "Pss_Dirty: 0 kB\n" |
| "Pss_Anon: 0 kB\n" |
| "Pss_File: 0 kB\n" |
| "Pss_Shmem: 0 kB\n" |
| "Shared_Clean: 0 kB\n" |
| "Shared_Dirty: 0 kB\n" |
| "Private_Clean: 0 kB\n" |
| "Private_Dirty: 0 kB\n" |
| "Referenced: 0 kB\n" |
| "Anonymous: 0 kB\n" |
| "KSM: 0 kB\n" |
| "LazyFree: 0 kB\n" |
| "AnonHugePages: 0 kB\n" |
| "ShmemPmdMapped: 0 kB\n" |
| "FilePmdMapped: 0 kB\n" |
| "Shared_Hugetlb: 0 kB\n" |
| "Private_Hugetlb: 0 kB\n" |
| "Swap: 0 kB\n" |
| "SwapPss: 0 kB\n" |
| "Locked: 0 kB\n" |
| ; |
| |
| static int test_proc_pid_smaps_rollup(pid_t pid) |
| { |
| char buf[4096]; |
| snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); |
| int fd = open(buf, O_RDONLY); |
| if (fd == -1) { |
| if (errno == ENOENT) { |
| /* |
| * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, |
| * it doesn't necessarily exist. |
| */ |
| return EXIT_SUCCESS; |
| } |
| perror("open /proc/${pid}/smaps_rollup"); |
| return EXIT_FAILURE; |
| } else { |
| ssize_t rv = read(fd, buf, sizeof(buf)); |
| close(fd); |
| assert(rv == sizeof(g_smaps_rollup) - 1); |
| assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); |
| return EXIT_SUCCESS; |
| } |
| } |
| |
/*
 * Parse an unsigned decimal number from the range [p, end).
 *
 * The value is stored in *rv; the return value points at the first character
 * after the digits.
 *
 * Trips an assert if there is no digit at p, if the value doesn't fit into
 * uint64_t, or if the digits run all the way up to end (i.e. nothing follows
 * the number).
 */
static const char *parse_u64(const char *p, const char *const end, uint64_t *rv)
{
	const char *const start = p;
	uint64_t val = 0;

	for (; p != end && '0' <= *p && *p <= '9'; p += 1) {
		/*
		 * Do the arithmetic outside of assert(): the accumulation
		 * must not depend on the assertion being compiled in.
		 */
		const int mul_overflow = __builtin_mul_overflow(val, 10, &val);
		assert(!mul_overflow);
		(void)mul_overflow;

		const int add_overflow = __builtin_add_overflow(val, *p - '0', &val);
		assert(!add_overflow);
		(void)add_overflow;
	}

	/* An empty digit run is not a number. */
	assert(p != start);
	assert(p != end);

	*rv = val;
	return p;
}
| |
/*
 * Require the character at p to be ch and step past it.
 * Trips an assert at the end of input or on any other character.
 */
static const char *statm_expect(const char *p, const char *const end, char ch)
{
	assert(p != end);
	assert(*p == ch);
	return p + 1;
}

/*
 * Check the content of /proc/${pid}/statm.
 *
 * There seem to be 2 types of valid output:
 * "0 A A B 0 0 0\n" for dynamic executables,
 * "0 0 0 B 0 0 0\n" for static executables.
 *
 * Anything else trips an assert. Returns EXIT_FAILURE if the file can't be
 * opened, EXIT_SUCCESS otherwise.
 */
static int test_proc_pid_statm(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);

	const int fd = open(buf, O_RDONLY);
	if (fd == -1) {
		perror("open /proc/${pid}/statm");
		return EXIT_FAILURE;
	}

	const ssize_t nread = read(fd, buf, sizeof(buf));
	close(fd);

	assert(nread >= 0);
	assert(nread <= sizeof(buf));
	/* Disabled dump of the raw line to stdout. */
	if (0) {
		write(1, buf, nread);
	}

	const char *const end = buf + nread;
	const char *cur = buf;
	uint64_t resident;
	uint64_t shared;
	uint64_t text;

	/* size */
	cur = statm_expect(cur, end, '0');
	cur = statm_expect(cur, end, ' ');

	cur = parse_u64(cur, end, &resident);
	cur = statm_expect(cur, end, ' ');

	cur = parse_u64(cur, end, &shared);
	cur = statm_expect(cur, end, ' ');

	cur = parse_u64(cur, end, &text);
	cur = statm_expect(cur, end, ' ');

	cur = statm_expect(cur, end, '0');
	cur = statm_expect(cur, end, ' ');

	/* data */
	cur = statm_expect(cur, end, '0');
	cur = statm_expect(cur, end, ' ');

	cur = statm_expect(cur, end, '0');
	cur = statm_expect(cur, end, '\n');

	/* Nothing may follow the newline. */
	assert(cur == end);

	/*
	 * "text" is "mm->end_code - mm->start_code" at execve(2) time.
	 * munmap() doesn't change it. It can be anything (just link
	 * statically). It can't be 0 because executing to this point
	 * implies at least 1 page of code.
	 */
	assert(text > 0);

	/*
	 * These two are always equal. Always 0 for statically linked
	 * executables and sometimes 0 for dynamically linked executables.
	 * There is no way to tell one from another without parsing ELF
	 * which is too much for this test.
	 */
	assert(resident == shared);

	return EXIT_SUCCESS;
}
| |
| int main(void) |
| { |
| int rv = EXIT_SUCCESS; |
| |
| #ifdef TEST_VSYSCALL |
| vsyscall(); |
| #endif |
| |
| switch (g_vsyscall) { |
| case 0: |
| g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; |
| g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; |
| break; |
| case 1: |
| g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; |
| g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; |
| break; |
| case 2: |
| g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; |
| g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; |
| break; |
| default: |
| abort(); |
| } |
| |
| g_protection_key_support = protection_key_support(); |
| |
| pid_t pid = fork(); |
| if (pid == -1) { |
| perror("fork"); |
| return EXIT_FAILURE; |
| } else if (pid == 0) { |
| rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); |
| if (rv != 0) { |
| if (errno == EPERM) { |
| fprintf(stderr, |
| "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" |
| ); |
| kill(getppid(), SIGTERM); |
| return EXIT_FAILURE; |
| } |
| perror("ptrace PTRACE_TRACEME"); |
| return EXIT_FAILURE; |
| } |
| |
| /* |
| * Hide "segfault at ..." messages. Signal handler won't run. |
| */ |
| struct sigaction act = {}; |
| act.sa_flags = SA_SIGINFO; |
| act.sa_sigaction = sigaction_SIGSEGV; |
| sigaction(SIGSEGV, &act, NULL); |
| |
| #ifdef __amd64__ |
| munmap(NULL, ((size_t)1 << 47) - 4096); |
| #elif defined __i386__ |
| { |
| size_t len; |
| |
| for (len = -4096;; len -= 4096) { |
| munmap(NULL, len); |
| } |
| } |
| #else |
| #error "implement 'unmap everything'" |
| #endif |
| return EXIT_FAILURE; |
| } else { |
| /* |
| * TODO find reliable way to signal parent that munmap(2) completed. |
| * Child can't do it directly because it effectively doesn't exist |
| * anymore. Looking at child's VM files isn't 100% reliable either: |
| * due to a bug they may not become empty or empty-like. |
| */ |
| sleep(1); |
| |
| if (rv == EXIT_SUCCESS) { |
| rv = test_proc_pid_maps(pid); |
| } |
| if (rv == EXIT_SUCCESS) { |
| rv = test_proc_pid_numa_maps(pid); |
| } |
| if (rv == EXIT_SUCCESS) { |
| rv = test_proc_pid_smaps(pid); |
| } |
| if (rv == EXIT_SUCCESS) { |
| rv = test_proc_pid_smaps_rollup(pid); |
| } |
| if (rv == EXIT_SUCCESS) { |
| rv = test_proc_pid_statm(pid); |
| } |
| |
| /* Cut the rope. */ |
| int wstatus; |
| waitpid(pid, &wstatus, 0); |
| assert(WIFSTOPPED(wstatus)); |
| assert(WSTOPSIG(wstatus) == SIGSEGV); |
| } |
| |
| return rv; |
| } |
| #else |
/*
 * Built for an architecture other than amd64/i386: there is nothing to run.
 * NOTE(review): 4 is presumably the kselftest "skipped" exit status - confirm.
 */
int main(void)
{
	const int status = 4;

	return status;
}
| #endif |