/* SPDX-License-Identifier: GPL-2.0 */
/*
 * syscall_numbering.c - test calling the x86-64 kernel with various
 * valid and invalid system call numbers.
 *
 * Copyright (c) 2018 Andrew Lutomirski
 */
| |
| #define _GNU_SOURCE |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdbool.h> |
| #include <errno.h> |
| #include <unistd.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <signal.h> |
| #include <sysexits.h> |
| |
| #include <sys/ptrace.h> |
| #include <sys/user.h> |
| #include <sys/wait.h> |
| #include <sys/mman.h> |
| |
| #include <linux/ptrace.h> |
| |
/*
 * System call numbers used by the probes, plus the flag bit that
 * selects the x32 table.
 */
enum {
	/* Common system call numbers */
	SYS_READ	= 0,
	SYS_WRITE	= 1,
	SYS_GETPID	= 39,

	/* x64-only system call numbers */
	X64_IOCTL	= 16,
	X64_READV	= 19,
	X64_WRITEV	= 20,

	/* x32-only system call numbers (without X32_BIT) */
	X32_IOCTL	= 514,
	X32_READV	= 515,
	X32_WRITEV	= 516,

	/* OR-ed into a system call number to request the x32 variant */
	X32_BIT		= 0x40000000
};

static int nullfd = -1;		/* File descriptor for /dev/null */
static bool with_x32;		/* x32 supported on this kernel? */
| |
/*
 * The ways in which the tracer interferes with a probed system call,
 * run in ascending order.  PTP_DONE is the end marker, not a real pass.
 */
enum ptrace_pass {
	PTP_NOTHING = 0,
	PTP_GETREGS,
	PTP_WRITEBACK,
	PTP_FUZZRET,
	PTP_FUZZHIGH,
	PTP_INTNUM,
	PTP_DONE
};

/* Human-readable description of each real pass, indexed by pass */
static const char * const ptrace_pass_name[PTP_DONE] = {
	[PTP_NOTHING]	= "just stop, no data read",
	[PTP_GETREGS]	= "only getregs",
	[PTP_WRITEBACK]	= "getregs, unmodified setregs",
	[PTP_FUZZRET]	= "modifying the default return",
	[PTP_FUZZHIGH]	= "clobbering the top 32 bits",
	[PTP_INTNUM]	= "sign-extending the syscall number",
};
| |
| /* |
| * Shared memory block between tracer and test |
| */ |
| struct shared { |
| unsigned int nerr; /* Total error count */ |
| unsigned int indent; /* Message indentation level */ |
| enum ptrace_pass ptrace_pass; |
| bool probing_syscall; /* In probe_syscall() */ |
| }; |
| static volatile struct shared *sh; |
| |
| static inline unsigned int offset(void) |
| { |
| unsigned int level = sh ? sh->indent : 0; |
| |
| return 8 + level * 4; |
| } |
| |
/*
 * Print one message line: "[lvl]" left-justified and padded to
 * offset() columns, followed by the printf-style message.
 */
#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
				  ## __VA_ARGS__)

#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)

/*
 * Report a test failure and count it in the shared error counter.
 * Dereferences sh, so it may only be used once sh points at the
 * shared block.
 */
#define fail(fmt, ...)					\
	do {						\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		sh->nerr++;				\
	} while (0)

/*
 * Report a setup problem that prevents the test from running at all and
 * exit with EX_OSERR.  The indentation level is reset so the message is
 * printed flush left.  This can be reached before the shared block has
 * been set up, so sh is only written when it is non-NULL; offset()
 * applies the same rule when reading it.
 */
#define crit(fmt, ...)					\
	do {						\
		if (sh)					\
			sh->indent = 0;			\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		msg(SKIP, "Unable to run test\n");	\
		exit(EX_OSERR);				\
	} while (0)
| |
/*
 * Sentinel for ptrace-modified return value.  Parenthesized so the
 * negative constant stays a single operand wherever the macro expands.
 */
#define MODIFIED_BY_PTRACE	(-9999)
| |
| /* |
| * Directly invokes the given syscall with nullfd as the first argument |
| * and the rest zero. Avoids involving glibc wrappers in case they ever |
| * end up intercepting some system calls for some reason, or modify |
| * the system call number itself. |
| */ |
| static long long probe_syscall(int msb, int lsb) |
| { |
| register long long arg1 asm("rdi") = nullfd; |
| register long long arg2 asm("rsi") = 0; |
| register long long arg3 asm("rdx") = 0; |
| register long long arg4 asm("r10") = 0; |
| register long long arg5 asm("r8") = 0; |
| register long long arg6 asm("r9") = 0; |
| long long nr = ((long long)msb << 32) | (unsigned int)lsb; |
| long long ret; |
| |
| /* |
| * We pass in an extra copy of the extended system call number |
| * in %rbx, so we can examine it from the ptrace handler without |
| * worrying about it being possibly modified. This is to test |
| * the validity of struct user regs.orig_rax a.k.a. |
| * struct pt_regs.orig_ax. |
| */ |
| sh->probing_syscall = true; |
| asm volatile("syscall" |
| : "=a" (ret) |
| : "a" (nr), "b" (nr), |
| "r" (arg1), "r" (arg2), "r" (arg3), |
| "r" (arg4), "r" (arg5), "r" (arg6) |
| : "rcx", "r11", "memory", "cc"); |
| sh->probing_syscall = false; |
| |
| return ret; |
| } |
| |
| static const char *syscall_str(int msb, int start, int end) |
| { |
| static char buf[64]; |
| const char * const type = (start & X32_BIT) ? "x32" : "x64"; |
| int lsb = start; |
| |
| /* |
| * Improve readability by stripping the x32 bit, but round |
| * toward zero so we don't display -1 as -1073741825. |
| */ |
| if (lsb < 0) |
| lsb |= X32_BIT; |
| else |
| lsb &= ~X32_BIT; |
| |
| if (start == end) |
| snprintf(buf, sizeof buf, "%s syscall %d:%d", |
| type, msb, lsb); |
| else |
| snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d", |
| type, msb, lsb, lsb + (end-start)); |
| |
| return buf; |
| } |
| |
| static unsigned int _check_for(int msb, int start, int end, long long expect, |
| const char *expect_str) |
| { |
| unsigned int err = 0; |
| |
| sh->indent++; |
| if (start != end) |
| sh->indent++; |
| |
| for (int nr = start; nr <= end; nr++) { |
| long long ret = probe_syscall(msb, nr); |
| |
| if (ret != expect) { |
| fail("%s returned %lld, but it should have returned %s\n", |
| syscall_str(msb, nr, nr), |
| ret, expect_str); |
| err++; |
| } |
| } |
| |
| if (start != end) |
| sh->indent--; |
| |
| if (err) { |
| if (start != end) |
| fail("%s had %u failure%s\n", |
| syscall_str(msb, start, end), |
| err, err == 1 ? "s" : ""); |
| } else { |
| ok("%s returned %s as expected\n", |
| syscall_str(msb, start, end), expect_str); |
| } |
| |
| sh->indent--; |
| |
| return err; |
| } |
| |
| #define check_for(msb,start,end,expect) \ |
| _check_for(msb,start,end,expect,#expect) |
| |
/*
 * Probe a single system call that is expected to return 0.
 *
 * Note the sense of the result: it is the error count from check_for()
 * converted to bool, i.e. true when the return value did NOT match.
 */
static bool check_zero(int msb, int nr)
{
	unsigned int mismatches = check_for(msb, nr, nr, 0);

	return mismatches != 0;
}
| |
/*
 * Probe a single system call that is expected to return -ENOSYS.
 *
 * Note the sense of the result: it is the error count from check_for()
 * converted to bool, i.e. true when the return value did NOT match.
 */
static bool check_enosys(int msb, int nr)
{
	unsigned int mismatches = check_for(msb, nr, nr, -ENOSYS);

	return mismatches != 0;
}
| |
| /* |
| * Anyone diagnosing a failure will want to know whether the kernel |
| * supports x32. Tell them. This can also be used to conditionalize |
| * tests based on existence or nonexistence of x32. |
| */ |
| static bool test_x32(void) |
| { |
| long long ret; |
| pid_t mypid = getpid(); |
| |
| run("Checking for x32 by calling x32 getpid()\n"); |
| ret = probe_syscall(0, SYS_GETPID | X32_BIT); |
| |
| sh->indent++; |
| if (ret == mypid) { |
| info("x32 is supported\n"); |
| with_x32 = true; |
| } else if (ret == -ENOSYS) { |
| info("x32 is not supported\n"); |
| with_x32 = false; |
| } else { |
| fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid); |
| with_x32 = false; |
| } |
| sh->indent--; |
| return with_x32; |
| } |
| |
| static void test_syscalls_common(int msb) |
| { |
| enum ptrace_pass pass = sh->ptrace_pass; |
| |
| run("Checking some common syscalls as 64 bit\n"); |
| check_zero(msb, SYS_READ); |
| check_zero(msb, SYS_WRITE); |
| |
| run("Checking some 64-bit only syscalls as 64 bit\n"); |
| check_zero(msb, X64_READV); |
| check_zero(msb, X64_WRITEV); |
| |
| run("Checking out of range system calls\n"); |
| check_for(msb, -64, -2, -ENOSYS); |
| if (pass >= PTP_FUZZRET) |
| check_for(msb, -1, -1, MODIFIED_BY_PTRACE); |
| else |
| check_for(msb, -1, -1, -ENOSYS); |
| check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); |
| check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); |
| check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); |
| } |
| |
| static void test_syscalls_with_x32(int msb) |
| { |
| /* |
| * Syscalls 512-547 are "x32" syscalls. They are |
| * intended to be called with the x32 (0x40000000) bit |
| * set. Calling them without the x32 bit set is |
| * nonsense and should not work. |
| */ |
| run("Checking x32 syscalls as 64 bit\n"); |
| check_for(msb, 512, 547, -ENOSYS); |
| |
| run("Checking some common syscalls as x32\n"); |
| check_zero(msb, SYS_READ | X32_BIT); |
| check_zero(msb, SYS_WRITE | X32_BIT); |
| |
| run("Checking some x32 syscalls as x32\n"); |
| check_zero(msb, X32_READV | X32_BIT); |
| check_zero(msb, X32_WRITEV | X32_BIT); |
| |
| run("Checking some 64-bit syscalls as x32\n"); |
| check_enosys(msb, X64_IOCTL | X32_BIT); |
| check_enosys(msb, X64_READV | X32_BIT); |
| check_enosys(msb, X64_WRITEV | X32_BIT); |
| } |
| |
| static void test_syscalls_without_x32(int msb) |
| { |
| run("Checking for absence of x32 system calls\n"); |
| check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); |
| } |
| |
| static void test_syscall_numbering(void) |
| { |
| static const int msbs[] = { |
| 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, |
| INT_MIN, INT_MIN+1 |
| }; |
| |
| sh->indent++; |
| |
| /* |
| * The MSB is supposed to be ignored, so we loop over a few |
| * to test that out. |
| */ |
| for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { |
| int msb = msbs[i]; |
| run("Checking system calls with msb = %d (0x%x)\n", |
| msb, msb); |
| |
| sh->indent++; |
| |
| test_syscalls_common(msb); |
| if (with_x32) |
| test_syscalls_with_x32(msb); |
| else |
| test_syscalls_without_x32(msb); |
| |
| sh->indent--; |
| } |
| |
| sh->indent--; |
| } |
| |
| static void syscall_numbering_tracee(void) |
| { |
| enum ptrace_pass pass; |
| |
| if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { |
| crit("Failed to request tracing\n"); |
| return; |
| } |
| raise(SIGSTOP); |
| |
| for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; |
| sh->ptrace_pass = ++pass) { |
| run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]); |
| test_syscall_numbering(); |
| } |
| } |
| |
| static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) |
| { |
| struct user_regs_struct regs; |
| |
| sh->probing_syscall = false; /* Do this on entry only */ |
| |
| /* For these, don't even getregs */ |
| if (pass == PTP_NOTHING || pass == PTP_DONE) |
| return; |
| |
| ptrace(PTRACE_GETREGS, testpid, NULL, ®s); |
| |
| if (regs.orig_rax != regs.rbx) { |
| fail("orig_rax %#llx doesn't match syscall number %#llx\n", |
| (unsigned long long)regs.orig_rax, |
| (unsigned long long)regs.rbx); |
| } |
| |
| switch (pass) { |
| case PTP_GETREGS: |
| /* Just read, no writeback */ |
| return; |
| case PTP_WRITEBACK: |
| /* Write back the same register state verbatim */ |
| break; |
| case PTP_FUZZRET: |
| regs.rax = MODIFIED_BY_PTRACE; |
| break; |
| case PTP_FUZZHIGH: |
| regs.rax = MODIFIED_BY_PTRACE; |
| regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; |
| break; |
| case PTP_INTNUM: |
| regs.rax = MODIFIED_BY_PTRACE; |
| regs.orig_rax = (int)regs.orig_rax; |
| break; |
| default: |
| crit("invalid ptrace_pass\n"); |
| break; |
| } |
| |
| ptrace(PTRACE_SETREGS, testpid, NULL, ®s); |
| } |
| |
| static void syscall_numbering_tracer(pid_t testpid) |
| { |
| int wstatus; |
| |
| do { |
| pid_t wpid = waitpid(testpid, &wstatus, 0); |
| if (wpid < 0 && errno != EINTR) |
| break; |
| if (wpid != testpid) |
| continue; |
| if (!WIFSTOPPED(wstatus)) |
| break; /* Thread exited? */ |
| |
| if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) |
| mess_with_syscall(testpid, sh->ptrace_pass); |
| } while (sh->ptrace_pass != PTP_DONE && |
| !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); |
| |
| ptrace(PTRACE_DETACH, testpid, NULL, NULL); |
| |
| /* Wait for the child process to terminate */ |
| while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) |
| /* wait some more */; |
| } |
| |
/*
 * Run the test a second time under ptrace: fork, let the child run the
 * tests as the tracee and exit, while the calling process acts as its
 * tracer.
 */
static void test_traced_syscall_numbering(void)
{
	/* Launch the test thread; this thread continues as the tracer thread */
	const pid_t testpid = fork();

	if (testpid == 0) {
		/* Child */
		syscall_numbering_tracee();
		_exit(0);
	} else if (testpid > 0) {
		/* Parent */
		syscall_numbering_tracer(testpid);
	} else {
		crit("Unable to launch tracer process\n");
	}
}
| |
| int main(void) |
| { |
| unsigned int nerr; |
| |
| /* |
| * It is quite likely to get a segfault on a failure, so make |
| * sure the message gets out by setting stdout to nonbuffered. |
| */ |
| setvbuf(stdout, NULL, _IONBF, 0); |
| |
| /* |
| * Harmless file descriptor to work on... |
| */ |
| nullfd = open("/dev/null", O_RDWR); |
| if (nullfd < 0) { |
| crit("Unable to open /dev/null: %s\n", strerror(errno)); |
| } |
| |
| /* |
| * Set up a block of shared memory... |
| */ |
| sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
| if (sh == MAP_FAILED) { |
| crit("Unable to allocated shared memory block: %s\n", |
| strerror(errno)); |
| } |
| |
| with_x32 = test_x32(); |
| |
| run("Running tests without ptrace...\n"); |
| test_syscall_numbering(); |
| |
| test_traced_syscall_numbering(); |
| |
| nerr = sh->nerr; |
| if (!nerr) { |
| ok("All system calls succeeded or failed as expected\n"); |
| return 0; |
| } else { |
| fail("A total of %u system call%s had incorrect behavior\n", |
| nerr, nerr != 1 ? "s" : ""); |
| return 1; |
| } |
| } |