| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Copyright (c) 2020 Collabora Ltd. |
| * |
| * Benchmark and test syscall user dispatch |
| */ |
| |
| #define _GNU_SOURCE |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <signal.h> |
| #include <errno.h> |
| #include <time.h> |
| #include <sys/time.h> |
| #include <unistd.h> |
| #include <sys/sysinfo.h> |
| #include <sys/prctl.h> |
| #include <sys/syscall.h> |
| |
| #ifndef PR_SET_SYSCALL_USER_DISPATCH |
| # define PR_SET_SYSCALL_USER_DISPATCH 59 |
| # define PR_SYS_DISPATCH_OFF 0 |
| # define PR_SYS_DISPATCH_ON 1 |
| # define SYSCALL_DISPATCH_FILTER_ALLOW 0 |
| # define SYSCALL_DISPATCH_FILTER_BLOCK 1 |
| #endif |
| |
| #ifdef __NR_syscalls |
| # define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */ |
| #else |
| # define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ |
| #endif |
| |
| /* |
| * To test returning from a sigsys with selector blocked, the test |
| * requires some per-architecture support (i.e. knowledge about the |
| * signal trampoline address). On i386, we know it is on the vdso, and |
| * a small trampoline is open-coded for x86_64. Other architectures |
| * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN |
| * out of the box, but don't enable them until they support syscall user |
| * dispatch. |
| */ |
| #if defined(__x86_64__) || defined(__i386__) |
| #define TEST_BLOCKED_RETURN |
| #endif |
| |
| #ifdef __x86_64__ |
| void* (syscall_dispatcher_start)(void); |
| void* (syscall_dispatcher_end)(void); |
| #else |
| unsigned long syscall_dispatcher_start = 0; |
| unsigned long syscall_dispatcher_end = 0; |
| #endif |
| |
| unsigned long trapped_call_count = 0; |
| unsigned long native_call_count = 0; |
| |
| char selector; |
| #define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK) |
| #define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW) |
| |
| #define CALIBRATION_STEP 100000 |
| #define CALIBRATE_TO_SECS 5 |
| int factor; |
| |
| static double one_sysinfo_step(void) |
| { |
| struct timespec t1, t2; |
| int i; |
| struct sysinfo info; |
| |
| clock_gettime(CLOCK_MONOTONIC, &t1); |
| for (i = 0; i < CALIBRATION_STEP; i++) |
| sysinfo(&info); |
| clock_gettime(CLOCK_MONOTONIC, &t2); |
| return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec); |
| } |
| |
| static void calibrate_set(void) |
| { |
| double elapsed = 0; |
| |
| printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS); |
| |
| while (elapsed < 1) { |
| elapsed += one_sysinfo_step(); |
| factor += CALIBRATE_TO_SECS; |
| } |
| |
| printf("test iterations = %d\n", CALIBRATION_STEP * factor); |
| } |
| |
| static double perf_syscall(void) |
| { |
| unsigned int i; |
| double partial = 0; |
| |
| for (i = 0; i < factor; ++i) |
| partial += one_sysinfo_step()/(CALIBRATION_STEP*factor); |
| return partial; |
| } |
| |
| static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) |
| { |
| char buf[1024]; |
| int len; |
| |
| SYSCALL_UNBLOCK; |
| |
| /* printf and friends are not signal-safe. */ |
| len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall); |
| write(1, buf, len); |
| |
| if (info->si_syscall == MAGIC_SYSCALL_1) |
| trapped_call_count++; |
| else |
| native_call_count++; |
| |
| #ifdef TEST_BLOCKED_RETURN |
| SYSCALL_BLOCK; |
| #endif |
| |
| #ifdef __x86_64__ |
| __asm__ volatile("movq $0xf, %rax"); |
| __asm__ volatile("leaveq"); |
| __asm__ volatile("add $0x8, %rsp"); |
| __asm__ volatile("syscall_dispatcher_start:"); |
| __asm__ volatile("syscall"); |
| __asm__ volatile("nop"); /* Landing pad within dispatcher area */ |
| __asm__ volatile("syscall_dispatcher_end:"); |
| #endif |
| |
| } |
| |
| int main(void) |
| { |
| struct sigaction act; |
| double time1, time2; |
| int ret; |
| sigset_t mask; |
| |
| memset(&act, 0, sizeof(act)); |
| sigemptyset(&mask); |
| |
| act.sa_sigaction = handle_sigsys; |
| act.sa_flags = SA_SIGINFO; |
| act.sa_mask = mask; |
| |
| calibrate_set(); |
| |
| time1 = perf_syscall(); |
| printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9); |
| |
| ret = sigaction(SIGSYS, &act, NULL); |
| if (ret) { |
| perror("Error sigaction:"); |
| exit(-1); |
| } |
| |
| fprintf(stderr, "Enabling syscall trapping.\n"); |
| |
| if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, |
| syscall_dispatcher_start, |
| (syscall_dispatcher_end - syscall_dispatcher_start + 1), |
| &selector)) { |
| perror("prctl failed\n"); |
| exit(-1); |
| } |
| |
| SYSCALL_BLOCK; |
| syscall(MAGIC_SYSCALL_1); |
| |
| #ifdef TEST_BLOCKED_RETURN |
| if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { |
| fprintf(stderr, "Failed to return with selector blocked.\n"); |
| exit(-1); |
| } |
| #endif |
| |
| SYSCALL_UNBLOCK; |
| |
| if (!trapped_call_count) { |
| fprintf(stderr, "syscall trapping does not work.\n"); |
| exit(-1); |
| } |
| |
| time2 = perf_syscall(); |
| |
| if (native_call_count) { |
| perror("syscall trapping intercepted more syscalls than expected\n"); |
| exit(-1); |
| } |
| |
| printf("trapped_call_count %lu, native_call_count %lu.\n", |
| trapped_call_count, native_call_count); |
| printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9); |
| printf("Interception overhead: %.1lf%% (+%.0lfns).\n", |
| 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1)); |
| return 0; |
| |
| } |