| // SPDX-License-Identifier: GPL-2.0 |
| |
| #include <linux/debug_locks.h> |
| #include <linux/delay.h> |
| #include <linux/jiffies.h> |
| #include <linux/kernel.h> |
| #include <linux/lockdep.h> |
| #include <linux/preempt.h> |
| #include <linux/printk.h> |
| #include <linux/sched.h> |
| #include <linux/spinlock.h> |
| #include <linux/stacktrace.h> |
| |
| #include "kcsan.h" |
| #include "encoding.h" |
| |
| /* |
| * Max. number of stack entries to show in the report. |
| */ |
| #define NUM_STACK_ENTRIES 64 |
| |
| /* Common access info. */ |
| struct access_info { |
| const volatile void *ptr; |
| size_t size; |
| int access_type; |
| int task_pid; |
| int cpu_id; |
| }; |
| |
| /* |
| * Other thread info: communicated from other racing thread to thread that set |
| * up the watchpoint, which then prints the complete report atomically. |
| */ |
| struct other_info { |
| struct access_info ai; |
| unsigned long stack_entries[NUM_STACK_ENTRIES]; |
| int num_stack_entries; |
| |
| /* |
| * Optionally pass @current. Typically we do not need to pass @current |
| * via @other_info since just @task_pid is sufficient. Passing @current |
| * has additional overhead. |
| * |
| * To safely pass @current, we must either use get_task_struct/ |
| * put_task_struct, or stall the thread that populated @other_info. |
| * |
| * We cannot rely on get_task_struct/put_task_struct in case |
| * release_report() races with a task being released, and would have to |
| * free it in release_report(). This may result in deadlock if we want |
| * to use KCSAN on the allocators. |
| * |
| * Since we also want to reliably print held locks for |
| * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread |
| * that populated @other_info until it has been consumed. |
| */ |
| struct task_struct *task; |
| }; |
| |
| /* |
| * To never block any producers of struct other_info, we need as many elements |
| * as we have watchpoints (upper bound on concurrent races to report). |
| */ |
| static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; |
| |
| /* |
| * Information about reported races; used to rate limit reporting. |
| */ |
| struct report_time { |
| /* |
| * The last time the race was reported. |
| */ |
| unsigned long time; |
| |
| /* |
| * The frames of the 2 threads; if only 1 thread is known, one frame |
| * will be 0. |
| */ |
| unsigned long frame1; |
| unsigned long frame2; |
| }; |
| |
| /* |
| * Since we also want to be able to debug allocators with KCSAN, to avoid |
| * deadlock, report_times cannot be dynamically resized with krealloc in |
| * rate_limit_report. |
| * |
| * Therefore, we use a fixed-size array, which at most will occupy a page. This |
| * still adequately rate limits reports, assuming that a) number of unique data |
| * races is not excessive, and b) occurrence of unique races within the |
| * same time window is limited. |
| */ |
| #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) |
| #define REPORT_TIMES_SIZE \ |
| (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ |
| REPORT_TIMES_MAX : \ |
| CONFIG_KCSAN_REPORT_ONCE_IN_MS) |
| static struct report_time report_times[REPORT_TIMES_SIZE]; |
| |
| /* |
| * Spinlock serializing report generation, and access to @other_infos. Although |
| * it could make sense to have a finer-grained locking story for @other_infos, |
| * report generation needs to be serialized either way, so not much is gained. |
| */ |
| static DEFINE_RAW_SPINLOCK(report_lock); |
| |
| /* |
| * Checks if the race identified by thread frames frame1 and frame2 has |
| * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). |
| */ |
| static bool rate_limit_report(unsigned long frame1, unsigned long frame2) |
| { |
| struct report_time *use_entry = &report_times[0]; |
| unsigned long invalid_before; |
| int i; |
| |
| BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); |
| |
| if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) |
| return false; |
| |
| invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); |
| |
| /* Check if a matching race report exists. */ |
| for (i = 0; i < REPORT_TIMES_SIZE; ++i) { |
| struct report_time *rt = &report_times[i]; |
| |
| /* |
| * Must always select an entry for use to store info as we |
| * cannot resize report_times; at the end of the scan, use_entry |
| * will be the oldest entry, which ideally also happened before |
| * KCSAN_REPORT_ONCE_IN_MS ago. |
| */ |
| if (time_before(rt->time, use_entry->time)) |
| use_entry = rt; |
| |
| /* |
| * Initially, no need to check any further as this entry as well |
| * as following entries have never been used. |
| */ |
| if (rt->time == 0) |
| break; |
| |
| /* Check if entry expired. */ |
| if (time_before(rt->time, invalid_before)) |
| continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ |
| |
| /* Reported recently, check if race matches. */ |
| if ((rt->frame1 == frame1 && rt->frame2 == frame2) || |
| (rt->frame1 == frame2 && rt->frame2 == frame1)) |
| return true; |
| } |
| |
| use_entry->time = jiffies; |
| use_entry->frame1 = frame1; |
| use_entry->frame2 = frame2; |
| return false; |
| } |
| |
| /* |
| * Special rules to skip reporting. |
| */ |
| static bool |
| skip_report(enum kcsan_value_change value_change, unsigned long top_frame) |
| { |
| /* Should never get here if value_change==FALSE. */ |
| WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); |
| |
| /* |
| * The first call to skip_report always has value_change==TRUE, since we |
| * cannot know the value written of an instrumented access. For the 2nd |
| * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: |
| * |
| * 1. read watchpoint, conflicting write (value_change==TRUE): report; |
| * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; |
| * 3. write watchpoint, conflicting write (value_change==TRUE): report; |
| * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; |
| * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; |
| * 6. write watchpoint, conflicting read (value_change==TRUE): report; |
| * |
| * Cases 1-4 are intuitive and expected; case 5 ensures we do not report |
| * data races where the write may have rewritten the same value; case 6 |
| * is possible either if the size is larger than what we check value |
| * changes for or the access type is KCSAN_ACCESS_ASSERT. |
| */ |
| if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && |
| value_change == KCSAN_VALUE_CHANGE_MAYBE) { |
| /* |
| * The access is a write, but the data value did not change. |
| * |
| * We opt-out of this filter for certain functions at request of |
| * maintainers. |
| */ |
| char buf[64]; |
| int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame); |
| |
| if (!strnstr(buf, "rcu_", len) && |
| !strnstr(buf, "_rcu", len) && |
| !strnstr(buf, "_srcu", len)) |
| return true; |
| } |
| |
| return kcsan_skip_report_debugfs(top_frame); |
| } |
| |
| static const char *get_access_type(int type) |
| { |
| if (type & KCSAN_ACCESS_ASSERT) { |
| if (type & KCSAN_ACCESS_SCOPED) { |
| if (type & KCSAN_ACCESS_WRITE) |
| return "assert no accesses (scoped)"; |
| else |
| return "assert no writes (scoped)"; |
| } else { |
| if (type & KCSAN_ACCESS_WRITE) |
| return "assert no accesses"; |
| else |
| return "assert no writes"; |
| } |
| } |
| |
| switch (type) { |
| case 0: |
| return "read"; |
| case KCSAN_ACCESS_ATOMIC: |
| return "read (marked)"; |
| case KCSAN_ACCESS_WRITE: |
| return "write"; |
| case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: |
| return "write (marked)"; |
| case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: |
| return "read-write"; |
| case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: |
| return "read-write (marked)"; |
| case KCSAN_ACCESS_SCOPED: |
| return "read (scoped)"; |
| case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: |
| return "read (marked, scoped)"; |
| case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: |
| return "write (scoped)"; |
| case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: |
| return "write (marked, scoped)"; |
| default: |
| BUG(); |
| } |
| } |
| |
| static const char *get_bug_type(int type) |
| { |
| return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; |
| } |
| |
| /* Return thread description: in task or interrupt. */ |
| static const char *get_thread_desc(int task_id) |
| { |
| if (task_id != -1) { |
| static char buf[32]; /* safe: protected by report_lock */ |
| |
| snprintf(buf, sizeof(buf), "task %i", task_id); |
| return buf; |
| } |
| return "interrupt"; |
| } |
| |
| /* Helper to skip KCSAN-related functions in stack-trace. */ |
| static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) |
| { |
| char buf[64]; |
| char *cur; |
| int len, skip; |
| |
| for (skip = 0; skip < num_entries; ++skip) { |
| len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); |
| |
| /* Never show tsan_* or {read,write}_once_size. */ |
| if (strnstr(buf, "tsan_", len) || |
| strnstr(buf, "_once_size", len)) |
| continue; |
| |
| cur = strnstr(buf, "kcsan_", len); |
| if (cur) { |
| cur += strlen("kcsan_"); |
| if (!str_has_prefix(cur, "test")) |
| continue; /* KCSAN runtime function. */ |
| /* KCSAN related test. */ |
| } |
| |
| /* |
| * No match for runtime functions -- @skip entries to skip to |
| * get to first frame of interest. |
| */ |
| break; |
| } |
| |
| return skip; |
| } |
| |
| /* Compares symbolized strings of addr1 and addr2. */ |
| static int sym_strcmp(void *addr1, void *addr2) |
| { |
| char buf1[64]; |
| char buf2[64]; |
| |
| snprintf(buf1, sizeof(buf1), "%pS", addr1); |
| snprintf(buf2, sizeof(buf2), "%pS", addr2); |
| |
| return strncmp(buf1, buf2, sizeof(buf1)); |
| } |
| |
| static void print_verbose_info(struct task_struct *task) |
| { |
| if (!task) |
| return; |
| |
| /* Restore IRQ state trace for printing. */ |
| kcsan_restore_irqtrace(task); |
| |
| pr_err("\n"); |
| debug_show_held_locks(task); |
| print_irqtrace_events(task); |
| } |
| |
| /* |
| * Returns true if a report was generated, false otherwise. |
| */ |
| static bool print_report(enum kcsan_value_change value_change, |
| enum kcsan_report_type type, |
| const struct access_info *ai, |
| const struct other_info *other_info) |
| { |
| unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; |
| int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); |
| int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); |
| unsigned long this_frame = stack_entries[skipnr]; |
| unsigned long other_frame = 0; |
| int other_skipnr = 0; /* silence uninit warnings */ |
| |
| /* |
| * Must check report filter rules before starting to print. |
| */ |
| if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) |
| return false; |
| |
| if (type == KCSAN_REPORT_RACE_SIGNAL) { |
| other_skipnr = get_stack_skipnr(other_info->stack_entries, |
| other_info->num_stack_entries); |
| other_frame = other_info->stack_entries[other_skipnr]; |
| |
| /* @value_change is only known for the other thread */ |
| if (skip_report(value_change, other_frame)) |
| return false; |
| } |
| |
| if (rate_limit_report(this_frame, other_frame)) |
| return false; |
| |
| /* Print report header. */ |
| pr_err("==================================================================\n"); |
| switch (type) { |
| case KCSAN_REPORT_RACE_SIGNAL: { |
| int cmp; |
| |
| /* |
| * Order functions lexographically for consistent bug titles. |
| * Do not print offset of functions to keep title short. |
| */ |
| cmp = sym_strcmp((void *)other_frame, (void *)this_frame); |
| pr_err("BUG: KCSAN: %s in %ps / %ps\n", |
| get_bug_type(ai->access_type | other_info->ai.access_type), |
| (void *)(cmp < 0 ? other_frame : this_frame), |
| (void *)(cmp < 0 ? this_frame : other_frame)); |
| } break; |
| |
| case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: |
| pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), |
| (void *)this_frame); |
| break; |
| |
| default: |
| BUG(); |
| } |
| |
| pr_err("\n"); |
| |
| /* Print information about the racing accesses. */ |
| switch (type) { |
| case KCSAN_REPORT_RACE_SIGNAL: |
| pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", |
| get_access_type(other_info->ai.access_type), other_info->ai.ptr, |
| other_info->ai.size, get_thread_desc(other_info->ai.task_pid), |
| other_info->ai.cpu_id); |
| |
| /* Print the other thread's stack trace. */ |
| stack_trace_print(other_info->stack_entries + other_skipnr, |
| other_info->num_stack_entries - other_skipnr, |
| 0); |
| |
| if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| print_verbose_info(other_info->task); |
| |
| pr_err("\n"); |
| pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", |
| get_access_type(ai->access_type), ai->ptr, ai->size, |
| get_thread_desc(ai->task_pid), ai->cpu_id); |
| break; |
| |
| case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: |
| pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", |
| get_access_type(ai->access_type), ai->ptr, ai->size, |
| get_thread_desc(ai->task_pid), ai->cpu_id); |
| break; |
| |
| default: |
| BUG(); |
| } |
| /* Print stack trace of this thread. */ |
| stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, |
| 0); |
| |
| if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| print_verbose_info(current); |
| |
| /* Print report footer. */ |
| pr_err("\n"); |
| pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); |
| dump_stack_print_info(KERN_DEFAULT); |
| pr_err("==================================================================\n"); |
| |
| return true; |
| } |
| |
| static void release_report(unsigned long *flags, struct other_info *other_info) |
| { |
| if (other_info) |
| /* |
| * Use size to denote valid/invalid, since KCSAN entirely |
| * ignores 0-sized accesses. |
| */ |
| other_info->ai.size = 0; |
| |
| raw_spin_unlock_irqrestore(&report_lock, *flags); |
| } |
| |
| /* |
| * Sets @other_info->task and awaits consumption of @other_info. |
| * |
| * Precondition: report_lock is held. |
| * Postcondition: report_lock is held. |
| */ |
| static void set_other_info_task_blocking(unsigned long *flags, |
| const struct access_info *ai, |
| struct other_info *other_info) |
| { |
| /* |
| * We may be instrumenting a code-path where current->state is already |
| * something other than TASK_RUNNING. |
| */ |
| const bool is_running = current->state == TASK_RUNNING; |
| /* |
| * To avoid deadlock in case we are in an interrupt here and this is a |
| * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a |
| * timeout to ensure this works in all contexts. |
| * |
| * Await approximately the worst case delay of the reporting thread (if |
| * we are not interrupted). |
| */ |
| int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); |
| |
| other_info->task = current; |
| do { |
| if (is_running) { |
| /* |
| * Let lockdep know the real task is sleeping, to print |
| * the held locks (recall we turned lockdep off, so |
| * locking/unlocking @report_lock won't be recorded). |
| */ |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| } |
| raw_spin_unlock_irqrestore(&report_lock, *flags); |
| /* |
| * We cannot call schedule() since we also cannot reliably |
| * determine if sleeping here is permitted -- see in_atomic(). |
| */ |
| |
| udelay(1); |
| raw_spin_lock_irqsave(&report_lock, *flags); |
| if (timeout-- < 0) { |
| /* |
| * Abort. Reset @other_info->task to NULL, since it |
| * appears the other thread is still going to consume |
| * it. It will result in no verbose info printed for |
| * this task. |
| */ |
| other_info->task = NULL; |
| break; |
| } |
| /* |
| * If invalid, or @ptr nor @current matches, then @other_info |
| * has been consumed and we may continue. If not, retry. |
| */ |
| } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && |
| other_info->task == current); |
| if (is_running) |
| set_current_state(TASK_RUNNING); |
| } |
| |
| /* Populate @other_info; requires that the provided @other_info not in use. */ |
| static void prepare_report_producer(unsigned long *flags, |
| const struct access_info *ai, |
| struct other_info *other_info) |
| { |
| raw_spin_lock_irqsave(&report_lock, *flags); |
| |
| /* |
| * The same @other_infos entry cannot be used concurrently, because |
| * there is a one-to-one mapping to watchpoint slots (@watchpoints in |
| * core.c), and a watchpoint is only released for reuse after reporting |
| * is done by the consumer of @other_info. Therefore, it is impossible |
| * for another concurrent prepare_report_producer() to set the same |
| * @other_info, and are guaranteed exclusivity for the @other_infos |
| * entry pointed to by @other_info. |
| * |
| * To check this property holds, size should never be non-zero here, |
| * because every consumer of struct other_info resets size to 0 in |
| * release_report(). |
| */ |
| WARN_ON(other_info->ai.size); |
| |
| other_info->ai = *ai; |
| other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); |
| |
| if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| set_other_info_task_blocking(flags, ai, other_info); |
| |
| raw_spin_unlock_irqrestore(&report_lock, *flags); |
| } |
| |
| /* Awaits producer to fill @other_info and then returns. */ |
| static bool prepare_report_consumer(unsigned long *flags, |
| const struct access_info *ai, |
| struct other_info *other_info) |
| { |
| |
| raw_spin_lock_irqsave(&report_lock, *flags); |
| while (!other_info->ai.size) { /* Await valid @other_info. */ |
| raw_spin_unlock_irqrestore(&report_lock, *flags); |
| cpu_relax(); |
| raw_spin_lock_irqsave(&report_lock, *flags); |
| } |
| |
| /* Should always have a matching access based on watchpoint encoding. */ |
| if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, |
| (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) |
| goto discard; |
| |
| if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, |
| (unsigned long)ai->ptr, ai->size)) { |
| /* |
| * If the actual accesses to not match, this was a false |
| * positive due to watchpoint encoding. |
| */ |
| atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]); |
| goto discard; |
| } |
| |
| return true; |
| |
| discard: |
| release_report(flags, other_info); |
| return false; |
| } |
| |
| /* |
| * Depending on the report type either sets @other_info and returns false, or |
| * awaits @other_info and returns true. If @other_info is not required for the |
| * report type, simply acquires @report_lock and returns true. |
| */ |
| static noinline bool prepare_report(unsigned long *flags, |
| enum kcsan_report_type type, |
| const struct access_info *ai, |
| struct other_info *other_info) |
| { |
| switch (type) { |
| case KCSAN_REPORT_CONSUMED_WATCHPOINT: |
| prepare_report_producer(flags, ai, other_info); |
| return false; |
| case KCSAN_REPORT_RACE_SIGNAL: |
| return prepare_report_consumer(flags, ai, other_info); |
| default: |
| /* @other_info not required; just acquire @report_lock. */ |
| raw_spin_lock_irqsave(&report_lock, *flags); |
| return true; |
| } |
| } |
| |
| void kcsan_report(const volatile void *ptr, size_t size, int access_type, |
| enum kcsan_value_change value_change, |
| enum kcsan_report_type type, int watchpoint_idx) |
| { |
| unsigned long flags = 0; |
| const struct access_info ai = { |
| .ptr = ptr, |
| .size = size, |
| .access_type = access_type, |
| .task_pid = in_task() ? task_pid_nr(current) : -1, |
| .cpu_id = raw_smp_processor_id() |
| }; |
| struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN |
| ? NULL : &other_infos[watchpoint_idx]; |
| |
| kcsan_disable_current(); |
| if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) |
| goto out; |
| |
| /* |
| * Because we may generate reports when we're in scheduler code, the use |
| * of printk() could deadlock. Until such time that all printing code |
| * called in print_report() is scheduler-safe, accept the risk, and just |
| * get our message out. As such, also disable lockdep to hide the |
| * warning, and avoid disabling lockdep for the rest of the kernel. |
| */ |
| lockdep_off(); |
| |
| if (prepare_report(&flags, type, &ai, other_info)) { |
| /* |
| * Never report if value_change is FALSE, only if we it is |
| * either TRUE or MAYBE. In case of MAYBE, further filtering may |
| * be done once we know the full stack trace in print_report(). |
| */ |
| bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && |
| print_report(value_change, type, &ai, other_info); |
| |
| if (reported && panic_on_warn) |
| panic("panic_on_warn set ...\n"); |
| |
| release_report(&flags, other_info); |
| } |
| |
| lockdep_on(); |
| out: |
| kcsan_enable_current(); |
| } |