| /* SPDX-License-Identifier: GPL-2.0 */ |
| #undef TRACE_SYSTEM |
| #define TRACE_SYSTEM sched |
| |
| #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) |
| #define _TRACE_SCHED_H |
| |
| #include <linux/sched/numa_balancing.h> |
| #include <linux/tracepoint.h> |
| #include <linux/binfmts.h> |
| |
| /* |
| * Tracepoint for calling kthread_stop, performed to end a kthread: |
| * |
| * @t: the task handed to the tracepoint. |
| * |
| * The entry stores TASK_COMM_LEN bytes of t->comm and t->pid, and is |
| * printed as "comm=%s pid=%d". |
| */ |
| TRACE_EVENT(sched_kthread_stop, |
| |
| TP_PROTO(struct task_struct *t), |
| |
| TP_ARGS(t), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, t->comm, TASK_COMM_LEN); |
| __entry->pid = t->pid; |
| ), |
| |
| TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) |
| ); |
| |
| /* |
| * Tracepoint for the return value of the kthread stopping: |
| * |
| * @ret: the value to record; stored unchanged in the entry and printed |
| * as "ret=%d". |
| */ |
| TRACE_EVENT(sched_kthread_stop_ret, |
| |
| TP_PROTO(int ret), |
| |
| TP_ARGS(ret), |
| |
| TP_STRUCT__entry( |
| __field( int, ret ) |
| ), |
| |
| TP_fast_assign( |
| __entry->ret = ret; |
| ), |
| |
| TP_printk("ret=%d", __entry->ret) |
| ); |
| |
| /* |
| * Tracepoint for waking up a task: |
| * |
| * Event class used by the sched_waking, sched_wakeup and sched_wakeup_new |
| * events. |
| * |
| * @p: the task being woken. Its comm, pid and prio are recorded, and |
| * target_cpu is taken from task_cpu(p). |
| * |
| * The 'success' field is always written as 1 and is not included in the |
| * printed output. |
| */ |
| DECLARE_EVENT_CLASS(sched_wakeup_template, |
| |
| TP_PROTO(struct task_struct *p), |
| |
| TP_ARGS(__perf_task(p)), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( int, prio ) |
| __field( int, success ) |
| __field( int, target_cpu ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, p->comm, TASK_COMM_LEN); |
| __entry->pid = p->pid; |
| __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| __entry->success = 1; /* rudiment, kill when possible */ |
| __entry->target_cpu = task_cpu(p); |
| ), |
| |
| TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", |
| __entry->comm, __entry->pid, __entry->prio, |
| __entry->target_cpu) |
| ); |
| |
| /* |
| * Tracepoint called when waking a task; this tracepoint is guaranteed to be |
| * called from the waking context. |
| * |
| * Instance of the sched_wakeup_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_wakeup_template, sched_waking, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| /* |
| * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. |
| * It is not always called from the waking context. |
| * |
| * Instance of the sched_wakeup_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_wakeup_template, sched_wakeup, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| /* |
| * Tracepoint for waking up a new task: |
| * |
| * Instance of the sched_wakeup_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| #ifdef CREATE_TRACE_POINTS |
| /* |
| * Compute the value that sched_switch stores in its prev_state field. |
| * |
| * @preempt: true when the task is being switched out by preemption |
| * @p: the task being switched out |
| * |
| * Returns TASK_REPORT_MAX when @preempt is set, 0 when |
| * task_state_index() yields 0, and otherwise the single bit |
| * 1 << (index - 1). |
| */ |
| static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) |
| { |
| 	unsigned int mapped; |
| |
| #ifdef CONFIG_SCHED_DEBUG |
| 	BUG_ON(p != current); |
| #endif /* CONFIG_SCHED_DEBUG */ |
| |
| 	/* |
| 	 * A preempted task has not been dequeued, so it is still RUNNING |
| 	 * whatever its state says; report that with the TASK_REPORT_MAX |
| 	 * marker rather than a state bit. |
| 	 */ |
| 	if (preempt) |
| 		return TASK_REPORT_MAX; |
| |
| 	/* |
| 	 * task_state_index() is fls()-based and yields an index in the 0-8 |
| 	 * range. Index 0 (TASK_RUNNING) is passed through unchanged; any |
| 	 * other index is converted back to its task->state bit by shifting |
| 	 * 1 left by (index - 1). |
| 	 */ |
| 	mapped = task_state_index(p); |
| 	if (mapped) |
| 		mapped = 1 << (mapped - 1); |
| |
| 	return mapped; |
| } |
| #endif /* CREATE_TRACE_POINTS */ |
| |
| /* |
| * Tracepoint for task switches, performed by the scheduler: |
| * |
| * @preempt: forwarded to __trace_sched_switch_state() |
| * @prev: task whose comm, pid, prio and state are recorded as prev_* |
| * @next: task whose comm, pid and prio are recorded as next_* |
| * |
| * Output: prev_state masked with (TASK_REPORT_MAX - 1) is decoded into |
| * state letters joined by "|", or "R" when the masked value is zero. |
| * A "+" is appended when the TASK_REPORT_MAX bit is set, which is the |
| * value __trace_sched_switch_state() returns when preempt is true. |
| * |
| * NOTE(review): the "I" letter is keyed on TASK_DEAD in the flag table; |
| * presumably the reported idle state shares that bit value - confirm |
| * against task_state_index(). |
| */ |
| TRACE_EVENT(sched_switch, |
| |
| TP_PROTO(bool preempt, |
| struct task_struct *prev, |
| struct task_struct *next), |
| |
| TP_ARGS(preempt, prev, next), |
| |
| TP_STRUCT__entry( |
| __array( char, prev_comm, TASK_COMM_LEN ) |
| __field( pid_t, prev_pid ) |
| __field( int, prev_prio ) |
| __field( long, prev_state ) |
| __array( char, next_comm, TASK_COMM_LEN ) |
| __field( pid_t, next_pid ) |
| __field( int, next_prio ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); |
| __entry->prev_pid = prev->pid; |
| __entry->prev_prio = prev->prio; |
| __entry->prev_state = __trace_sched_switch_state(preempt, prev); |
| memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); |
| __entry->next_pid = next->pid; |
| __entry->next_prio = next->prio; |
| /* XXX SCHED_DEADLINE */ |
| ), |
| |
| TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", |
| __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, |
| |
| (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? |
| __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", |
| { TASK_INTERRUPTIBLE, "S" }, |
| { TASK_UNINTERRUPTIBLE, "D" }, |
| { __TASK_STOPPED, "T" }, |
| { __TASK_TRACED, "t" }, |
| { EXIT_DEAD, "X" }, |
| { EXIT_ZOMBIE, "Z" }, |
| { TASK_PARKED, "P" }, |
| { TASK_DEAD, "I" }) : |
| "R", |
| |
| __entry->prev_state & TASK_REPORT_MAX ? "+" : "", |
| __entry->next_comm, __entry->next_pid, __entry->next_prio) |
| ); |
| |
| /* |
| * Tracepoint for a task being migrated: |
| * |
| * @p: the task being migrated; its comm, pid and prio are recorded |
| * @dest_cpu: recorded unchanged as dest_cpu |
| * |
| * orig_cpu is read from task_cpu(p) at the time the event is recorded. |
| */ |
| TRACE_EVENT(sched_migrate_task, |
| |
| TP_PROTO(struct task_struct *p, int dest_cpu), |
| |
| TP_ARGS(p, dest_cpu), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( int, prio ) |
| __field( int, orig_cpu ) |
| __field( int, dest_cpu ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, p->comm, TASK_COMM_LEN); |
| __entry->pid = p->pid; |
| __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| __entry->orig_cpu = task_cpu(p); |
| __entry->dest_cpu = dest_cpu; |
| ), |
| |
| TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", |
| __entry->comm, __entry->pid, __entry->prio, |
| __entry->orig_cpu, __entry->dest_cpu) |
| ); |
| /* |
| * Event class used by the sched_process_free, sched_process_exit and |
| * sched_wait_task events. |
| * |
| * @p: the task the event is about; its comm, pid and prio are recorded |
| * and printed as "comm=%s pid=%d prio=%d". |
| */ |
| DECLARE_EVENT_CLASS(sched_process_template, |
| |
| TP_PROTO(struct task_struct *p), |
| |
| TP_ARGS(p), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( int, prio ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, p->comm, TASK_COMM_LEN); |
| __entry->pid = p->pid; |
| __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| ), |
| |
| TP_printk("comm=%s pid=%d prio=%d", |
| __entry->comm, __entry->pid, __entry->prio) |
| ); |
| |
| /* |
| * Tracepoint for freeing a task: |
| * |
| * Instance of the sched_process_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_process_template, sched_process_free, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| /* |
| * Tracepoint for a task exiting: |
| * |
| * Instance of the sched_process_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_process_template, sched_process_exit, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| /* |
| * Tracepoint for waiting on task to unschedule: |
| * |
| * Instance of the sched_process_template event class; takes the task @p. |
| */ |
| DEFINE_EVENT(sched_process_template, sched_wait_task, |
| TP_PROTO(struct task_struct *p), |
| TP_ARGS(p)); |
| |
| /* |
| * Tracepoint for a waiting task: |
| * |
| * @pid: struct pid to record; converted to a number with pid_nr(). |
| * |
| * Note that comm and prio are taken from 'current', not from the task |
| * identified by @pid, so the printed "pid=" does not necessarily belong |
| * to the printed "comm=". |
| */ |
| TRACE_EVENT(sched_process_wait, |
| |
| TP_PROTO(struct pid *pid), |
| |
| TP_ARGS(pid), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( int, prio ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
| __entry->pid = pid_nr(pid); |
| __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ |
| ), |
| |
| TP_printk("comm=%s pid=%d prio=%d", |
| __entry->comm, __entry->pid, __entry->prio) |
| ); |
| |
| /* |
| * Tracepoint for do_fork: |
| * |
| * @parent: task whose comm and pid are recorded as parent_comm/parent_pid |
| * @child: task whose comm and pid are recorded as child_comm/child_pid |
| * |
| * In the printed output the parent's fields are labelled plain "comm=" |
| * and "pid=", while the child's are labelled "child_comm=" and |
| * "child_pid=". |
| */ |
| TRACE_EVENT(sched_process_fork, |
| |
| TP_PROTO(struct task_struct *parent, struct task_struct *child), |
| |
| TP_ARGS(parent, child), |
| |
| TP_STRUCT__entry( |
| __array( char, parent_comm, TASK_COMM_LEN ) |
| __field( pid_t, parent_pid ) |
| __array( char, child_comm, TASK_COMM_LEN ) |
| __field( pid_t, child_pid ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); |
| __entry->parent_pid = parent->pid; |
| memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); |
| __entry->child_pid = child->pid; |
| ), |
| |
| TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", |
| __entry->parent_comm, __entry->parent_pid, |
| __entry->child_comm, __entry->child_pid) |
| ); |
| |
| /* |
| * Tracepoint for exec: |
| * |
| * @p: the task; p->pid is recorded as pid |
| * @old_pid: recorded unchanged as old_pid |
| * @bprm: bprm->filename is stored through __string()/__assign_str() |
| * and printed with __get_str() |
| */ |
| TRACE_EVENT(sched_process_exec, |
| |
| TP_PROTO(struct task_struct *p, pid_t old_pid, |
| struct linux_binprm *bprm), |
| |
| TP_ARGS(p, old_pid, bprm), |
| |
| TP_STRUCT__entry( |
| __string( filename, bprm->filename ) |
| __field( pid_t, pid ) |
| __field( pid_t, old_pid ) |
| ), |
| |
| TP_fast_assign( |
| __assign_str(filename, bprm->filename); |
| __entry->pid = p->pid; |
| __entry->old_pid = old_pid; |
| ), |
| |
| TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), |
| __entry->pid, __entry->old_pid) |
| ); |
| |
| /* |
| * Wrappers used for the schedstat events: with CONFIG_SCHEDSTATS they |
| * expand to the regular DEFINE_EVENT/DECLARE_EVENT_CLASS macros, |
| * otherwise to their _NOP counterparts. |
| */ |
| #ifdef CONFIG_SCHEDSTATS |
| #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT |
| #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS |
| #else |
| #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP |
| #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP |
| #endif |
| |
| /* |
| * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE |
| * adding sched_stat support to SCHED_FIFO/RR would be welcome. |
| * |
| * Event class used by the sched_stat_wait, sched_stat_sleep, |
| * sched_stat_iowait and sched_stat_blocked events. Declared through |
| * DECLARE_EVENT_CLASS_SCHEDSTAT. |
| * |
| * @tsk: the task being accounted; its comm and pid are recorded |
| * @delay: recorded unchanged and printed with an "[ns]" suffix |
| */ |
| DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, |
| |
| TP_PROTO(struct task_struct *tsk, u64 delay), |
| |
| TP_ARGS(__perf_task(tsk), __perf_count(delay)), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( u64, delay ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); |
| __entry->pid = tsk->pid; |
| __entry->delay = delay; |
| ), |
| |
| TP_printk("comm=%s pid=%d delay=%Lu [ns]", |
| __entry->comm, __entry->pid, |
| (unsigned long long)__entry->delay) |
| ); |
| |
| /* |
| * Tracepoint for accounting wait time (time the task is runnable |
| * but not actually running due to scheduler contention). |
| * |
| * Instance of sched_stat_template, defined through DEFINE_EVENT_SCHEDSTAT; |
| * takes the task @tsk and the @delay value. |
| */ |
| DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait, |
| TP_PROTO(struct task_struct *tsk, u64 delay), |
| TP_ARGS(tsk, delay)); |
| |
| /* |
| * Tracepoint for accounting sleep time (time the task is not runnable, |
| * including iowait, see below). |
| * |
| * Instance of sched_stat_template, defined through DEFINE_EVENT_SCHEDSTAT; |
| * takes the task @tsk and the @delay value. |
| */ |
| DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep, |
| TP_PROTO(struct task_struct *tsk, u64 delay), |
| TP_ARGS(tsk, delay)); |
| |
| /* |
| * Tracepoint for accounting iowait time (time the task is not runnable |
| * due to waiting on IO to complete). |
| * |
| * Instance of sched_stat_template, defined through DEFINE_EVENT_SCHEDSTAT; |
| * takes the task @tsk and the @delay value. |
| */ |
| DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait, |
| TP_PROTO(struct task_struct *tsk, u64 delay), |
| TP_ARGS(tsk, delay)); |
| |
| /* |
| * Tracepoint for accounting blocked time (time the task is in the |
| * uninterruptible state). |
| * |
| * Instance of sched_stat_template, defined through DEFINE_EVENT_SCHEDSTAT; |
| * takes the task @tsk and the @delay value. |
| */ |
| DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, |
| TP_PROTO(struct task_struct *tsk, u64 delay), |
| TP_ARGS(tsk, delay)); |
| |
| /* |
| * Tracepoint for accounting runtime (time the task is executing |
| * on a CPU). |
| * |
| * The event class and its only event share the name sched_stat_runtime. |
| * Unlike the other sched_stat events it is declared with the plain |
| * DECLARE_EVENT_CLASS/DEFINE_EVENT macros rather than the *_SCHEDSTAT |
| * wrappers. |
| * |
| * @tsk: the task being accounted; its comm and pid are recorded |
| * @runtime: recorded unchanged, printed with an "[ns]" suffix |
| * @vruntime: recorded unchanged, printed with an "[ns]" suffix |
| */ |
| DECLARE_EVENT_CLASS(sched_stat_runtime, |
| |
| TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), |
| |
| TP_ARGS(tsk, __perf_count(runtime), vruntime), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( u64, runtime ) |
| __field( u64, vruntime ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); |
| __entry->pid = tsk->pid; |
| __entry->runtime = runtime; |
| __entry->vruntime = vruntime; |
| ), |
| |
| TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", |
| __entry->comm, __entry->pid, |
| (unsigned long long)__entry->runtime, |
| (unsigned long long)__entry->vruntime) |
| ); |
| |
| DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, |
| TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), |
| TP_ARGS(tsk, runtime, vruntime)); |
| |
| /* |
| * Tracepoint for showing priority inheritance modifying a task's |
| * priority. |
| * |
| * @tsk: the task whose priority changes; comm and pid are recorded and |
| * tsk->prio is stored as oldprio |
| * @pi_task: may be NULL. When set, newprio is the minimum of |
| * tsk->normal_prio and pi_task->prio; otherwise newprio is |
| * tsk->normal_prio. |
| */ |
| TRACE_EVENT(sched_pi_setprio, |
| |
| TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), |
| |
| TP_ARGS(tsk, pi_task), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| __field( int, oldprio ) |
| __field( int, newprio ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); |
| __entry->pid = tsk->pid; |
| __entry->oldprio = tsk->prio; |
| __entry->newprio = pi_task ? |
| min(tsk->normal_prio, pi_task->prio) : |
| tsk->normal_prio; |
| /* XXX SCHED_DEADLINE bits missing */ |
| ), |
| |
| TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", |
| __entry->comm, __entry->pid, |
| __entry->oldprio, __entry->newprio) |
| ); |
| /* |
| * Tracepoint recording the comm and pid of @tsk; printed as |
| * "comm=%s pid=%d". Only defined when CONFIG_DETECT_HUNG_TASK is set. |
| * |
| * NOTE(review): presumably emitted by the hung-task detector for the |
| * task it found stuck - confirm at the call site. |
| */ |
| #ifdef CONFIG_DETECT_HUNG_TASK |
| TRACE_EVENT(sched_process_hang, |
| TP_PROTO(struct task_struct *tsk), |
| TP_ARGS(tsk), |
| |
| TP_STRUCT__entry( |
| __array( char, comm, TASK_COMM_LEN ) |
| __field( pid_t, pid ) |
| ), |
| |
| TP_fast_assign( |
| memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); |
| __entry->pid = tsk->pid; |
| ), |
| |
| TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) |
| ); |
| #endif /* CONFIG_DETECT_HUNG_TASK */ |
| |
| /* |
| * Tracks migration of tasks from one runqueue to another. Can be used to |
| * detect if automatic NUMA balancing is bouncing between nodes. |
| * |
| * @tsk: the task; pid, tgid and ngid are read with task_pid_nr(), |
| * task_tgid_nr() and task_numa_group_id() |
| * @src_cpu: recorded unchanged; src_nid is cpu_to_node(src_cpu) |
| * @dst_cpu: recorded unchanged; dst_nid is cpu_to_node(dst_cpu) |
| * |
| * Both CPU numbers are passed to cpu_to_node() without a range check. |
| */ |
| TRACE_EVENT(sched_move_numa, |
| |
| TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), |
| |
| TP_ARGS(tsk, src_cpu, dst_cpu), |
| |
| TP_STRUCT__entry( |
| __field( pid_t, pid ) |
| __field( pid_t, tgid ) |
| __field( pid_t, ngid ) |
| __field( int, src_cpu ) |
| __field( int, src_nid ) |
| __field( int, dst_cpu ) |
| __field( int, dst_nid ) |
| ), |
| |
| TP_fast_assign( |
| __entry->pid = task_pid_nr(tsk); |
| __entry->tgid = task_tgid_nr(tsk); |
| __entry->ngid = task_numa_group_id(tsk); |
| __entry->src_cpu = src_cpu; |
| __entry->src_nid = cpu_to_node(src_cpu); |
| __entry->dst_cpu = dst_cpu; |
| __entry->dst_nid = cpu_to_node(dst_cpu); |
| ), |
| |
| TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", |
| __entry->pid, __entry->tgid, __entry->ngid, |
| __entry->src_cpu, __entry->src_nid, |
| __entry->dst_cpu, __entry->dst_nid) |
| ); |
| /* |
| * Event class used by the sched_stick_numa and sched_swap_numa events; |
| * records a source task/CPU and a destination task/CPU. |
| * |
| * @src_tsk: source task; pid, tgid and ngid are always read from it |
| * @src_cpu: recorded unchanged; src_nid is cpu_to_node(src_cpu) |
| * @dst_tsk: may be NULL, in which case dst_pid, dst_tgid and dst_ngid |
| * are recorded as 0 |
| * @dst_cpu: recorded unchanged; dst_nid is cpu_to_node(dst_cpu) when |
| * dst_cpu >= 0 and -1 otherwise |
| */ |
| DECLARE_EVENT_CLASS(sched_numa_pair_template, |
| |
| TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| struct task_struct *dst_tsk, int dst_cpu), |
| |
| TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), |
| |
| TP_STRUCT__entry( |
| __field( pid_t, src_pid ) |
| __field( pid_t, src_tgid ) |
| __field( pid_t, src_ngid ) |
| __field( int, src_cpu ) |
| __field( int, src_nid ) |
| __field( pid_t, dst_pid ) |
| __field( pid_t, dst_tgid ) |
| __field( pid_t, dst_ngid ) |
| __field( int, dst_cpu ) |
| __field( int, dst_nid ) |
| ), |
| |
| TP_fast_assign( |
| __entry->src_pid = task_pid_nr(src_tsk); |
| __entry->src_tgid = task_tgid_nr(src_tsk); |
| __entry->src_ngid = task_numa_group_id(src_tsk); |
| __entry->src_cpu = src_cpu; |
| __entry->src_nid = cpu_to_node(src_cpu); |
| __entry->dst_pid = dst_tsk ? task_pid_nr(dst_tsk) : 0; |
| __entry->dst_tgid = dst_tsk ? task_tgid_nr(dst_tsk) : 0; |
| __entry->dst_ngid = dst_tsk ? task_numa_group_id(dst_tsk) : 0; |
| __entry->dst_cpu = dst_cpu; |
| __entry->dst_nid = dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1; |
| ), |
| |
| TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", |
| __entry->src_pid, __entry->src_tgid, __entry->src_ngid, |
| __entry->src_cpu, __entry->src_nid, |
| __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, |
| __entry->dst_cpu, __entry->dst_nid) |
| ); |
| /* |
| * Instance of the sched_numa_pair_template event class; takes the |
| * source task/CPU and the destination task/CPU. |
| */ |
| DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa, |
| |
| TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| struct task_struct *dst_tsk, int dst_cpu), |
| |
| TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) |
| ); |
| /* |
| * Instance of the sched_numa_pair_template event class; takes the |
| * source task/CPU and the destination task/CPU. |
| */ |
| DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, |
| |
| TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| struct task_struct *dst_tsk, int dst_cpu), |
| |
| TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) |
| ); |
| |
| |
| /* |
| * Tracepoint for waking a polling cpu without an IPI. |
| * |
| * @cpu: the CPU number to record; stored unchanged and printed as |
| * "cpu=%d". |
| */ |
| TRACE_EVENT(sched_wake_idle_without_ipi, |
| |
| TP_PROTO(int cpu), |
| |
| TP_ARGS(cpu), |
| |
| TP_STRUCT__entry( |
| __field( int, cpu ) |
| ), |
| |
| TP_fast_assign( |
| __entry->cpu = cpu; |
| ), |
| |
| TP_printk("cpu=%d", __entry->cpu) |
| ); |
| |
| /* |
| * Following tracepoints are not exported in tracefs and provide hooking |
| * mechanisms only for testing and debugging purposes. |
| * |
| * Postfixed with _tp to make them easily identifiable in the code. |
| * |
| * Each DECLARE_TRACE() below gives only the tracepoint name and its |
| * argument list; no entry layout or print format is declared here. |
| * Arguments by tracepoint: |
| * pelt_cfs_tp, sched_util_est_cfs_tp: struct cfs_rq * |
| * pelt_rt_tp, pelt_dl_tp, pelt_thermal_tp, pelt_irq_tp, |
| * sched_cpu_capacity_tp: struct rq * |
| * pelt_se_tp, sched_util_est_se_tp: struct sched_entity * |
| * sched_overutilized_tp: struct root_domain *, bool |
| * sched_update_nr_running_tp: struct rq *, int |
| */ |
| DECLARE_TRACE(pelt_cfs_tp, |
| TP_PROTO(struct cfs_rq *cfs_rq), |
| TP_ARGS(cfs_rq)); |
| |
| DECLARE_TRACE(pelt_rt_tp, |
| TP_PROTO(struct rq *rq), |
| TP_ARGS(rq)); |
| |
| DECLARE_TRACE(pelt_dl_tp, |
| TP_PROTO(struct rq *rq), |
| TP_ARGS(rq)); |
| |
| DECLARE_TRACE(pelt_thermal_tp, |
| TP_PROTO(struct rq *rq), |
| TP_ARGS(rq)); |
| |
| DECLARE_TRACE(pelt_irq_tp, |
| TP_PROTO(struct rq *rq), |
| TP_ARGS(rq)); |
| |
| DECLARE_TRACE(pelt_se_tp, |
| TP_PROTO(struct sched_entity *se), |
| TP_ARGS(se)); |
| |
| DECLARE_TRACE(sched_cpu_capacity_tp, |
| TP_PROTO(struct rq *rq), |
| TP_ARGS(rq)); |
| |
| DECLARE_TRACE(sched_overutilized_tp, |
| TP_PROTO(struct root_domain *rd, bool overutilized), |
| TP_ARGS(rd, overutilized)); |
| |
| DECLARE_TRACE(sched_util_est_cfs_tp, |
| TP_PROTO(struct cfs_rq *cfs_rq), |
| TP_ARGS(cfs_rq)); |
| |
| DECLARE_TRACE(sched_util_est_se_tp, |
| TP_PROTO(struct sched_entity *se), |
| TP_ARGS(se)); |
| |
| DECLARE_TRACE(sched_update_nr_running_tp, |
| TP_PROTO(struct rq *rq, int change), |
| TP_ARGS(rq, change)); |
| |
| #endif /* _TRACE_SCHED_H */ |
| |
| /* This part must be outside protection */ |
| #include <trace/define_trace.h> |