| // SPDX-License-Identifier: GPL-2.0 |
| |
| /* |
| * CPU accounting code for task groups. |
| * |
| * Based on the work by Paul Menage (menage@google.com) and Balbir Singh |
| * (balbir@in.ibm.com). |
| */ |
| |
/*
 * Kinds of CPU time tracked for the tasks of a CPU accounting group.
 * CPUACCT_STAT_NSTATS is last and therefore equals the number of kinds.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	CPUACCT_STAT_NSTATS,	/* count of the entries above */
};
| |
| static const char * const cpuacct_stat_desc[] = { |
| [CPUACCT_STAT_USER] = "user", |
| [CPUACCT_STAT_SYSTEM] = "system", |
| }; |
| |
| /* track CPU usage of a group of tasks and its child groups */ |
| struct cpuacct { |
| struct cgroup_subsys_state css; |
| /* cpuusage holds pointer to a u64-type object on every CPU */ |
| u64 __percpu *cpuusage; |
| struct kernel_cpustat __percpu *cpustat; |
| }; |
| |
| static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
| { |
| return css ? container_of(css, struct cpuacct, css) : NULL; |
| } |
| |
| /* Return CPU accounting group to which this task belongs */ |
| static inline struct cpuacct *task_ca(struct task_struct *tsk) |
| { |
| return css_ca(task_css(tsk, cpuacct_cgrp_id)); |
| } |
| |
| static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
| { |
| return css_ca(ca->css.parent); |
| } |
| |
| static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
| static struct cpuacct root_cpuacct = { |
| .cpustat = &kernel_cpustat, |
| .cpuusage = &root_cpuacct_cpuusage, |
| }; |
| |
| /* Create a new CPU accounting group */ |
| static struct cgroup_subsys_state * |
| cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) |
| { |
| struct cpuacct *ca; |
| |
| if (!parent_css) |
| return &root_cpuacct.css; |
| |
| ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
| if (!ca) |
| goto out; |
| |
| ca->cpuusage = alloc_percpu(u64); |
| if (!ca->cpuusage) |
| goto out_free_ca; |
| |
| ca->cpustat = alloc_percpu(struct kernel_cpustat); |
| if (!ca->cpustat) |
| goto out_free_cpuusage; |
| |
| return &ca->css; |
| |
| out_free_cpuusage: |
| free_percpu(ca->cpuusage); |
| out_free_ca: |
| kfree(ca); |
| out: |
| return ERR_PTR(-ENOMEM); |
| } |
| |
| /* Destroy an existing CPU accounting group */ |
| static void cpuacct_css_free(struct cgroup_subsys_state *css) |
| { |
| struct cpuacct *ca = css_ca(css); |
| |
| free_percpu(ca->cpustat); |
| free_percpu(ca->cpuusage); |
| kfree(ca); |
| } |
| |
| static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
| enum cpuacct_stat_index index) |
| { |
| u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
| u64 data; |
| |
| /* |
| * We allow index == CPUACCT_STAT_NSTATS here to read |
| * the sum of usages. |
| */ |
| if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS)) |
| return 0; |
| |
| #ifndef CONFIG_64BIT |
| /* |
| * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
| */ |
| raw_spin_rq_lock_irq(cpu_rq(cpu)); |
| #endif |
| |
| switch (index) { |
| case CPUACCT_STAT_USER: |
| data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE]; |
| break; |
| case CPUACCT_STAT_SYSTEM: |
| data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] + |
| cpustat[CPUTIME_SOFTIRQ]; |
| break; |
| case CPUACCT_STAT_NSTATS: |
| data = *cpuusage; |
| break; |
| } |
| |
| #ifndef CONFIG_64BIT |
| raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
| #endif |
| |
| return data; |
| } |
| |
| static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu) |
| { |
| u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
| |
| /* Don't allow to reset global kernel_cpustat */ |
| if (ca == &root_cpuacct) |
| return; |
| |
| #ifndef CONFIG_64BIT |
| /* |
| * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
| */ |
| raw_spin_rq_lock_irq(cpu_rq(cpu)); |
| #endif |
| *cpuusage = 0; |
| cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0; |
| cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0; |
| cpustat[CPUTIME_SOFTIRQ] = 0; |
| |
| #ifndef CONFIG_64BIT |
| raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
| #endif |
| } |
| |
| /* Return total CPU usage (in nanoseconds) of a group */ |
| static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
| enum cpuacct_stat_index index) |
| { |
| struct cpuacct *ca = css_ca(css); |
| u64 totalcpuusage = 0; |
| int i; |
| |
| for_each_possible_cpu(i) |
| totalcpuusage += cpuacct_cpuusage_read(ca, i, index); |
| |
| return totalcpuusage; |
| } |
| |
| static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_STAT_USER); |
| } |
| |
| static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); |
| } |
| |
| static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_STAT_NSTATS); |
| } |
| |
| static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
| u64 val) |
| { |
| struct cpuacct *ca = css_ca(css); |
| int cpu; |
| |
| /* |
| * Only allow '0' here to do a reset. |
| */ |
| if (val) |
| return -EINVAL; |
| |
| for_each_possible_cpu(cpu) |
| cpuacct_cpuusage_write(ca, cpu); |
| |
| return 0; |
| } |
| |
| static int __cpuacct_percpu_seq_show(struct seq_file *m, |
| enum cpuacct_stat_index index) |
| { |
| struct cpuacct *ca = css_ca(seq_css(m)); |
| u64 percpu; |
| int i; |
| |
| for_each_possible_cpu(i) { |
| percpu = cpuacct_cpuusage_read(ca, i, index); |
| seq_printf(m, "%llu ", (unsigned long long) percpu); |
| } |
| seq_printf(m, "\n"); |
| return 0; |
| } |
| |
| static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); |
| } |
| |
| static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); |
| } |
| |
| static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); |
| } |
| |
| static int cpuacct_all_seq_show(struct seq_file *m, void *V) |
| { |
| struct cpuacct *ca = css_ca(seq_css(m)); |
| int index; |
| int cpu; |
| |
| seq_puts(m, "cpu"); |
| for (index = 0; index < CPUACCT_STAT_NSTATS; index++) |
| seq_printf(m, " %s", cpuacct_stat_desc[index]); |
| seq_puts(m, "\n"); |
| |
| for_each_possible_cpu(cpu) { |
| seq_printf(m, "%d", cpu); |
| for (index = 0; index < CPUACCT_STAT_NSTATS; index++) |
| seq_printf(m, " %llu", |
| cpuacct_cpuusage_read(ca, cpu, index)); |
| seq_puts(m, "\n"); |
| } |
| return 0; |
| } |
| |
| static int cpuacct_stats_show(struct seq_file *sf, void *v) |
| { |
| struct cpuacct *ca = css_ca(seq_css(sf)); |
| struct task_cputime cputime; |
| u64 val[CPUACCT_STAT_NSTATS]; |
| int cpu; |
| int stat; |
| |
| memset(&cputime, 0, sizeof(cputime)); |
| for_each_possible_cpu(cpu) { |
| u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
| |
| cputime.utime += cpustat[CPUTIME_USER]; |
| cputime.utime += cpustat[CPUTIME_NICE]; |
| cputime.stime += cpustat[CPUTIME_SYSTEM]; |
| cputime.stime += cpustat[CPUTIME_IRQ]; |
| cputime.stime += cpustat[CPUTIME_SOFTIRQ]; |
| |
| cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu); |
| } |
| |
| cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime, |
| &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]); |
| |
| for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { |
| seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat], |
| nsec_to_clock_t(val[stat])); |
| } |
| |
| return 0; |
| } |
| |
| static struct cftype files[] = { |
| { |
| .name = "usage", |
| .read_u64 = cpuusage_read, |
| .write_u64 = cpuusage_write, |
| }, |
| { |
| .name = "usage_user", |
| .read_u64 = cpuusage_user_read, |
| }, |
| { |
| .name = "usage_sys", |
| .read_u64 = cpuusage_sys_read, |
| }, |
| { |
| .name = "usage_percpu", |
| .seq_show = cpuacct_percpu_seq_show, |
| }, |
| { |
| .name = "usage_percpu_user", |
| .seq_show = cpuacct_percpu_user_seq_show, |
| }, |
| { |
| .name = "usage_percpu_sys", |
| .seq_show = cpuacct_percpu_sys_seq_show, |
| }, |
| { |
| .name = "usage_all", |
| .seq_show = cpuacct_all_seq_show, |
| }, |
| { |
| .name = "stat", |
| .seq_show = cpuacct_stats_show, |
| }, |
| { } /* terminate */ |
| }; |
| |
| /* |
| * charge this task's execution time to its accounting group. |
| * |
| * called with rq->lock held. |
| */ |
| void cpuacct_charge(struct task_struct *tsk, u64 cputime) |
| { |
| unsigned int cpu = task_cpu(tsk); |
| struct cpuacct *ca; |
| |
| lockdep_assert_rq_held(cpu_rq(cpu)); |
| |
| for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
| *per_cpu_ptr(ca->cpuusage, cpu) += cputime; |
| } |
| |
| /* |
| * Add user/system time to cpuacct. |
| * |
| * Note: it's the caller that updates the account of the root cgroup. |
| */ |
| void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
| { |
| struct cpuacct *ca; |
| |
| for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
| __this_cpu_add(ca->cpustat->cpustat[index], val); |
| } |
| |
| struct cgroup_subsys cpuacct_cgrp_subsys = { |
| .css_alloc = cpuacct_css_alloc, |
| .css_free = cpuacct_css_free, |
| .legacy_cftypes = files, |
| .early_init = true, |
| }; |