#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>

#include <asm/irq_regs.h>

#include "sched.h"
| |
| /* |
| * CPU accounting code for task groups. |
| * |
| * Based on the work by Paul Menage (menage@google.com) and Balbir Singh |
| * (balbir@in.ibm.com). |
| */ |
| |
/*
 * Indices into cpuacct_stat_desc[]: time spent by the tasks of the cpu
 * accounting group executing in user mode and in kernel mode.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time in user mode */
	CPUACCT_STAT_SYSTEM,	/* time in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of stat entries, keep last */
};
| |
/*
 * Indices into struct cpuacct_usage::usages[].
 *
 * CPUACCT_USAGE_NRUSAGE is the array size; the read helpers also accept it
 * as an index meaning "sum of all usages".
 */
enum cpuacct_usage_index {
	CPUACCT_USAGE_USER = 0,	/* usage charged as user mode */
	CPUACCT_USAGE_SYSTEM,	/* usage charged as kernel mode */

	CPUACCT_USAGE_NRUSAGE,	/* number of usage slots, keep last */
};
| |
| struct cpuacct_usage { |
| u64 usages[CPUACCT_USAGE_NRUSAGE]; |
| }; |
| |
| /* track cpu usage of a group of tasks and its child groups */ |
| struct cpuacct { |
| struct cgroup_subsys_state css; |
| /* cpuusage holds pointer to a u64-type object on every cpu */ |
| struct cpuacct_usage __percpu *cpuusage; |
| struct kernel_cpustat __percpu *cpustat; |
| }; |
| |
| static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
| { |
| return css ? container_of(css, struct cpuacct, css) : NULL; |
| } |
| |
| /* return cpu accounting group to which this task belongs */ |
| static inline struct cpuacct *task_ca(struct task_struct *tsk) |
| { |
| return css_ca(task_css(tsk, cpuacct_cgrp_id)); |
| } |
| |
| static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
| { |
| return css_ca(ca->css.parent); |
| } |
| |
| static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); |
| static struct cpuacct root_cpuacct = { |
| .cpustat = &kernel_cpustat, |
| .cpuusage = &root_cpuacct_cpuusage, |
| }; |
| |
| /* create a new cpu accounting group */ |
| static struct cgroup_subsys_state * |
| cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) |
| { |
| struct cpuacct *ca; |
| |
| if (!parent_css) |
| return &root_cpuacct.css; |
| |
| ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
| if (!ca) |
| goto out; |
| |
| ca->cpuusage = alloc_percpu(struct cpuacct_usage); |
| if (!ca->cpuusage) |
| goto out_free_ca; |
| |
| ca->cpustat = alloc_percpu(struct kernel_cpustat); |
| if (!ca->cpustat) |
| goto out_free_cpuusage; |
| |
| return &ca->css; |
| |
| out_free_cpuusage: |
| free_percpu(ca->cpuusage); |
| out_free_ca: |
| kfree(ca); |
| out: |
| return ERR_PTR(-ENOMEM); |
| } |
| |
| /* destroy an existing cpu accounting group */ |
| static void cpuacct_css_free(struct cgroup_subsys_state *css) |
| { |
| struct cpuacct *ca = css_ca(css); |
| |
| free_percpu(ca->cpustat); |
| free_percpu(ca->cpuusage); |
| kfree(ca); |
| } |
| |
| static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
| enum cpuacct_usage_index index) |
| { |
| struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| u64 data; |
| |
| /* |
| * We allow index == CPUACCT_USAGE_NRUSAGE here to read |
| * the sum of suages. |
| */ |
| BUG_ON(index > CPUACCT_USAGE_NRUSAGE); |
| |
| #ifndef CONFIG_64BIT |
| /* |
| * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
| */ |
| raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
| #endif |
| |
| if (index == CPUACCT_USAGE_NRUSAGE) { |
| int i = 0; |
| |
| data = 0; |
| for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) |
| data += cpuusage->usages[i]; |
| } else { |
| data = cpuusage->usages[index]; |
| } |
| |
| #ifndef CONFIG_64BIT |
| raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
| #endif |
| |
| return data; |
| } |
| |
| static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) |
| { |
| struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| int i; |
| |
| #ifndef CONFIG_64BIT |
| /* |
| * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
| */ |
| raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
| #endif |
| |
| for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) |
| cpuusage->usages[i] = val; |
| |
| #ifndef CONFIG_64BIT |
| raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
| #endif |
| } |
| |
| /* return total cpu usage (in nanoseconds) of a group */ |
| static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
| enum cpuacct_usage_index index) |
| { |
| struct cpuacct *ca = css_ca(css); |
| u64 totalcpuusage = 0; |
| int i; |
| |
| for_each_possible_cpu(i) |
| totalcpuusage += cpuacct_cpuusage_read(ca, i, index); |
| |
| return totalcpuusage; |
| } |
| |
| static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_USAGE_USER); |
| } |
| |
| static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, |
| struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM); |
| } |
| |
| static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) |
| { |
| return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE); |
| } |
| |
| static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
| u64 val) |
| { |
| struct cpuacct *ca = css_ca(css); |
| int cpu; |
| |
| /* |
| * Only allow '0' here to do a reset. |
| */ |
| if (val) |
| return -EINVAL; |
| |
| for_each_possible_cpu(cpu) |
| cpuacct_cpuusage_write(ca, cpu, 0); |
| |
| return 0; |
| } |
| |
| static int __cpuacct_percpu_seq_show(struct seq_file *m, |
| enum cpuacct_usage_index index) |
| { |
| struct cpuacct *ca = css_ca(seq_css(m)); |
| u64 percpu; |
| int i; |
| |
| for_each_possible_cpu(i) { |
| percpu = cpuacct_cpuusage_read(ca, i, index); |
| seq_printf(m, "%llu ", (unsigned long long) percpu); |
| } |
| seq_printf(m, "\n"); |
| return 0; |
| } |
| |
| static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER); |
| } |
| |
| static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM); |
| } |
| |
| static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) |
| { |
| return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE); |
| } |
| |
| static const char * const cpuacct_stat_desc[] = { |
| [CPUACCT_STAT_USER] = "user", |
| [CPUACCT_STAT_SYSTEM] = "system", |
| }; |
| |
| static int cpuacct_stats_show(struct seq_file *sf, void *v) |
| { |
| struct cpuacct *ca = css_ca(seq_css(sf)); |
| int cpu; |
| s64 val = 0; |
| |
| for_each_possible_cpu(cpu) { |
| struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); |
| val += kcpustat->cpustat[CPUTIME_USER]; |
| val += kcpustat->cpustat[CPUTIME_NICE]; |
| } |
| val = cputime64_to_clock_t(val); |
| seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); |
| |
| val = 0; |
| for_each_possible_cpu(cpu) { |
| struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); |
| val += kcpustat->cpustat[CPUTIME_SYSTEM]; |
| val += kcpustat->cpustat[CPUTIME_IRQ]; |
| val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; |
| } |
| |
| val = cputime64_to_clock_t(val); |
| seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); |
| |
| return 0; |
| } |
| |
| static struct cftype files[] = { |
| { |
| .name = "usage", |
| .read_u64 = cpuusage_read, |
| .write_u64 = cpuusage_write, |
| }, |
| { |
| .name = "usage_user", |
| .read_u64 = cpuusage_user_read, |
| }, |
| { |
| .name = "usage_sys", |
| .read_u64 = cpuusage_sys_read, |
| }, |
| { |
| .name = "usage_percpu", |
| .seq_show = cpuacct_percpu_seq_show, |
| }, |
| { |
| .name = "usage_percpu_user", |
| .seq_show = cpuacct_percpu_user_seq_show, |
| }, |
| { |
| .name = "usage_percpu_sys", |
| .seq_show = cpuacct_percpu_sys_seq_show, |
| }, |
| { |
| .name = "stat", |
| .seq_show = cpuacct_stats_show, |
| }, |
| { } /* terminate */ |
| }; |
| |
| /* |
| * charge this task's execution time to its accounting group. |
| * |
| * called with rq->lock held. |
| */ |
| void cpuacct_charge(struct task_struct *tsk, u64 cputime) |
| { |
| struct cpuacct *ca; |
| int index = CPUACCT_USAGE_SYSTEM; |
| struct pt_regs *regs = task_pt_regs(tsk); |
| |
| if (regs && user_mode(regs)) |
| index = CPUACCT_USAGE_USER; |
| |
| rcu_read_lock(); |
| |
| for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
| this_cpu_ptr(ca->cpuusage)->usages[index] += cputime; |
| |
| rcu_read_unlock(); |
| } |
| |
| /* |
| * Add user/system time to cpuacct. |
| * |
| * Note: it's the caller that updates the account of the root cgroup. |
| */ |
| void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
| { |
| struct cpuacct *ca; |
| |
| rcu_read_lock(); |
| for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
| this_cpu_ptr(ca->cpustat)->cpustat[index] += val; |
| rcu_read_unlock(); |
| } |
| |
| struct cgroup_subsys cpuacct_cgrp_subsys = { |
| .css_alloc = cpuacct_css_alloc, |
| .css_free = cpuacct_css_free, |
| .legacy_cftypes = files, |
| .early_init = true, |
| }; |