| /* drivers/misc/uid_sys_stats.c |
| * |
| * Copyright (C) 2014 - 2015 Google, Inc. |
| * |
| * This software is licensed under the terms of the GNU General Public |
| * License version 2, as published by the Free Software Foundation, and |
| * may be copied, distributed, and modified under those terms. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| */ |
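| |
| /* |
| * uid_sys_stats exports per-UID resource statistics through procfs: |
| * /proc/uid_cputime/show_uid_stat - aggregated user/system CPU time per UID |
| * /proc/uid_cputime/remove_uid_range - drop accounting for a range of UIDs |
| * /proc/uid_io/stats - per-UID I/O counters, split by foreground/background |
| * /proc/uid_procstat/set - lets userspace mark a UID foreground or background |
| */ |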
| |
| #include <linux/atomic.h> |
| #include <linux/err.h> |
| #include <linux/hashtable.h> |
| #include <linux/init.h> |
| #include <linux/kernel.h> |
| #include <linux/list.h> |
| #include <linux/llist.h> |
| #include <linux/mm.h> |
| #include <linux/proc_fs.h> |
| #include <linux/profile.h> |
| #include <linux/sched/cputime.h> |
| #include <linux/seq_file.h> |
| #include <linux/slab.h> |
| #include <linux/spinlock.h> |
| #include <linux/uaccess.h> |
| #include <linux/workqueue.h> |
| |
| #define UID_HASH_BITS 10 |
| #define UID_HASH_NUMS (1 << UID_HASH_BITS) |
| static DECLARE_HASHTABLE(hash_table, UID_HASH_BITS); |
| /* uid_lock[bkt] ensures consistency of hash_table[bkt] */ |
| static spinlock_t uid_lock[UID_HASH_NUMS]; |
| |
| #define for_each_bkt(bkt) \ |
| for (bkt = 0; bkt < HASH_SIZE(hash_table); bkt++) |
| |
| /* iterate over all uid_entries hashing to the same bkt */ |
| #define for_each_uid_entry(uid_entry, bkt) \ |
| hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) |
| |
| #define for_each_uid_entry_safe(uid_entry, tmp, bkt) \ |
| hlist_for_each_entry_safe(uid_entry, tmp,\ |
| &hash_table[bkt], hash) |
| |
| static struct proc_dir_entry *cpu_parent; |
| static struct proc_dir_entry *io_parent; |
| static struct proc_dir_entry *proc_parent; |
| |
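| /* |
| * Per-state I/O counters, mirroring the fields of struct task_io_accounting: |
| * rchar/wchar count bytes passed through the read/write syscalls, |
| * read_bytes/write_bytes count storage-level I/O, and fsync counts |
| * fsync syscalls. |
| */ |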
| struct io_stats { |
| u64 read_bytes; |
| u64 write_bytes; |
| u64 rchar; |
| u64 wchar; |
| u64 fsync; |
| }; |
| |
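| /* |
| * Slots of the per-UID io[] array: FOREGROUND and BACKGROUND accumulate |
| * I/O deltas attributed to each state, TOTAL_LAST holds the snapshot used |
| * to compute the next delta, and DEAD_TASKS collects the I/O of tasks that |
| * have exited since the last snapshot. |
| */ |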
| #define UID_STATE_FOREGROUND 0 |
| #define UID_STATE_BACKGROUND 1 |
| #define UID_STATE_TOTAL_LAST 2 |
| #define UID_STATE_DEAD_TASKS 3 |
| #define UID_STATE_SIZE 4 |
| |
| #define MAX_TASK_COMM_LEN 256 |
| |
| struct task_entry { |
| char comm[MAX_TASK_COMM_LEN]; |
| pid_t pid; |
| struct io_stats io[UID_STATE_SIZE]; |
| struct hlist_node hash; |
| }; |
| |
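| /* |
| * One node per UID in hash_table; all fields are protected by |
| * uid_lock[uid_to_bkt(uid)]. |
| */ |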
| struct uid_entry { |
| uid_t uid; |
| u64 utime; |
| u64 stime; |
| int state; |
| struct io_stats io[UID_STATE_SIZE]; |
| struct hlist_node hash; |
| }; |
| |
| static void init_hash_table_and_lock(void) |
| { |
| int i; |
| |
| hash_init(hash_table); |
| for (i = 0; i < UID_HASH_NUMS; i++) |
| spin_lock_init(&uid_lock[i]); |
| } |
| |
| static inline int uid_to_bkt(uid_t uid) |
| { |
| return hash_min(uid, HASH_BITS(hash_table)); |
| } |
| |
| static inline int trylock_uid(uid_t uid) |
| { |
| return spin_trylock(&uid_lock[uid_to_bkt(uid)]); |
| } |
| |
| static inline void lock_uid(uid_t uid) |
| { |
| spin_lock(&uid_lock[uid_to_bkt(uid)]); |
| } |
| |
| static inline void unlock_uid(uid_t uid) |
| { |
| spin_unlock(&uid_lock[uid_to_bkt(uid)]); |
| } |
| |
| static inline void lock_uid_by_bkt(u32 bkt) |
| { |
| spin_lock(&uid_lock[bkt]); |
| } |
| |
| static inline void unlock_uid_by_bkt(u32 bkt) |
| { |
| spin_unlock(&uid_lock[bkt]); |
| } |
| |
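| /* |
| * write_bytes can be smaller than cancelled_write_bytes (e.g. after a |
| * truncate discards dirty pagecache), so clamp the difference to zero. |
| */ |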
| static u64 compute_write_bytes(struct task_io_accounting *ioac) |
| { |
| if (ioac->write_bytes <= ioac->cancelled_write_bytes) |
| return 0; |
| |
| return ioac->write_bytes - ioac->cancelled_write_bytes; |
| } |
| |
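| /* |
| * Fold the I/O performed since the previous snapshot into the active state |
| * bucket: delta = live totals (io_curr) + exited-task totals (io_dead) - |
| * previous snapshot (io_last). For example, if io_last->rchar was 100 and |
| * io_curr->rchar is 150 with io_dead->rchar at 20, the bucket grows by 70. |
| * io_last is then advanced to io_curr and io_dead is cleared. |
| */ |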
| static void compute_io_bucket_stats(struct io_stats *io_bucket, |
| struct io_stats *io_curr, |
| struct io_stats *io_last, |
| struct io_stats *io_dead) |
| { |
| /* |
| * Tasks can switch to another uid group, but their io_last in the |
| * previous uid group may still be positive. |
| * Therefore, guard against a negative delta before each update. |
| */ |
| int64_t delta; |
| |
| delta = io_curr->read_bytes + io_dead->read_bytes - |
| io_last->read_bytes; |
| io_bucket->read_bytes += delta > 0 ? delta : 0; |
| delta = io_curr->write_bytes + io_dead->write_bytes - |
| io_last->write_bytes; |
| io_bucket->write_bytes += delta > 0 ? delta : 0; |
| delta = io_curr->rchar + io_dead->rchar - io_last->rchar; |
| io_bucket->rchar += delta > 0 ? delta : 0; |
| delta = io_curr->wchar + io_dead->wchar - io_last->wchar; |
| io_bucket->wchar += delta > 0 ? delta : 0; |
| delta = io_curr->fsync + io_dead->fsync - io_last->fsync; |
| io_bucket->fsync += delta > 0 ? delta : 0; |
| |
| io_last->read_bytes = io_curr->read_bytes; |
| io_last->write_bytes = io_curr->write_bytes; |
| io_last->rchar = io_curr->rchar; |
| io_last->wchar = io_curr->wchar; |
| io_last->fsync = io_curr->fsync; |
| |
| memset(io_dead, 0, sizeof(struct io_stats)); |
| } |
| |
| static struct uid_entry *find_uid_entry(uid_t uid) |
| { |
| struct uid_entry *uid_entry; |
| u32 bkt = uid_to_bkt(uid); |
| |
| for_each_uid_entry(uid_entry, bkt) { |
| if (uid_entry->uid == uid) |
| return uid_entry; |
| } |
| return NULL; |
| } |
| |
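| /* |
| * Look up a uid_entry, allocating one if it does not exist yet. The caller |
| * must hold the bucket's uid_lock, hence the GFP_ATOMIC allocation. |
| */ |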
| static struct uid_entry *find_or_register_uid(uid_t uid) |
| { |
| struct uid_entry *uid_entry; |
| |
| uid_entry = find_uid_entry(uid); |
| if (uid_entry) |
| return uid_entry; |
| |
| uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC); |
| if (!uid_entry) |
| return NULL; |
| |
| uid_entry->uid = uid; |
| hash_add(hash_table, &uid_entry->hash, uid); |
| |
| return uid_entry; |
| } |
| |
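| /* |
| * Add the adjusted cputime of every live thread owned by this UID on top of |
| * the totals already accumulated from dead tasks (*total_utime and |
| * *total_stime are pre-loaded by the caller). |
| */ |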
| static void calc_uid_cputime(struct uid_entry *uid_entry, |
| u64 *total_utime, u64 *total_stime) |
| { |
| struct user_namespace *user_ns = current_user_ns(); |
| struct task_struct *p, *t; |
| u64 utime, stime; |
| uid_t uid; |
| |
| rcu_read_lock(); |
| for_each_process(p) { |
| uid = from_kuid_munged(user_ns, task_uid(p)); |
| |
| if (uid != uid_entry->uid) |
| continue; |
| |
| for_each_thread(p, t) { |
| /* avoid double accounting of dying threads */ |
| if (!(t->flags & PF_EXITING)) { |
| task_cputime_adjusted(t, &utime, &stime); |
| *total_utime += utime; |
| *total_stime += stime; |
| } |
| } |
| } |
| rcu_read_unlock(); |
| } |
| |
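| /* |
| * /proc/uid_cputime/show_uid_stat: one line per known UID, |
| * "<uid>: <user time in us> <system time in us>", e.g. "1000: 123456 7890". |
| */ |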
| static int uid_cputime_show(struct seq_file *m, void *v) |
| { |
| struct uid_entry *uid_entry = NULL; |
| u32 bkt; |
| |
| for_each_bkt(bkt) { |
| lock_uid_by_bkt(bkt); |
| for_each_uid_entry(uid_entry, bkt) { |
| u64 total_utime = uid_entry->utime; |
| u64 total_stime = uid_entry->stime; |
| |
| calc_uid_cputime(uid_entry, &total_utime, &total_stime); |
| seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, |
| ktime_to_us(total_utime), ktime_to_us(total_stime)); |
| } |
| unlock_uid_by_bkt(bkt); |
| } |
| |
| return 0; |
| } |
| |
| static int uid_cputime_open(struct inode *inode, struct file *file) |
| { |
| return single_open(file, uid_cputime_show, pde_data(inode)); |
| } |
| |
| static const struct proc_ops uid_cputime_fops = { |
| .proc_open = uid_cputime_open, |
| .proc_read = seq_read, |
| .proc_lseek = seq_lseek, |
| .proc_release = single_release, |
| }; |
| |
| static int uid_remove_open(struct inode *inode, struct file *file) |
| { |
| return single_open(file, NULL, NULL); |
| } |
| |
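| /* |
| * Writing an inclusive "<start>-<end>" UID range drops the accounting |
| * entries for those UIDs, |
| * e.g. echo "10010-10020" > /proc/uid_cputime/remove_uid_range. |
| */ |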
| static ssize_t uid_remove_write(struct file *file, |
| const char __user *buffer, size_t count, loff_t *ppos) |
| { |
| char uids[128]; |
| char *start_uid, *end_uid = NULL; |
| long int uid_start = 0, uid_end = 0; |
| |
| if (count >= sizeof(uids)) |
| count = sizeof(uids) - 1; |
| |
| if (copy_from_user(uids, buffer, count)) |
| return -EFAULT; |
| |
| uids[count] = '\0'; |
| end_uid = uids; |
| start_uid = strsep(&end_uid, "-"); |
| |
| if (!start_uid || !end_uid) |
| return -EINVAL; |
| |
| if (kstrtol(start_uid, 10, &uid_start) != 0 || |
| kstrtol(end_uid, 10, &uid_end) != 0) { |
| return -EINVAL; |
| } |
| |
| for (; uid_start <= uid_end; uid_start++) { |
| struct uid_entry *uid_entry; |
| struct hlist_node *tmp; |
| u32 bkt = uid_to_bkt((uid_t)uid_start); |
| |
| lock_uid(uid_start); |
| for_each_uid_entry_safe(uid_entry, tmp, bkt) { |
| if (uid_start == uid_entry->uid) { |
| hash_del(&uid_entry->hash); |
| kfree(uid_entry); |
| } |
| } |
| unlock_uid(uid_start); |
| } |
| |
| return count; |
| } |
| |
| static const struct proc_ops uid_remove_fops = { |
| .proc_open = uid_remove_open, |
| .proc_release = single_release, |
| .proc_write = uid_remove_write, |
| }; |
| |
| static void __add_uid_io_stats(struct uid_entry *uid_entry, |
| struct task_io_accounting *ioac, int slot) |
| { |
| struct io_stats *io_slot = &uid_entry->io[slot]; |
| |
| io_slot->read_bytes += ioac->read_bytes; |
| io_slot->write_bytes += compute_write_bytes(ioac); |
| io_slot->rchar += ioac->rchar; |
| io_slot->wchar += ioac->wchar; |
| io_slot->fsync += ioac->syscfs; |
| } |
| |
| static void add_uid_io_stats(struct uid_entry *uid_entry, |
| struct task_struct *task, int slot) |
| { |
| struct task_entry *task_entry __maybe_unused; |
| |
| /* avoid double accounting of dying threads */ |
| if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING)) |
| return; |
| |
| __add_uid_io_stats(uid_entry, &task->ioac, slot); |
| } |
| |
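| /* |
| * Snapshot the current I/O counters of all live threads owned by this UID |
| * and fold the delta into the bucket for the UID's current state. Must be |
| * called with the bucket's uid_lock held. |
| */ |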
| static void update_io_stats_uid(struct uid_entry *uid_entry) |
| { |
| struct user_namespace *user_ns = current_user_ns(); |
| struct task_struct *p, *t; |
| struct io_stats io; |
| |
| memset(&io, 0, sizeof(struct io_stats)); |
| |
| rcu_read_lock(); |
| for_each_process(p) { |
| uid_t uid = from_kuid_munged(user_ns, task_uid(p)); |
| |
| if (uid != uid_entry->uid) |
| continue; |
| |
| for_each_thread(p, t) { |
| /* avoid double accounting of dying threads */ |
| if (!(t->flags & PF_EXITING)) { |
| io.read_bytes += t->ioac.read_bytes; |
| io.write_bytes += compute_write_bytes(&t->ioac); |
| io.rchar += t->ioac.rchar; |
| io.wchar += t->ioac.wchar; |
| io.fsync += t->ioac.syscfs; |
| } |
| } |
| } |
| rcu_read_unlock(); |
| |
| compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io, |
| &uid_entry->io[UID_STATE_TOTAL_LAST], |
| &uid_entry->io[UID_STATE_DEAD_TASKS]); |
| } |
| |
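| /* |
| * /proc/uid_io/stats: one line per known UID with 11 fields: |
| * uid, foreground rchar/wchar/read_bytes/write_bytes, |
| * background rchar/wchar/read_bytes/write_bytes, |
| * foreground fsync, background fsync. |
| */ |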
| static int uid_io_show(struct seq_file *m, void *v) |
| { |
| struct uid_entry *uid_entry = NULL; |
| u32 bkt; |
| |
| for_each_bkt(bkt) { |
| lock_uid_by_bkt(bkt); |
| for_each_uid_entry(uid_entry, bkt) { |
| update_io_stats_uid(uid_entry); |
| |
| seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", |
| uid_entry->uid, |
| uid_entry->io[UID_STATE_FOREGROUND].rchar, |
| uid_entry->io[UID_STATE_FOREGROUND].wchar, |
| uid_entry->io[UID_STATE_FOREGROUND].read_bytes, |
| uid_entry->io[UID_STATE_FOREGROUND].write_bytes, |
| uid_entry->io[UID_STATE_BACKGROUND].rchar, |
| uid_entry->io[UID_STATE_BACKGROUND].wchar, |
| uid_entry->io[UID_STATE_BACKGROUND].read_bytes, |
| uid_entry->io[UID_STATE_BACKGROUND].write_bytes, |
| uid_entry->io[UID_STATE_FOREGROUND].fsync, |
| uid_entry->io[UID_STATE_BACKGROUND].fsync); |
| } |
| unlock_uid_by_bkt(bkt); |
| } |
| |
| return 0; |
| } |
| |
| static int uid_io_open(struct inode *inode, struct file *file) |
| { |
| return single_open(file, uid_io_show, pde_data(inode)); |
| } |
| |
| static const struct proc_ops uid_io_fops = { |
| .proc_open = uid_io_open, |
| .proc_read = seq_read, |
| .proc_lseek = seq_lseek, |
| .proc_release = single_release, |
| }; |
| |
| static int uid_procstat_open(struct inode *inode, struct file *file) |
| { |
| return single_open(file, NULL, NULL); |
| } |
| |
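| /* |
| * /proc/uid_procstat/set expects "<uid> <state>" where state is |
| * 0 (foreground) or 1 (background), |
| * e.g. echo "10010 1" > /proc/uid_procstat/set. |
| * I/O accumulated so far is folded into the previous state before switching. |
| */ |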
| static ssize_t uid_procstat_write(struct file *file, |
| const char __user *buffer, size_t count, loff_t *ppos) |
| { |
| struct uid_entry *uid_entry; |
| uid_t uid; |
| int argc, state; |
| char input[128]; |
| |
| if (count >= sizeof(input)) |
| return -EINVAL; |
| |
| if (copy_from_user(input, buffer, count)) |
| return -EFAULT; |
| |
| input[count] = '\0'; |
| |
| argc = sscanf(input, "%u %d", &uid, &state); |
| if (argc != 2) |
| return -EINVAL; |
| |
| if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND) |
| return -EINVAL; |
| |
| lock_uid(uid); |
| uid_entry = find_or_register_uid(uid); |
| if (!uid_entry) { |
| unlock_uid(uid); |
| return -EINVAL; |
| } |
| |
| if (uid_entry->state == state) { |
| unlock_uid(uid); |
| return count; |
| } |
| |
| update_io_stats_uid(uid_entry); |
| uid_entry->state = state; |
| unlock_uid(uid); |
| |
| return count; |
| } |
| |
| static const struct proc_ops uid_procstat_fops = { |
| .proc_open = uid_procstat_open, |
| .proc_release = single_release, |
| .proc_write = uid_procstat_write, |
| }; |
| |
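| /* |
| * Deferred accounting record: when the exit notifier cannot take the bucket |
| * lock with a trylock, the exiting task's counters are copied here and |
| * folded in later by update_stats_workfn, so task exit never spins on |
| * uid_lock. |
| */ |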
| struct update_stats_work { |
| uid_t uid; |
| struct task_io_accounting ioac; |
| u64 utime; |
| u64 stime; |
| struct llist_node node; |
| }; |
| |
| static LLIST_HEAD(work_usw); |
| |
| static void update_stats_workfn(struct work_struct *work) |
| { |
| struct update_stats_work *usw, *t; |
| struct uid_entry *uid_entry; |
| struct task_entry *task_entry __maybe_unused; |
| struct llist_node *node; |
| |
| node = llist_del_all(&work_usw); |
| llist_for_each_entry_safe(usw, t, node, node) { |
| lock_uid(usw->uid); |
| uid_entry = find_uid_entry(usw->uid); |
| if (!uid_entry) |
| goto next; |
| |
| uid_entry->utime += usw->utime; |
| uid_entry->stime += usw->stime; |
| |
| __add_uid_io_stats(uid_entry, &usw->ioac, UID_STATE_DEAD_TASKS); |
| next: |
| unlock_uid(usw->uid); |
| kfree(usw); |
| } |
| } |
| |
| static DECLARE_WORK(update_stats_work, update_stats_workfn); |
| |
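| /* |
| * Called from the PROFILE_TASK_EXIT notifier chain for each exiting task: |
| * charge its final cputime and I/O to the owning UID's DEAD_TASKS slot, |
| * either directly or deferred through update_stats_work if the bucket lock |
| * is contended. |
| */ |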
| static int process_notifier(struct notifier_block *self, |
| unsigned long cmd, void *v) |
| { |
| struct task_struct *task = v; |
| struct uid_entry *uid_entry; |
| u64 utime, stime; |
| uid_t uid; |
| |
| if (!task) |
| return NOTIFY_OK; |
| |
| uid = from_kuid_munged(current_user_ns(), task_uid(task)); |
| if (!trylock_uid(uid)) { |
| struct update_stats_work *usw; |
| |
| usw = kmalloc(sizeof(struct update_stats_work), GFP_KERNEL); |
| if (usw) { |
| usw->uid = uid; |
| /* |
| * Copy task->ioac since task might be destroyed before |
| * the work is later performed. |
| */ |
| usw->ioac = task->ioac; |
| task_cputime_adjusted(task, &usw->utime, &usw->stime); |
| llist_add(&usw->node, &work_usw); |
| schedule_work(&update_stats_work); |
| } |
| return NOTIFY_OK; |
| } |
| |
| uid_entry = find_or_register_uid(uid); |
| if (!uid_entry) { |
| pr_err("%s: failed to find or register uid %d\n", __func__, uid); |
| goto exit; |
| } |
| |
| task_cputime_adjusted(task, &utime, &stime); |
| uid_entry->utime += utime; |
| uid_entry->stime += stime; |
| |
| add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS); |
| |
| exit: |
| unlock_uid(uid); |
| return NOTIFY_OK; |
| } |
| |
| static struct notifier_block process_notifier_block = { |
| .notifier_call = process_notifier, |
| }; |
| |
| static int __init proc_uid_sys_stats_init(void) |
| { |
| init_hash_table_and_lock(); |
| |
| cpu_parent = proc_mkdir("uid_cputime", NULL); |
| if (!cpu_parent) { |
| pr_err("%s: failed to create uid_cputime proc entry\n", |
| __func__); |
| goto err; |
| } |
| |
| proc_create_data("remove_uid_range", 0222, cpu_parent, |
| &uid_remove_fops, NULL); |
| proc_create_data("show_uid_stat", 0444, cpu_parent, |
| &uid_cputime_fops, NULL); |
| |
| io_parent = proc_mkdir("uid_io", NULL); |
| if (!io_parent) { |
| pr_err("%s: failed to create uid_io proc entry\n", |
| __func__); |
| goto err; |
| } |
| |
| proc_create_data("stats", 0444, io_parent, |
| &uid_io_fops, NULL); |
| |
| proc_parent = proc_mkdir("uid_procstat", NULL); |
| if (!proc_parent) { |
| pr_err("%s: failed to create uid_procstat proc entry\n", |
| __func__); |
| goto err; |
| } |
| |
| proc_create_data("set", 0222, proc_parent, |
| &uid_procstat_fops, NULL); |
| |
| profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); |
| |
| return 0; |
| |
| err: |
| remove_proc_subtree("uid_cputime", NULL); |
| remove_proc_subtree("uid_io", NULL); |
| remove_proc_subtree("uid_procstat", NULL); |
| return -ENOMEM; |
| } |
| |
| early_initcall(proc_uid_sys_stats_init); |