timers: fix itimer/many thread hang, v2
This is the second resubmission of the posix timer rework patch, posted
a few days ago.
This includes the changes from the previous resubmittion, which addressed
Oleg Nesterov's comments, removing the RCU stuff from the patch and
un-inlining the thread_group_cputime() function for SMP.
In addition, per Ingo Molnar it simplifies the UP code, consolidating much
of it with the SMP version and depending on lower-level SMP/UP handling to
take care of the differences.
It also cleans up some UP compile errors, moves the scheduler stats-related
macros into kernel/sched_stats.h, cleans up a merge error in
kernel/fork.c and has a few other minor fixes and cleanups as suggested
by Oleg and Ingo. Thanks for the review, guys.
Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43..d6903bd 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,139 @@
#define sched_info_switch(t, next) do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick. None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+#ifdef CONFIG_SMP
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @cputime: Time value by which to increment the utime field of that
+ * structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+ struct thread_group_cputime *tgtimes,
+ cputime_t cputime)
+{
+ if (tgtimes->totals) {
+ struct task_cputime *times;
+
+ times = per_cpu_ptr(tgtimes->totals, get_cpu());
+ times->utime = cputime_add(times->utime, cputime);
+ put_cpu_no_resched();
+ }
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @cputime: Time value by which to increment the stime field of that
+ * structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+ struct thread_group_cputime *tgtimes,
+ cputime_t cputime)
+{
+ if (tgtimes->totals) {
+ struct task_cputime *times;
+
+ times = per_cpu_ptr(tgtimes->totals, get_cpu());
+ times->stime = cputime_add(times->stime, cputime);
+ put_cpu_no_resched();
+ }
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ * thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @ns: Time value by which to increment the sum_exec_runtime field
+ * of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+ struct thread_group_cputime *tgtimes,
+ unsigned long long ns)
+{
+ if (tgtimes->totals) {
+ struct task_cputime *times;
+
+ times = per_cpu_ptr(tgtimes->totals, get_cpu());
+ times->sum_exec_runtime += ns;
+ put_cpu_no_resched();
+ }
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_account_user(
+ struct thread_group_cputime *tgtimes,
+ cputime_t cputime)
+{
+ tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+ struct thread_group_cputime *tgtimes,
+ cputime_t cputime)
+{
+ tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+ struct thread_group_cputime *tgtimes,
+ unsigned long long ns)
+{
+ tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * These are the generic time-accounting routines that use the above
+ * functions. They are the functions actually called by the scheduler.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+ cputime_t cputime)
+{
+ struct signal_struct *sig;
+
+ sig = tsk->signal;
+ if (likely(sig))
+ thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+ cputime_t cputime)
+{
+ struct signal_struct *sig;
+
+ sig = tsk->signal;
+ if (likely(sig))
+ thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+ unsigned long long ns)
+{
+ struct signal_struct *sig;
+
+ sig = tsk->signal;
+ if (likely(sig))
+ thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}