sched: Add NEED_RESCHED to the preempt_count

In order to combine the preemption and need_resched test we need to
fold the need_resched information into the preempt_count value.

Since the NEED_RESCHED flag is set across CPUs this needs to be an
atomic operation, however we very much want to avoid making
preempt_count atomic, therefore we keep the existing TIF_NEED_RESCHED
infrastructure in place but at 3 sites test it and fold its value into
preempt_count; namely:

 - resched_task() when setting TIF_NEED_RESCHED on the current task
 - scheduler_ipi() when resched_task() sets TIF_NEED_RESCHED on a
                   remote task it follows it up with a reschedule IPI
                   and we can modify the cpu local preempt_count from
                   there.
 - cpu_idle_loop() for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched so that a 0 means
both need_resched and !atomic.

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched() to avoid
having to reload the preemption value and allow the compiler to use
the flags of the previuos decrement. I couldn't come up with any sane
reason for this barrier() to be there as preempt_enable_no_resched()
already has a barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-7a7m5qqbn5pmwnd4wko9u6da@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe89afa..ee61f5a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -525,8 +525,10 @@
 	set_tsk_need_resched(p);
 
 	cpu = task_cpu(p);
-	if (cpu == smp_processor_id())
+	if (cpu == smp_processor_id()) {
+		set_preempt_need_resched();
 		return;
+	}
 
 	/* NEED_RESCHED must be visible before we test polling */
 	smp_mb();
@@ -1391,6 +1393,14 @@
 
 void scheduler_ipi(void)
 {
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	if (tif_need_resched())
+		set_preempt_need_resched();
+
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
 			&& !got_nohz_idle_kick())
@@ -1714,7 +1724,7 @@
 #endif
 #ifdef CONFIG_PREEMPT_COUNT
 	/* Want to start with kernel preemption disabled. */
-	task_thread_info(p)->preempt_count = 1;
+	task_thread_info(p)->preempt_count = PREEMPT_DISABLED;
 #endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
@@ -2425,6 +2435,7 @@
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
@@ -2536,11 +2547,10 @@
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
-	BUG_ON(ti->preempt_count || !irqs_disabled());
+	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();
 
@@ -4207,7 +4217,7 @@
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-	task_thread_info(idle)->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = PREEMPT_ENABLED;
 
 	/*
 	 * The idle tasks have their own, simple scheduling class: