x86: fix x86_32 stack protector bugs

Impact: fix x86_32 stack protector

Brian Gerst found out that %gs was being initialized to stack_canary
instead of stack_canary - 20, which basically gave the same canary
value for all threads.  Fixing this also exposed the following bugs.

* cpu_idle() didn't call boot_init_stack_canary()

* stack canary switching in switch_to() was being done too late making
  the initial run of a new thread use the old stack canary value.

Fix all of them and while at it update comment in cpu_idle() about
calling boot_init_stack_canary().

Reported-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2692ee8..7a80f72 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -25,13 +25,11 @@
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							\
-	"movl "__percpu_arg([current_task])",%%ebx\n\t"			\
-	"movl %P[task_canary](%%ebx),%%ebx\n\t"				\
-	"movl %%ebx,"__percpu_arg([stack_canary])"\n\t"
+	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
+	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
 #define __switch_canary_oparam						\
 	, [stack_canary] "=m" (per_cpu_var(stack_canary))
 #define __switch_canary_iparam						\
-	, [current_task] "m" (per_cpu_var(current_task))		\
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
 #define __switch_canary
@@ -60,9 +58,9 @@
 		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
 		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
 		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
+		     __switch_canary					\
 		     "jmp __switch_to\n"	/* regparm call  */	\
 		     "1:\t"						\
-		     __switch_canary					\
 		     "popl %%ebp\n\t"		/* restore EBP   */	\
 		     "popfl\n"			/* restore flags */	\
 									\