[PATCH] MTRR suspend/resume cleanup

There has been some discuss about solving the SMP MTRR suspend/resume
breakage, but I didn't find a patch for it.  This is an intent for it.  The
basic idea is moving mtrr initializing into cpu_identify for all APs (so it
works for cpu hotplug).  For BP, restore_processor_state is responsible for
restoring MTRR.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 2203a9d..4553ffd 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -435,6 +435,11 @@
 	if (c == &boot_cpu_data)
 		sysenter_setup();
 	enable_sep_cpu();
+
+	if (c == &boot_cpu_data)
+		mtrr_bp_init();
+	else
+		mtrr_ap_init();
 }
 
 #ifdef CONFIG_X86_HT
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 64d91f7..169ac8e 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -67,13 +67,6 @@
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 }
 
-/*  Free resources associated with a struct mtrr_state  */
-void __init finalize_mtrr_state(void)
-{
-	kfree(mtrr_state.var_ranges);
-	mtrr_state.var_ranges = NULL;
-}
-
 /*  Some BIOS's are fucked and don't set all MTRRs the same!  */
 void __init mtrr_state_warn(void)
 {
@@ -334,6 +327,9 @@
 */
 {
 	unsigned long flags;
+	struct mtrr_var_range *vr;
+
+	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
 	prepare_set();
@@ -342,11 +338,15 @@
 		/* The invalid bit is kept in the mask, so we simply clear the
 		   relevant mask register to disable a range. */
 		mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+		memset(vr, 0, sizeof(struct mtrr_var_range));
 	} else {
-		mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
-		      (base & size_and_mask) >> (32 - PAGE_SHIFT));
-		mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
-		      (-size & size_and_mask) >> (32 - PAGE_SHIFT));
+		vr->base_lo = base << PAGE_SHIFT | type;
+		vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+		vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+		vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+		mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
+		mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
 	}
 
 	post_set();
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index d66b09e..764cac6 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -332,6 +332,8 @@
 
 	error = -EINVAL;
 
+	/* No CPU hotplug when we change MTRR entries */
+	lock_cpu_hotplug();
 	/*  Search for existing MTRR  */
 	down(&main_lock);
 	for (i = 0; i < num_var_ranges; ++i) {
@@ -372,6 +374,7 @@
 	error = i;
  out:
 	up(&main_lock);
+	unlock_cpu_hotplug();
 	return error;
 }
 
@@ -461,6 +464,8 @@
 		return -ENXIO;
 
 	max = num_var_ranges;
+	/* No CPU hotplug when we change MTRR entries */
+	lock_cpu_hotplug();
 	down(&main_lock);
 	if (reg < 0) {
 		/*  Search for existing MTRR  */
@@ -501,6 +506,7 @@
 	error = reg;
  out:
 	up(&main_lock);
+	unlock_cpu_hotplug();
 	return error;
 }
 /**
@@ -544,21 +550,9 @@
 	centaur_init_mtrr();
 }
 
-static void __init init_other_cpus(void)
-{
-	if (use_intel())
-		get_mtrr_state();
-
-	/* bring up the other processors */
-	set_mtrr(~0U,0,0,0);
-
-	if (use_intel()) {
-		finalize_mtrr_state();
-		mtrr_state_warn();
-	}
-}
-
-
+/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
+ * MTRR driver doesn't require this
+ */
 struct mtrr_value {
 	mtrr_type	ltype;
 	unsigned long	lbase;
@@ -611,13 +605,13 @@
 
 
 /**
- * mtrr_init - initialize mtrrs on the boot CPU
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
  * This needs to be called early; before any of the other CPUs are 
  * initialized (i.e. before smp_init()).
  * 
  */
-static int __init mtrr_init(void)
+void __init mtrr_bp_init(void)
 {
 	init_ifs();
 
@@ -674,12 +668,48 @@
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		init_other_cpus();
-
-		return sysdev_driver_register(&cpu_sysdev_class,
-					      &mtrr_sysdev_driver);
+		if (use_intel())
+			get_mtrr_state();
 	}
-	return -ENXIO;
 }
 
-subsys_initcall(mtrr_init);
+void mtrr_ap_init(void)
+{
+	unsigned long flags;
+
+	if (!mtrr_if || !use_intel())
+		return;
+	/*
+	 * Ideally we should hold main_lock here to avoid mtrr entries changed,
+	 * but this routine will be called in cpu boot time, holding the lock
+	 * breaks it. This routine is called in two cases: 1.very earily time
+	 * of software resume, when there absolutely isn't mtrr entry changes;
+	 * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+	 * prevent mtrr entry changes
+	 */
+	local_irq_save(flags);
+
+	mtrr_if->set_all();
+
+	local_irq_restore(flags);
+}
+
+static int __init mtrr_init_finialize(void)
+{
+	if (!mtrr_if)
+		return 0;
+	if (use_intel())
+		mtrr_state_warn();
+	else {
+		/* The CPUs haven't MTRR and seemes not support SMP. They have
+		 * specific drivers, we use a tricky method to support
+		 * suspend/resume for them.
+		 * TBD: is there any system with such CPU which supports
+		 * suspend/resume?  if no, we should remove the code.
+		 */
+		sysdev_driver_register(&cpu_sysdev_class,
+			&mtrr_sysdev_driver);
+	}
+	return 0;
+}
+subsys_initcall(mtrr_init_finialize);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index de135124..99c9f26 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -91,7 +91,6 @@
 
 extern unsigned int num_var_ranges;
 
-void finalize_mtrr_state(void);
 void mtrr_state_warn(void);
 char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 0e6b45b6..c547c1a 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -137,6 +137,7 @@
 
 	fix_processor_context();
 	do_fpu_end();
+	mtrr_ap_init();
 }
 
 void restore_processor_state(void)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index b02d921..5fd0322 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -1076,6 +1076,10 @@
 #ifdef CONFIG_X86_MCE
 	mcheck_init(c);
 #endif
+	if (c == &boot_cpu_data)
+		mtrr_bp_init();
+	else
+		mtrr_ap_init();
 #ifdef CONFIG_NUMA
 	if (c != &boot_cpu_data)
 		numa_add_cpu(c - cpu_data);
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 6c0f402e..0612640 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -119,6 +119,7 @@
 	fix_processor_context();
 
 	do_fpu_end();
+	mtrr_ap_init();
 }
 
 void restore_processor_state(void)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 6f0f93d..5d06e6b 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -694,4 +694,12 @@
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
+#ifdef CONFIG_MTRR
+extern void mtrr_ap_init(void);
+extern void mtrr_bp_init(void);
+#else
+#define mtrr_ap_init() do {} while (0)
+#define mtrr_bp_init() do {} while (0)
+#endif
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index f2f0736..6c813eb 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -15,6 +15,13 @@
 extern void early_idt_handler(void);
 
 extern void mcheck_init(struct cpuinfo_x86 *c);
+#ifdef CONFIG_MTRR
+extern void mtrr_ap_init(void);
+extern void mtrr_bp_init(void);
+#else
+#define mtrr_ap_init() do {} while (0)
+#define mtrr_bp_init() do {} while (0)
+#endif
 extern void init_memory_mapping(unsigned long start, unsigned long end);
 
 extern void system_call(void);