MIPS: Put PGD in C0_CONTEXT for 64-bit R2 processors.

Processors that support the mips64r2 ISA can, in four instructions,
convert a shifted PGD pointer stored in the upper bits of c0_context
into a usable pointer.  By doing this we save a memory load and the
associated potential cache miss in the TLB exception handlers.

Since the upper bits of c0_context were holding the CPU number, we
move it to the upper bits of c0_xcontext, which doesn't have enough
bits to hold the PGD pointer but has plenty for the CPU number.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f6f3b99..20b223b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1427,6 +1427,9 @@
 	bool
 config CPU_SUPPORTS_HUGEPAGES
 	bool
+config MIPS_PGD_C0_CONTEXT
+	bool
+	default y if 64BIT && CPU_MIPSR2
 
 #
 # Set to y for ptrace access to watch registers.
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 6083db5..145bb81 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -24,6 +24,33 @@
 #endif /* SMTC */
 #include <asm-generic/mm_hooks.h>
 
+#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
+
+#define TLBMISS_HANDLER_SETUP_PGD(pgd)				\
+	tlbmiss_handler_setup_pgd((unsigned long)(pgd))
+
+static inline void tlbmiss_handler_setup_pgd(unsigned long pgd)
+{
+	/* A CKSEG0 pgd (swapper_pg_dir) is converted to a physical address. */
+	if ((pgd & CKSEG3) == CKSEG0)
+		pgd = CPHYSADDR(pgd);
+	write_c0_context(pgd << 11);
+}
+
+#define TLBMISS_HANDLER_SETUP()						\
+	do {								\
+		TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);		\
+		write_c0_xcontext((unsigned long) smp_processor_id() << 51); \
+	} while (0)
+
+
+static inline unsigned long get_current_pgd(void)
+{
+	return PHYS_TO_XKSEG_CACHED((read_c0_context() >> 11) & ~0xfffUL);
+}
+
+#else /* !CONFIG_MIPS_PGD_C0_CONTEXT: using pgd_current */
+
 /*
  * For the fast tlb miss handlers, we keep a per cpu array of pointers
  * to the current pgd for each processor. Also, the proc. id is stuffed
@@ -46,7 +73,7 @@
 	back_to_back_c0_hazard();					\
 	TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir)
 #endif
-
+#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 
 #define ASID_INC	0x40
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index dd7e220..3b6da33 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -87,15 +87,19 @@
 #ifdef CONFIG_SMP
 #ifdef CONFIG_MIPS_MT_SMTC
 #define PTEBASE_SHIFT	19	/* TCBIND */
+#define CPU_ID_REG CP0_TCBIND
+#define CPU_ID_MFC0 mfc0
+#elif defined(CONFIG_MIPS_PGD_C0_CONTEXT)
+#define PTEBASE_SHIFT	48	/* XCONTEXT */
+#define CPU_ID_REG CP0_XCONTEXT
+#define CPU_ID_MFC0 MFC0
 #else
 #define PTEBASE_SHIFT	23	/* CONTEXT */
+#define CPU_ID_REG CP0_CONTEXT
+#define CPU_ID_MFC0 MFC0
 #endif
 		.macro	get_saved_sp	/* SMP variation */
-#ifdef CONFIG_MIPS_MT_SMTC
-		mfc0	k0, CP0_TCBIND
-#else
-		MFC0	k0, CP0_CONTEXT
-#endif
+		CPU_ID_MFC0	k0, CPU_ID_REG
 #if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
 		lui	k1, %hi(kernelsp)
 #else
@@ -111,11 +115,7 @@
 		.endm
 
 		.macro	set_saved_sp stackp temp temp2
-#ifdef CONFIG_MIPS_MT_SMTC
-		mfc0	\temp, CP0_TCBIND
-#else
-		MFC0	\temp, CP0_CONTEXT
-#endif
+		CPU_ID_MFC0	\temp, CPU_ID_REG
 		LONG_SRL	\temp, PTEBASE_SHIFT
 		LONG_S	\stackp, kernelsp(\temp)
 		.endm
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 8d1f4f3..9e8d003 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -462,7 +462,9 @@
 			__pa_symbol(&__init_end));
 }
 
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
 unsigned long pgd_current[NR_CPUS];
+#endif
 /*
  * On 64-bit we've got three-level pagetables with a slightly
  * different layout ...
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index bb1719a..3d0baa4 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -160,6 +160,12 @@
 static struct uasm_label labels[128] __cpuinitdata;
 static struct uasm_reloc relocs[128] __cpuinitdata;
 
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+/*
+ * CONFIG_MIPS_PGD_C0_CONTEXT implies 64-bit and the lack of pgd_current;
+ * we cannot do r3000 under these circumstances.
+ */
+
 /*
  * The R3000 TLB handler is simple.
  */
@@ -199,6 +205,7 @@
 
 	dump_handler((u32 *)ebase, 32);
 }
+#endif /* !CONFIG_MIPS_PGD_C0_CONTEXT */
 
 /*
  * The R4000 TLB handler is much more complicated. We have two
@@ -497,8 +504,9 @@
 build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		 unsigned int tmp, unsigned int ptr)
 {
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
 	long pgdc = (long)pgd_current;
-
+#endif
 	/*
 	 * The vmalloc handling is not in the hotpath.
 	 */
@@ -506,7 +514,15 @@
 	uasm_il_bltz(p, r, tmp, label_vmalloc);
 	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
+	/*
+	 * The pgd pointer << 11 is stored in CONTEXT [23..63].
+	 */
+	UASM_i_MFC0(p, ptr, C0_CONTEXT);
+	uasm_i_dins(p, ptr, 0, 0, 23); /* Clear lower 23 bits of context. */
+	uasm_i_ori(p, ptr, ptr, 0x540); /* 1 0  1 0 1  << 6  xkphys cached */
+	uasm_i_drotr(p, ptr, ptr, 11);
+#elif defined(CONFIG_SMP)
 # ifdef  CONFIG_MIPS_MT_SMTC
 	/*
 	 * SMTC uses TCBind value as "CPU" index
@@ -520,7 +536,7 @@
 	 */
 	uasm_i_dmfc0(p, ptr, C0_CONTEXT);
 	uasm_i_dsrl(p, ptr, ptr, 23);
-#endif
+# endif
 	UASM_i_LA_mostly(p, tmp, pgdc);
 	uasm_i_daddu(p, ptr, ptr, tmp);
 	uasm_i_dmfc0(p, tmp, C0_BADVADDR);
@@ -1033,6 +1049,7 @@
 	iPTE_LW(p, pte, ptr);
 }
 
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
 /*
  * R3000 style TLB load/store/modify handlers.
  */
@@ -1184,6 +1201,7 @@
 
 	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
 }
+#endif /* !CONFIG_MIPS_PGD_C0_CONTEXT */
 
 /*
  * R4000 style TLB load/store/modify handlers.
@@ -1400,6 +1418,7 @@
 	case CPU_TX3912:
 	case CPU_TX3922:
 	case CPU_TX3927:
+#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
 		build_r3000_tlb_refill_handler();
 		if (!run_once) {
 			build_r3000_tlb_load_handler();
@@ -1407,6 +1426,9 @@
 			build_r3000_tlb_modify_handler();
 			run_once++;
 		}
+#else
+		panic("No R3000 TLB refill handler");
+#endif
 		break;
 
 	case CPU_R6000: