sh: Definitions for 3-level page table layout

If using 64-bit PTEs and 4K pages then each page table has 512 entries
(as opposed to 1024 entries with 32-bit PTEs). Unlike MIPS, SH follows
the convention that all structures in the page table (pgd_t, pmd_t,
pgprot_t, etc) must be the same size. Therefore, 64-bit PTEs require
64-bit PGD entries, etc. Using 2-levels of page tables and 64-bit PTEs
it is only possible to map 1GB of virtual address space.

In order to map all 4GB of virtual address space we need to adopt a
3-level page table layout. This actually works out better for
CONFIG_SUPERH32 because we only waste 2 PGD entries on the P1 and P2
areas (which are untranslated) instead of 256.

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index fe9f037..4ea2785 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -6,7 +6,11 @@
 
 #define QUICK_PT 1	/* Other page table pages that are zero on free */
 
+#ifdef CONFIG_PGTABLE_LEVELS_3
+#include <asm/pgalloc_pmd.h>
+#else
 #include <asm/pgalloc_nopmd.h>
+#endif
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
diff --git a/arch/sh/include/asm/pgalloc_pmd.h b/arch/sh/include/asm/pgalloc_pmd.h
new file mode 100644
index 0000000..20f75cc
--- /dev/null
+++ b/arch/sh/include/asm/pgalloc_pmd.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_SH_PGALLOC_PMD_H
+#define __ASM_SH_PGALLOC_PMD_H
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd;
+	int i;
+
+	pgd = kzalloc(sizeof(*pgd) * PTRS_PER_PGD, GFP_KERNEL | __GFP_REPEAT);
+
+	for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++)
+		pgd[i] = swapper_pg_dir[i];
+
+	return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kfree(pgd);
+}
+
+static inline void __check_pgt_cache(void)
+{
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	set_pud(pud, __pud((unsigned long)pmd));
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	quicklist_free(QUICK_PT, NULL, pmd);
+}
+
+#endif /* __ASM_SH_PGALLOC_PMD_H */
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 9a0f66c..9effcc3 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -12,7 +12,11 @@
 #ifndef __ASM_SH_PGTABLE_H
 #define __ASM_SH_PGTABLE_H
 
+#ifdef CONFIG_PGTABLE_LEVELS_3
+#include <asm/pgtable_pmd.h>
+#else
 #include <asm/pgtable_nopmd.h>
+#endif
 #include <asm/page.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sh/include/asm/pgtable_pmd.h b/arch/sh/include/asm/pgtable_pmd.h
new file mode 100644
index 0000000..78dc36e
--- /dev/null
+++ b/arch/sh/include/asm/pgtable_pmd.h
@@ -0,0 +1,55 @@
+#ifndef __ASM_SH_PGTABLE_PMD_H
+#define __ASM_SH_PGTABLE_PMD_H
+
+#include <asm-generic/pgtable-nopud.h>
+
+/*
+ * Some cores need a 3-level page table layout, for example when using
+ * 64-bit PTEs and 4K pages.
+ */
+
+#define PTE_MAGNITUDE	3	/* 64-bit PTEs on extended mode SH-X2 TLB */
+
+/* PGD bits */
+#define PGDIR_SHIFT	30
+
+#define PTRS_PER_PGD		4
+#define USER_PTRS_PER_PGD	2
+
+/* PMD bits */
+#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT - 3))
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+#define PTRS_PER_PMD	(PAGE_SIZE / sizeof(pmd_t))
+
+#define pmd_ERROR(e) \
+	printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
+
+typedef struct { unsigned long long pmd; } pmd_t;
+#define pmd_val(x)	((x).pmd)
+#define __pmd(x)	((pmd_t) { (x) } )
+
+static inline unsigned long pud_page_vaddr(pud_t pud)
+{
+	return pud_val(pud);
+}
+
+#define pmd_index(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+}
+
+#define pud_none(x)	(!pud_val(x))
+#define pud_present(x)	(pud_val(x))
+#define pud_clear(xp)	do { set_pud(xp, __pud(0)); } while (0)
+#define	pud_bad(x)	(pud_val(x) & ~PAGE_MASK)
+
+/*
+ * (puds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while(0)
+
+#endif /* __ASM_SH_PGTABLE_PMD_H */
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 0e7ba8e8..b3f6c1a 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -190,19 +190,37 @@
 	depends on MEMORY_HOTPLUG
 
 choice
+	prompt "Page table layout"
+	default PGTABLE_LEVELS_3 if X2TLB
+	default PGTABLE_LEVELS_2
+
+config PGTABLE_LEVELS_2
+       bool "2 Levels"
+       help
+         This is the default page table layout for all SuperH CPUs.
+
+config PGTABLE_LEVELS_3
+       bool "3 Levels"
+       depends on X2TLB
+       help
+         This enables a 3 level page table structure.
+
+endchoice
+
+choice
 	prompt "Kernel page size"
 	default PAGE_SIZE_8KB if X2TLB
 	default PAGE_SIZE_4KB
 
 config PAGE_SIZE_4KB
 	bool "4kB"
-	depends on !MMU || !X2TLB
+	depends on !MMU || !X2TLB || PGTABLE_LEVELS_3
 	help
 	  This is the default page size used by all SuperH CPUs.
 
 config PAGE_SIZE_8KB
 	bool "8kB"
-	depends on !MMU || X2TLB
+	depends on !MMU || X2TLB && !PGTABLE_LEVELS_3
 	help
 	  This enables 8kB pages as supported by SH-X2 and later MMUs.
 
@@ -214,7 +232,7 @@
 
 config PAGE_SIZE_64KB
 	bool "64kB"
-	depends on !MMU || CPU_SH4 || CPU_SH5
+	depends on !MMU || CPU_SH4 && !PGTABLE_LEVELS_3 || CPU_SH5
 	help
 	  This enables support for 64kB pages, possible on all SH-4
 	  CPUs and later.
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 4753010..28e2283 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -53,6 +53,9 @@
 	if (!pud_present(*pud_k))
 		return NULL;
 
+	if (!pud_present(*pud))
+	    set_pud(pud, *pud_k);
+
 	pmd = pmd_offset(pud, address);
 	pmd_k = pmd_offset(pud_k, address);
 	if (!pmd_present(*pmd_k))
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 432acd0..761910d 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -120,7 +120,13 @@
 	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
 		pud = (pud_t *)pgd;
 		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
+#ifdef __PAGETABLE_PMD_FOLDED
 			pmd = (pmd_t *)pud;
+#else
+			pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pud_populate(&init_mm, pud, pmd);
+			pmd += k;
+#endif
 			for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
 				if (pmd_none(*pmd)) {
 					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);