ARCv2: ioremap: Support dynamic peripheral address space

The peripheral address space is an architectural address window which is
uncached and typically used to wire up peripherals.

For ARC700 cores (ARCompact ISA based) this was fixed to 1GB region
0xC000_0000 - 0xFFFF_FFFF.

For ARCv2 based HS38 cores the start address is flexible and can be
0xC000_0000, 0xD000_0000, 0xE000_0000 or 0xF000_0000, selected by
programming the AUX_NON_VOLATILE_LIMIT reg (typically done in bootloader).

Further, in case of PAE, the physical address can extend beyond 4GB, so
the uncached-region check needs to be confined to addresses below 4GB;
otherwise all pages beyond 4GB would be treated as uncached.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 210ef3e..23706c6 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -54,6 +54,7 @@
 extern void read_decode_cache_bcr(void);
 
 extern int ioc_exists;
+extern unsigned long perip_base;
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index e4dc7ba..151acf0 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -91,11 +91,9 @@
 
 static void read_arc_build_cfg_regs(void)
 {
-	struct bcr_perip uncached_space;
 	struct bcr_timer timer;
 	struct bcr_generic bcr;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
-	unsigned long perip_space;
 	FIX_PTR(cpu);
 
 	READ_BCR(AUX_IDENTITY, cpu->core);
@@ -108,14 +106,6 @@
 
 	cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
-	READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
-        if (uncached_space.ver < 3)
-		perip_space = uncached_space.start << 24;
-	else
-		perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
-
-	BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE);
-
 	READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
 
 	cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */
@@ -288,8 +278,8 @@
 	FIX_PTR(cpu);
 
 	n += scnprintf(buf + n, len - n,
-		       "Vector Table\t: %#x\nUncached Base\t: %#x\n",
-		       cpu->vec_base, ARC_UNCACHED_ADDR_SPACE);
+		       "Vector Table\t: %#x\nUncached Base\t: %#lx\n",
+		       cpu->vec_base, perip_base);
 
 	if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
 		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 635cd8c..d7709e3 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -24,6 +24,7 @@
 static int l2_line_sz;
 int ioc_exists;
 volatile int slc_enable = 1, ioc_enable = 1;
+unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
 			       unsigned long sz, const int cacheop);
@@ -75,6 +76,7 @@
 static void read_decode_cache_bcr_arcv2(int cpu)
 {
 	struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+	struct bcr_generic uncached_space;
 	struct bcr_generic sbcr;
 
 	struct bcr_slc_cfg {
@@ -104,6 +106,11 @@
 	READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
 	if (cbcr.c && ioc_enable)
 		ioc_exists = 1;
+
+	/* Legacy Data Uncached BCR is deprecated from v3 onwards */
+	READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
+	if (uncached_space.ver > 2)
+		perip_base = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
 }
 
 void read_decode_cache_bcr(void)
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 75b0ca6..49b8abd 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -14,6 +14,18 @@
 #include <linux/slab.h>
 #include <linux/cache.h>
 
+static inline bool arc_uncached_addr_space(phys_addr_t paddr)
+{
+	if (is_isa_arcompact()) {	/* ARCompact: fixed window start */
+		if (paddr >= ARC_UNCACHED_ADDR_SPACE)
+			return true;
+	} else if (paddr >= perip_base && paddr <= 0xFFFFFFFF) {
+		return true;	/* in [perip_base, 4GB): PAE addrs above 4GB excluded */
+	}
+
+	return false;	/* not in the peripheral window: caller must map it */
+}
+
 void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
 {
 	phys_addr_t end;
@@ -27,7 +39,7 @@
 	 * If the region is h/w uncached, MMU mapping can be elided as optim
 	 * The cast to u32 is fine as this region can only be inside 4GB
 	 */
-	if (paddr >= ARC_UNCACHED_ADDR_SPACE)
+	if (arc_uncached_addr_space(paddr))
 		return (void __iomem *)(u32)paddr;
 
 	return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
@@ -85,7 +97,8 @@
 
 void iounmap(const void __iomem *addr)
 {
-	if (addr >= (void __force __iomem *)ARC_UNCACHED_ADDR_SPACE)
+	/* weird double cast to handle phys_addr_t > 32 bits */
+	if (arc_uncached_addr_space((phys_addr_t)(u32)addr))
 		return;
 
 	vfree((void *)(PAGE_MASK & (unsigned long __force)addr));