ARC: [mm] optimise icache flush for kernel mappings

This change continues the theme from the previous commit - this time for
icache handling when the kernel modifies its own code (vmalloc: loadable
modules, breakpoints for kprobes/kgdb...)

flush_icache_range() calls the CDU icache helper with vaddr to enable
exact line invalidate.

For a true kernel-virtual mapping, the vaddr is actually virtual, hence
valid as an index into the cache. For a kprobes breakpoint however, the
vaddr arg is actually a paddr - since that's how the kernel proper is
mapped in the ARC memory map. This implies that the CDU will use the same
addr for indexing as for tag match - which is fine since kernel code would
only have that "implicit" mapping and none other.

This should speed up module loading significantly - especially on default
ARC700 icache configurations (32k) which alias.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index a65c139..5651e7b 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -642,8 +642,8 @@
 EXPORT_SYMBOL(dma_cache_wback);
 
 /*
- * This is API for making I/D Caches consistent when modifying code
- * (loadable modules, kprobes,  etc)
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
  * This is called on insmod, with kernel virtual address for CODE of
  * the module. ARC cache maintenance ops require PHY address thus we
  * need to convert vmalloc addr to PHY addr
@@ -673,7 +673,13 @@
 
 	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
 	if (likely(kstart > PAGE_OFFSET)) {
-		__ic_line_inv(kstart, kend - kstart);
+		/*
+		 * The 2nd arg despite being paddr will be used to index icache
+		 * This is OK since no alternate virtual mappings will exist
+		 * given the callers for this case: kprobe/kgdb in built-in
+		 * kernel code only.
+		 */
+		__ic_line_inv_vaddr(kstart, kstart, kend - kstart);
 		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
 		return;
 	}
@@ -694,7 +700,7 @@
 		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
 		local_irq_save(flags);
 		__dc_line_op(phy, sz, OP_FLUSH);
-		__ic_line_inv(phy, sz);
+		__ic_line_inv_vaddr(phy, kstart, sz);
 		local_irq_restore(flags);
 		kstart += sz;
 		tot_sz -= sz;