Blackfin arch: add CONFIG_APP_STACK_L1 to enable or disable putting application stacks in L1

Use CONFIG_APP_STACK_L1 to enable or disable placing application stacks in L1
scratchpad memory. The default is enabled; SMP kernels need to turn it off.
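
An SMP kernel can disable it in its defconfig with:

  # CONFIG_APP_STACK_L1 is not set

For reference, the L1 stack switching that this patch wraps in
CONFIG_APP_STACK_L1 works roughly as sketched below (a simplified outline of
the switch_mm() path using the names from the code, not the literal hunk):

	#ifdef CONFIG_APP_STACK_L1
		/* Nothing to do if the incoming mm has no L1 stack save area,
		 * or its saved stack is already resident in L1 scratchpad.
		 */
		if (!next_mm->context.l1_stack_save)
			return;
		if (next_mm->context.l1_stack_save == current_l1_stack_save)
			return;
		/* Spill the outgoing task's L1 stack, then load the incoming one. */
		if (current_l1_stack_save)
			memcpy(current_l1_stack_save, l1_stack_base, l1_stack_len);
		current_l1_stack_save = next_mm->context.l1_stack_save;
		memcpy(l1_stack_base, current_l1_stack_save, l1_stack_len);
	#endif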

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c507a92..9d936a3 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -629,6 +629,15 @@
 	  If enabled, the CPLB Switch Tables are linked
 	  into L1 data memory. (less latency)
 
+config APP_STACK_L1
+	bool "Support locating application stack in L1 Scratch Memory"
+	default y
+	help
+	  If enabled the application stack can be located in L1
+	  scratch memory (less latency).
+
+	  Currently only works with FLAT binaries.
+
 comment "Speed Optimizations"
 config BFIN_INS_LOWOVERHEAD
 	bool "ins[bwl] low overhead, higher interrupt latency"
diff --git a/arch/blackfin/include/asm/mmu_context.h b/arch/blackfin/include/asm/mmu_context.h
index 8529552..35593dd 100644
--- a/arch/blackfin/include/asm/mmu_context.h
+++ b/arch/blackfin/include/asm/mmu_context.h
@@ -45,49 +45,12 @@
 extern int l1sram_free(const void*);
 extern void *l1sram_alloc_max(void*);
 
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-/* Called when creating a new context during fork() or execve().  */
-static inline int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-#ifdef CONFIG_MPU
-	unsigned long p = __get_free_pages(GFP_KERNEL, page_mask_order);
-	mm->context.page_rwx_mask = (unsigned long *)p;
-	memset(mm->context.page_rwx_mask, 0,
-	       page_mask_nelts * 3 * sizeof(long));
-#endif
-	return 0;
-}
-
 static inline void free_l1stack(void)
 {
 	nr_l1stack_tasks--;
 	if (nr_l1stack_tasks == 0)
 		l1sram_free(l1_stack_base);
 }
-static inline void destroy_context(struct mm_struct *mm)
-{
-	struct sram_list_struct *tmp;
-
-	if (current_l1_stack_save == mm->context.l1_stack_save)
-		current_l1_stack_save = NULL;
-	if (mm->context.l1_stack_save)
-		free_l1stack();
-
-	while ((tmp = mm->context.sram_list)) {
-		mm->context.sram_list = tmp->next;
-		sram_free(tmp->addr);
-		kfree(tmp);
-	}
-#ifdef CONFIG_MPU
-	if (current_rwx_mask == mm->context.page_rwx_mask)
-		current_rwx_mask = NULL;
-	free_pages((unsigned long)mm->context.page_rwx_mask, page_mask_order);
-#endif
-}
 
 static inline unsigned long
 alloc_l1stack(unsigned long length, unsigned long *stack_base)
@@ -134,6 +97,7 @@
 	}
 #endif
 
+#ifdef CONFIG_APP_STACK_L1
 	/* L1 stack switching.  */
 	if (!next_mm->context.l1_stack_save)
 		return;
@@ -144,6 +108,7 @@
 	}
 	current_l1_stack_save = next_mm->context.l1_stack_save;
 	memcpy(l1_stack_base, current_l1_stack_save, l1_stack_len);
+#endif
 }
 
 #ifdef CONFIG_MPU
@@ -180,4 +145,44 @@
 }
 #endif
 
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/* Called when creating a new context during fork() or execve().  */
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+#ifdef CONFIG_MPU
+	unsigned long p = __get_free_pages(GFP_KERNEL, page_mask_order);
+	mm->context.page_rwx_mask = (unsigned long *)p;
+	memset(mm->context.page_rwx_mask, 0,
+	       page_mask_nelts * 3 * sizeof(long));
+#endif
+	return 0;
+}
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+	struct sram_list_struct *tmp;
+
+#ifdef CONFIG_APP_STACK_L1
+	if (current_l1_stack_save == mm->context.l1_stack_save)
+		current_l1_stack_save = NULL;
+	if (mm->context.l1_stack_save)
+		free_l1stack();
+#endif
+
+	while ((tmp = mm->context.sram_list)) {
+		mm->context.sram_list = tmp->next;
+		sram_free(tmp->addr);
+		kfree(tmp);
+	}
+#ifdef CONFIG_MPU
+	if (current_rwx_mask == mm->context.page_rwx_mask)
+		current_rwx_mask = NULL;
+	free_pages((unsigned long)mm->context.page_rwx_mask, page_mask_order);
+#endif
+}
+
 #endif