Merge branches 'amba', 'fixes', 'misc', 'mmci', 'unstable/omap-dma' and 'unstable/sa11x0' into for-next
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e254198..2a232ce 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1578,6 +1578,7 @@
 
 choice
 	prompt "Memory split"
+	depends on MMU
 	default VMSPLIT_3G
 	help
 	  Select the desired split between kernel and user memory.
@@ -1595,6 +1596,7 @@
 
 config PAGE_OFFSET
 	hex
+	default PHYS_OFFSET if !MMU
 	default 0x40000000 if VMSPLIT_1G
 	default 0x80000000 if VMSPLIT_2G
 	default 0xC0000000
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index 47279aa..0714e03 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -1,4 +1,5 @@
 ashldi3.S
+bswapsdi2.S
 font.c
 lib1funcs.S
 hyp-stub.S
diff --git a/arch/arm/boot/dts/imx6dl-hummingboard.dts b/arch/arm/boot/dts/imx6dl-hummingboard.dts
index fd8fc7c..5bfae54 100644
--- a/arch/arm/boot/dts/imx6dl-hummingboard.dts
+++ b/arch/arm/boot/dts/imx6dl-hummingboard.dts
@@ -52,12 +52,6 @@
 		};
 	};
 
-	codec: spdif-transmitter {
-		compatible = "linux,spdif-dit";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_hummingboard_spdif>;
-	};
-
 	sound-spdif {
 		compatible = "fsl,imx-audio-spdif";
 		model = "imx-spdif";
@@ -111,7 +105,7 @@
 		};
 
 		pinctrl_hummingboard_spdif: hummingboard-spdif {
-			fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x1b0b0>;
+			fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x13091>;
 		};
 
 		pinctrl_hummingboard_usbh1_vbus: hummingboard-usbh1-vbus {
@@ -142,6 +136,8 @@
 };
 
 &spdif {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hummingboard_spdif>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
index 64daa3b..c2a2488 100644
--- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
@@ -46,12 +46,6 @@
 		};
 	};
 
-	codec: spdif-transmitter {
-		compatible = "linux,spdif-dit";
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_cubox_i_spdif>;
-	};
-
 	sound-spdif {
 		compatible = "fsl,imx-audio-spdif";
 		model = "imx-spdif";
@@ -89,7 +83,7 @@
 		};
 
 		pinctrl_cubox_i_spdif: cubox-i-spdif {
-			fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x1b0b0>;
+			fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x13091>;
 		};
 
 		pinctrl_cubox_i_usbh1_vbus: cubox-i-usbh1-vbus {
@@ -121,6 +115,8 @@
 };
 
 &spdif {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_cubox_i_spdif>;
 	status = "okay";
 };
 
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 4bdc416..70b1eff 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -13,6 +13,7 @@
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+CFLAGS_REMOVE_mcpm_entry.o	= -pg
 AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
 obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index a5c3dc3..6ef146e 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -232,8 +232,6 @@
 
 	return 0;
 
-	if (devptr->gpio.base != -1)
-		temp = gpiochip_remove(&devptr->gpio);
 err_gpio:
 	platform_set_drvdata(pdev, NULL);
 err_ioremap:
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5c22851..380ac4f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define lspull          lsr
+#define lspush          lsl
 #define get_byte_0      lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -41,8 +41,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull            lsl
-#define push            lsr
+#define lspull          lsl
+#define lspush          lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 62d2cb5..9a92fd7 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -60,6 +60,7 @@
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_add_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -99,6 +100,7 @@
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -121,6 +123,7 @@
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
@@ -138,6 +141,33 @@
 	return oldval;
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int oldval, newval;
+	unsigned long tmp;
+
+	smp_mb();
+	prefetchw(&v->counter);
+
+	__asm__ __volatile__ ("@ atomic_add_unless\n"
+"1:	ldrex	%0, [%4]\n"
+"	teq	%0, %5\n"
+"	beq	2f\n"
+"	add	%1, %0, %6\n"
+"	strex	%2, %1, [%4]\n"
+"	teq	%2, #0\n"
+"	bne	1b\n"
+"2:"
+	: "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
+	: "r" (&v->counter), "r" (u), "r" (a)
+	: "cc");
+
+	if (oldval != u)
+		smp_mb();
+
+	return oldval;
+}
+
 #else /* ARM_ARCH_6 */
 
 #ifdef CONFIG_SMP
@@ -186,10 +216,6 @@
 	return ret;
 }
 
-#endif /* __LINUX_ARM_ARCH__ */
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -200,6 +226,10 @@
 	return c;
 }
 
+#endif /* __LINUX_ARM_ARCH__ */
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
@@ -299,6 +329,7 @@
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -340,6 +371,7 @@
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -364,6 +396,7 @@
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
@@ -388,6 +421,7 @@
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -409,6 +443,7 @@
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -436,6 +471,7 @@
 	int ret = 1;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
 "1:	ldrexd	%0, %H0, [%4]\n"
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e9a49fe..8b8b616 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -212,6 +212,7 @@
 static inline void __flush_icache_all(void)
 {
 	__flush_icache_preferred();
+	dsb();
 }
 
 /*
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index df2fbba..abb2c37 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_CMPXCHG_H
 
 #include <linux/irqflags.h>
+#include <linux/prefetch.h>
 #include <asm/barrier.h>
 
 #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
@@ -35,6 +36,7 @@
 #endif
 
 	smp_mb();
+	prefetchw((const void *)ptr);
 
 	switch (size) {
 #if __LINUX_ARM_ARCH__ >= 6
@@ -138,6 +140,8 @@
 {
 	unsigned long oldval, res;
 
+	prefetchw((const void *)ptr);
+
 	switch (size) {
 #ifndef CONFIG_CPU_V6	/* min ARCH >= ARMv6K */
 	case 1:
@@ -230,6 +234,8 @@
 	unsigned long long oldval;
 	unsigned long res;
 
+	prefetchw(ptr);
+
 	__asm__ __volatile__(
 "1:	ldrexd		%1, %H1, [%3]\n"
 "	teq		%1, %4\n"
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index acdde76..42f0889 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -71,6 +71,7 @@
 #define ARM_CPU_PART_CORTEX_A5		0xC050
 #define ARM_CPU_PART_CORTEX_A15		0xC0F0
 #define ARM_CPU_PART_CORTEX_A7		0xC070
+#define ARM_CPU_PART_CORTEX_A12		0xC0D0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h
index c9f03ec..f488255 100644
--- a/arch/arm/include/asm/floppy.h
+++ b/arch/arm/include/asm/floppy.h
@@ -25,7 +25,7 @@
 
 #define fd_inb(port)		inb((port))
 #define fd_request_irq()	request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\
-					    IRQF_DISABLED,"floppy",NULL)
+					    0,"floppy",NULL)
 #define fd_free_irq()		free_irq(IRQ_FLOPPYDISK,NULL)
 #define fd_disable_irq()	disable_irq(IRQ_FLOPPYDISK)
 #define fd_enable_irq()		enable_irq(IRQ_FLOPPYDISK)
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index e42cf59..53e69da 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -3,11 +3,6 @@
 
 #ifdef __KERNEL__
 
-#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP)
-/* ARM doesn't provide unprivileged exclusive memory accessors */
-#include <asm-generic/futex.h>
-#else
-
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 #include <asm/errno.h>
@@ -28,6 +23,7 @@
 
 #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg)	\
 	smp_mb();						\
+	prefetchw(uaddr);					\
 	__asm__ __volatile__(					\
 	"1:	ldrex	%1, [%3]\n"				\
 	"	" insn "\n"					\
@@ -51,6 +47,8 @@
 		return -EFAULT;
 
 	smp_mb();
+	/* Prefetching cannot fault */
+	prefetchw(uaddr);
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 	"1:	ldrex	%1, [%4]\n"
 	"	teq	%1, %2\n"
@@ -164,6 +162,5 @@
 	return ret;
 }
 
-#endif /* !(CPU_USE_DOMAINS && SMP) */
 #endif /* __KERNEL__ */
 #endif /* _ASM_ARM_FUTEX_H */
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index eef55ea..8e427c7 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -51,6 +51,7 @@
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
 #define ARM_DEBUG_ARCH_V7_1	5
+#define ARM_DEBUG_ARCH_V8	6
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index 6ff56ec..6e183fd 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -9,6 +9,7 @@
  * instruction set this cpu supports.
  */
 #define ELF_HWCAP	(elf_hwcap)
-extern unsigned int elf_hwcap;
+#define ELF_HWCAP2	(elf_hwcap2)
+extern unsigned int elf_hwcap, elf_hwcap2;
 #endif
 #endif
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 863c892..70f9b9b 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,7 +4,6 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <asm/system.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 8756e4b..02fa255 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -30,14 +30,15 @@
  */
 #define UL(x) _AC(x, UL)
 
+/* PAGE_OFFSET - the virtual address of the start of the kernel image */
+#define PAGE_OFFSET		UL(CONFIG_PAGE_OFFSET)
+
 #ifdef CONFIG_MMU
 
 /*
- * PAGE_OFFSET - the virtual address of the start of the kernel image
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
  */
-#define PAGE_OFFSET		UL(CONFIG_PAGE_OFFSET)
 #define TASK_SIZE		(UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M))
 #define TASK_UNMAPPED_BASE	ALIGN(TASK_SIZE / 3, SZ_16M)
 
@@ -104,10 +105,6 @@
 #define END_MEM     		(UL(CONFIG_DRAM_BASE) + CONFIG_DRAM_SIZE)
 #endif
 
-#ifndef PAGE_OFFSET
-#define PAGE_OFFSET		PLAT_PHYS_OFFSET
-#endif
-
 /*
  * The module can be at any place in ram in nommu mode.
  */
@@ -169,9 +166,17 @@
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ *
+ * PFNs are used to describe any physical page; this means
+ * PFN 0 == physical address 0.
  */
-#ifndef __virt_to_phys
-#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+#if defined(__virt_to_phys)
+#define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
+
+#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
+
+#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 
 /*
  * Constants used to force the right instruction encodings and shifts
@@ -180,12 +185,17 @@
 #define __PV_BITS_31_24	0x81000000
 #define __PV_BITS_7_0	0x81
 
-extern u64 __pv_phys_offset;
+extern unsigned long __pv_phys_pfn_offset;
 extern u64 __pv_offset;
 extern void fixup_pv_table(const void *, unsigned long);
 extern const void *__pv_table_begin, *__pv_table_end;
 
-#define PHYS_OFFSET __pv_phys_offset
+#define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
+#define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
+
+#define virt_to_pfn(kaddr) \
+	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
+	 PHYS_PFN_OFFSET)
 
 #define __pv_stub(from,to,instr,type)			\
 	__asm__("@ __pv_stub\n"				\
@@ -246,6 +256,7 @@
 #else
 
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
 static inline phys_addr_t __virt_to_phys(unsigned long x)
 {
@@ -257,18 +268,11 @@
 	return x - PHYS_OFFSET + PAGE_OFFSET;
 }
 
-#endif
-#endif
+#define virt_to_pfn(kaddr) \
+	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
+	 PHYS_PFN_OFFSET)
 
-/*
- * PFNs are used to describe any physical page; this means
- * PFN 0 == physical address 0.
- *
- * This is the PFN of the first RAM page in the kernel
- * direct-mapped view.  We assume this is the first page
- * of RAM in the mem_map as well.
- */
-#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
+#endif
 
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
@@ -346,9 +350,9 @@
  */
 #define ARCH_PFN_OFFSET		PHYS_PFN_OFFSET
 
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define virt_addr_valid(kaddr)	(((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) \
-					&& pfn_valid(__pa(kaddr) >> PAGE_SHIFT) )
+					&& pfn_valid(virt_to_pfn(kaddr)))
 
 #endif
 
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index dfff709..219ac88 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -140,6 +140,7 @@
 #define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
 #define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
 #define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
+#define L_PTE_MT_VECTORS	(_AT(pteval_t, 0x0f) << 2)	/* 1111 */
 #define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 03243f7..85c60ad 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -120,13 +120,16 @@
 /*
  * 2nd stage PTE definitions for LPAE.
  */
-#define L_PTE_S2_MT_UNCACHED	 (_AT(pteval_t, 0x5) << 2) /* MemAttr[3:0] */
-#define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* MemAttr[3:0] */
-#define L_PTE_S2_MT_WRITEBACK	 (_AT(pteval_t, 0xf) << 2) /* MemAttr[3:0] */
-#define L_PTE_S2_RDONLY		 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
-#define L_PTE_S2_RDWR		 (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+#define L_PTE_S2_MT_UNCACHED		(_AT(pteval_t, 0x0) << 2) /* strongly ordered */
+#define L_PTE_S2_MT_WRITETHROUGH	(_AT(pteval_t, 0xa) << 2) /* normal inner write-through */
+#define L_PTE_S2_MT_WRITEBACK		(_AT(pteval_t, 0xf) << 2) /* normal inner write-back */
+#define L_PTE_S2_MT_DEV_SHARED		(_AT(pteval_t, 0x1) << 2) /* device */
+#define L_PTE_S2_MT_MASK		(_AT(pteval_t, 0xf) << 2)
 
-#define L_PMD_S2_RDWR		 (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+#define L_PTE_S2_RDONLY			(_AT(pteval_t, 1) << 6)   /* HAP[1]   */
+#define L_PTE_S2_RDWR			(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+
+#define L_PMD_S2_RDWR			(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 
 /*
  * Hyp-mode PL2 PTE definitions for LPAE.
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 7d59b52..5478e5d 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -216,13 +216,16 @@
 
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_valid(pte)		(pte_val(pte) & L_PTE_VALID)
+#define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
 #define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
 #define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
 #define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
 #define pte_special(pte)	(0)
 
-#define pte_present_user(pte)  (pte_present(pte) && (pte_val(pte) & L_PTE_USER))
+#define pte_valid_user(pte)	\
+	(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
 
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
@@ -237,7 +240,7 @@
 {
 	unsigned long ext = 0;
 
-	if (addr < TASK_SIZE && pte_present_user(pteval)) {
+	if (addr < TASK_SIZE && pte_valid_user(pteval)) {
 		__sync_icache_dcache(pteval);
 		ext |= PTE_EXT_NG;
 	}
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index ef3c607..ac4bfae 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -37,18 +37,9 @@
 
 static inline void dsb_sev(void)
 {
-#if __LINUX_ARM_ARCH__ >= 7
-	__asm__ __volatile__ (
-		"dsb ishst\n"
-		SEV
-	);
-#else
-	__asm__ __volatile__ (
-		"mcr p15, 0, %0, c7, c10, 4\n"
-		SEV
-		: : "r" (0)
-	);
-#endif
+
+	dsb(ishst);
+	__asm__(SEV);
 }
 
 /*
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 63479ee..9732b8e 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -2,7 +2,6 @@
 #define __ASM_SYNC_BITOPS_H__
 
 #include <asm/bitops.h>
-#include <asm/system.h>
 
 /* sync_bitops functions are equivalent to the SMP implementation of the
  * original functions, independently from CONFIG_SMP being defined.
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
deleted file mode 100644
index 368165e..0000000
--- a/arch/arm/include/asm/system.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* FILE TO BE DELETED. DO NOT ADD STUFF HERE! */
-#include <asm/barrier.h>
-#include <asm/compiler.h>
-#include <asm/cmpxchg.h>
-#include <asm/switch_to.h>
-#include <asm/system_info.h>
-#include <asm/system_misc.h>
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 72abdc5..12c3a5d 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -19,7 +19,7 @@
 #include <asm/unified.h>
 #include <asm/compiler.h>
 
-#if __LINUX_ARM_ARCH__ < 6
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #include <asm-generic/uaccess-unaligned.h>
 #else
 #define __get_user_unaligned __get_user
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index acabef1..4387624 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -48,6 +48,5 @@
  */
 #define __IGNORE_fadvise64_64
 #define __IGNORE_migrate_pages
-#define __IGNORE_kcmp
 
 #endif /* __ASM_ARM_UNISTD_H */
diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h
index 7dcc10d..20d12f2 100644
--- a/arch/arm/include/uapi/asm/hwcap.h
+++ b/arch/arm/include/uapi/asm/hwcap.h
@@ -28,4 +28,13 @@
 #define HWCAP_LPAE	(1 << 20)
 #define HWCAP_EVTSTRM	(1 << 21)
 
+/*
+ * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2
+ */
+#define HWCAP2_AES	(1 << 0)
+#define HWCAP2_PMULL	(1 << 1)
+#define HWCAP2_SHA1	(1 << 2)
+#define HWCAP2_SHA2	(1 << 3)
+#define HWCAP2_CRC32	(1 << 4)
+
 #endif /* _UAPI__ASMARM_HWCAP_H */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 85e664b..f7b450f 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -158,6 +158,6 @@
 #endif
 
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
-EXPORT_SYMBOL(__pv_phys_offset);
+EXPORT_SYMBOL(__pv_phys_pfn_offset);
 EXPORT_SYMBOL(__pv_offset);
 #endif
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 317da88..91f4880 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -608,41 +608,10 @@
  */
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return 0;
 
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx = 0; idx < 6; idx++) {
-		/* Only set up the requested stuff */
-		if (!(mask & (1 << idx)))
-			continue;
-
-		r = dev->resource + idx;
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because"
-			       " of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-
-	/*
-	 * Bridges (eg, cardbus bridges) need to be fully enabled
-	 */
-	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
-		cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
-
-	if (cmd != old_cmd) {
-		printk("PCI: enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
+	return pci_enable_resources(dev, mask);
 }
 
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 47cd974..c96ecac 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -177,6 +177,18 @@
 	.long	__proc_info_end
 	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
 
+__error_lpae:
+#ifdef CONFIG_DEBUG_LL
+	adr	r0, str_lpae
+	bl 	printascii
+	b	__error
+str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n"
+#else
+	b	__error
+#endif
+	.align
+ENDPROC(__error_lpae)
+
 __error_p:
 #ifdef CONFIG_DEBUG_LL
 	adr	r0, str_p1
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 914616e..f8c0883 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -102,7 +102,7 @@
 	and	r3, r3, #0xf			@ extract VMSA support
 	cmp	r3, #5				@ long-descriptor translation table format?
  THUMB( it	lo )				@ force fixup-able long branch encoding
-	blo	__error_p			@ only classic page table format
+	blo	__error_lpae			@ only classic page table format
 #endif
 
 #ifndef CONFIG_XIP_KERNEL
@@ -584,9 +584,10 @@
 	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
-	add	r6, r6, r3	@ adjust __pv_phys_offset address
+	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
 	add	r7, r7, r3	@ adjust __pv_offset address
-	str	r8, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	mov	r0, r8, lsr #12	@ convert to PFN
+	str	r0, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
 	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
@@ -600,7 +601,7 @@
 1:	.long	.
 	.long	__pv_table_begin
 	.long	__pv_table_end
-2:	.long	__pv_phys_offset
+2:	.long	__pv_phys_pfn_offset
 	.long	__pv_offset
 
 	.text
@@ -688,11 +689,11 @@
 ENDPROC(fixup_pv_table)
 
 	.data
-	.globl	__pv_phys_offset
-	.type	__pv_phys_offset, %object
-__pv_phys_offset:
-	.quad	0
-	.size	__pv_phys_offset, . -__pv_phys_offset
+	.globl	__pv_phys_pfn_offset
+	.type	__pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+	.word	0
+	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
 
 	.globl	__pv_offset
 	.type	__pv_offset, %object
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 3d44660..9da35c6 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -167,7 +167,7 @@
 /* Can we determine the watchpoint access type from the fsr? */
 static int debug_exception_updates_fsr(void)
 {
-	return 0;
+	return get_debug_arch() >= ARM_DEBUG_ARCH_V8;
 }
 
 /* Determine number of WRP registers available. */
@@ -257,6 +257,7 @@
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
 	case ARM_DEBUG_ARCH_V7_1:
+	case ARM_DEBUG_ARCH_V8:
 		ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		isb();
 		break;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index b0df976..50e198c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -100,6 +100,9 @@
 unsigned int elf_hwcap __read_mostly;
 EXPORT_SYMBOL(elf_hwcap);
 
+unsigned int elf_hwcap2 __read_mostly;
+EXPORT_SYMBOL(elf_hwcap2);
+
 
 #ifdef MULTI_CPU
 struct processor processor __read_mostly;
@@ -731,7 +734,7 @@
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
-		res = memblock_virt_alloc_low(sizeof(*res), 0);
+		res = memblock_virt_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
@@ -1005,6 +1008,15 @@
 	NULL
 };
 
+static const char *hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+
 static int c_show(struct seq_file *m, void *v)
 {
 	int i, j;
@@ -1028,6 +1040,10 @@
 			if (elf_hwcap & (1 << j))
 				seq_printf(m, "%s ", hwcap_str[j]);
 
+		for (j = 0; hwcap2_str[j]; j++)
+			if (elf_hwcap2 & (1 << j))
+				seq_printf(m, "%s ", hwcap2_str[j]);
+
 		seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24);
 		seq_printf(m, "CPU architecture: %s\n",
 			   proc_arch[cpu_architecture()]);
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 00df012..3c217694 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -68,6 +68,12 @@
 struct unwind_ctrl_block {
 	unsigned long vrs[16];		/* virtual register set */
 	const unsigned long *insn;	/* pointer to the current instructions word */
+	unsigned long sp_high;		/* highest value of sp allowed */
+	/*
+	 * 1 : check for stack overflow for each register pop.
+	 * 0 : save overhead if there is plenty of stack remaining.
+	 */
+	int check_each_pop;
 	int entries;			/* number of entries left to interpret */
 	int byte;			/* current byte number in the instructions word */
 };
@@ -235,12 +241,85 @@
 	return ret;
 }
 
+/* Before popping a register check whether it is feasible or not */
+static int unwind_pop_register(struct unwind_ctrl_block *ctrl,
+				unsigned long **vsp, unsigned int reg)
+{
+	if (unlikely(ctrl->check_each_pop))
+		if (*vsp >= (unsigned long *)ctrl->sp_high)
+			return -URC_FAILURE;
+
+	ctrl->vrs[reg] = *(*vsp)++;
+	return URC_OK;
+}
+
+/* Helper functions to execute the instructions */
+static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int load_sp, reg = 4;
+
+	load_sp = mask & (1 << (13 - 4));
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	if (!load_sp)
+		ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_r4_to_rN(struct unwind_ctrl_block *ctrl,
+					unsigned long insn)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg;
+
+	/* pop R4-R[4+bbb] */
+	for (reg = 4; reg <= 4 + (insn & 7); reg++)
+		if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+
+	if (insn & 0x80)
+		if (unwind_pop_register(ctrl, &vsp, 14))
+				return -URC_FAILURE;
+
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg = 0;
+
+	/* pop R0-R3 according to mask */
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
 /*
  * Execute the current unwind instruction.
  */
 static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 {
 	unsigned long insn = unwind_get_byte(ctrl);
+	int ret = URC_OK;
 
 	pr_debug("%s: insn = %08lx\n", __func__, insn);
 
@@ -250,8 +329,6 @@
 		ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4;
 	else if ((insn & 0xf0) == 0x80) {
 		unsigned long mask;
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int load_sp, reg = 4;
 
 		insn = (insn << 8) | unwind_get_byte(ctrl);
 		mask = insn & 0x0fff;
@@ -261,29 +338,16 @@
 			return -URC_FAILURE;
 		}
 
-		/* pop R4-R15 according to mask */
-		load_sp = mask & (1 << (13 - 4));
-		while (mask) {
-			if (mask & 1)
-				ctrl->vrs[reg] = *vsp++;
-			mask >>= 1;
-			reg++;
-		}
-		if (!load_sp)
-			ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_subset_r4_to_r13(ctrl, mask);
+		if (ret)
+			goto error;
 	} else if ((insn & 0xf0) == 0x90 &&
 		   (insn & 0x0d) != 0x0d)
 		ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f];
 	else if ((insn & 0xf0) == 0xa0) {
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int reg;
-
-		/* pop R4-R[4+bbb] */
-		for (reg = 4; reg <= 4 + (insn & 7); reg++)
-			ctrl->vrs[reg] = *vsp++;
-		if (insn & 0x80)
-			ctrl->vrs[14] = *vsp++;
-		ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_r4_to_rN(ctrl, insn);
+		if (ret)
+			goto error;
 	} else if (insn == 0xb0) {
 		if (ctrl->vrs[PC] == 0)
 			ctrl->vrs[PC] = ctrl->vrs[LR];
@@ -291,8 +355,6 @@
 		ctrl->entries = 0;
 	} else if (insn == 0xb1) {
 		unsigned long mask = unwind_get_byte(ctrl);
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int reg = 0;
 
 		if (mask == 0 || mask & 0xf0) {
 			pr_warning("unwind: Spare encoding %04lx\n",
@@ -300,14 +362,9 @@
 			return -URC_FAILURE;
 		}
 
-		/* pop R0-R3 according to mask */
-		while (mask) {
-			if (mask & 1)
-				ctrl->vrs[reg] = *vsp++;
-			mask >>= 1;
-			reg++;
-		}
-		ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_subset_r0_to_r3(ctrl, mask);
+		if (ret)
+			goto error;
 	} else if (insn == 0xb2) {
 		unsigned long uleb128 = unwind_get_byte(ctrl);
 
@@ -320,7 +377,8 @@
 	pr_debug("%s: fp = %08lx sp = %08lx lr = %08lx pc = %08lx\n", __func__,
 		 ctrl->vrs[FP], ctrl->vrs[SP], ctrl->vrs[LR], ctrl->vrs[PC]);
 
-	return URC_OK;
+error:
+	return ret;
 }
 
 /*
@@ -329,13 +387,13 @@
  */
 int unwind_frame(struct stackframe *frame)
 {
-	unsigned long high, low;
+	unsigned long low;
 	const struct unwind_idx *idx;
 	struct unwind_ctrl_block ctrl;
 
-	/* only go to a higher address on the stack */
+	/* store the highest address on the stack to avoid crossing it */
 	low = frame->sp;
-	high = ALIGN(low, THREAD_SIZE);
+	ctrl.sp_high = ALIGN(low, THREAD_SIZE);
 
 	pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
 		 frame->pc, frame->lr, frame->sp);
@@ -382,11 +440,16 @@
 		return -URC_FAILURE;
 	}
 
+	ctrl.check_each_pop = 0;
+
 	while (ctrl.entries > 0) {
-		int urc = unwind_exec_insn(&ctrl);
+		int urc;
+		if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs))
+			ctrl.check_each_pop = 1;
+		urc = unwind_exec_insn(&ctrl);
 		if (urc < 0)
 			return urc;
-		if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= high)
+		if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high)
 			return -URC_FAILURE;
 	}
 
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 52886b8..9f12ed1 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -37,6 +37,11 @@
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
 	smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3		@ toggle bit
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8..3bc8eb8 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -225,10 +225,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f..d6e742d 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
@@ -171,23 +171,23 @@
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +196,50 @@
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +248,20 @@
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +276,24 @@
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +302,20 @@
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +326,7 @@
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7..7a74309 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b781..d0d104a 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
 		mov	pc, lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
 		mov	pc, lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14..d1fc0c0 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r7, lspull #\pull
+		mov     r7, r7, lspush #\push
+		orr     r7, r7, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -175,10 +175,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1..e505209 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@
 .Lc2u_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -131,30 +131,30 @@
 		subs	ip, ip, #16
 		blt	.Lc2u_1rem8lp
 
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
+.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_1cpy8lp
 
 .Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
@@ -172,9 +172,9 @@
 .Lc2u_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -186,30 +186,30 @@
 		subs	ip, ip, #16
 		blt	.Lc2u_2rem8lp
 
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
+.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_2cpy8lp
 
 .Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
@@ -227,9 +227,9 @@
 .Lc2u_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -241,30 +241,30 @@
 		subs	ip, ip, #16
 		blt	.Lc2u_3rem8lp
 
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
+.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_3cpy8lp
 
 .Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
@@ -382,9 +382,9 @@
 .Lcfu_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -396,30 +396,30 @@
 		subs	ip, ip, #16
 		blt	.Lcfu_1rem8lp
 
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
+.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_1cpy8lp
 
 .Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_1fupi
@@ -437,9 +437,9 @@
 .Lcfu_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -452,30 +452,30 @@
 		blt	.Lcfu_2rem8lp
 
 
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
+.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_2cpy8lp
 
 .Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_2fupi
@@ -493,9 +493,9 @@
 .Lcfu_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -507,30 +507,30 @@
 		subs	ip, ip, #16
 		blt	.Lcfu_3rem8lp
 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
+.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.Lcfu_3cpy8lp
 
 .Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_3fupi
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index e38b279..384dc85 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -155,7 +155,7 @@
 
 static struct irqaction cns3xxx_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= cns3xxx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index 68ac934..8254e71 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -206,7 +206,7 @@
 
 static struct irqaction ebsa110_timer_irq = {
 	.name		= "EBSA110 Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= ebsa110_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 3971104..bf7aa7d 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -105,7 +105,7 @@
 static struct irqaction footbridge_timer_irq = {
 	.name		= "dc21285_timer1",
 	.handler	= timer1_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &ckevt_dc21285,
 };
 
@@ -125,7 +125,7 @@
 	clockevents_config_and_register(ce, rate, 0x4, 0xffffff);
 }
 
-static u32 notrace footbridge_read_sched_clock(void)
+static u64 notrace footbridge_read_sched_clock(void)
 {
 	return ~*CSR_TIMER3_VALUE;
 }
@@ -138,5 +138,5 @@
 	*CSR_TIMER3_CLR = 0;
 	*CSR_TIMER3_CNTL = TIMER_CNTL_ENABLE | TIMER_CNTL_DIV16;
 
-	setup_sched_clock(footbridge_read_sched_clock, 24, rate);
+	sched_clock_register(footbridge_read_sched_clock, 24, rate);
 }
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 7c2fdae..96a3d73 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -334,15 +334,15 @@
 	/*
 	 * We don't care if these fail.
 	 */
-	dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, 0,
 			    "PCI system error", &serr_timer);
-	dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, 0,
 			    "PCI parity error", &perr_timer);
-	dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, 0,
 			    "PCI abort", NULL);
-	dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, 0,
 			    "Discard timer", NULL);
-	dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, 0,
 			    "PCI data parity", NULL);
 
 	if (cfn_mode) {
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index d9301dd..b73f52e 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -27,7 +27,7 @@
 static struct irqaction pit_timer_irq = {
 	.name		= "pit",
 	.handler	= pit_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &i8253_clockevent,
 };
 
diff --git a/arch/arm/mach-gemini/idle.c b/arch/arm/mach-gemini/idle.c
index 87dff4f..ddf8ec9 100644
--- a/arch/arm/mach-gemini/idle.c
+++ b/arch/arm/mach-gemini/idle.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/init.h>
-#include <asm/system.h>
+#include <asm/system_misc.h>
 #include <asm/proc-fns.h>
 
 static void gemini_idle(void)
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 17c0fe6..e4f27f0 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -358,7 +358,7 @@
 
 static struct irqaction integrator_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= integrator_timer_interrupt,
 	.dev_id		= &integrator_clockevent,
 };
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 6d68aed..a465f27 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -312,7 +312,7 @@
 
 static struct irqaction ixp4xx_timer_irq = {
 	.name		= "timer1",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= ixp4xx_timer_interrupt,
 	.dev_id		= &clockevent_ixp4xx,
 };
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c
index 736dc69..43ee06d 100644
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -233,8 +233,7 @@
 
 	gpio_request(DSMG600_RB_GPIO, "reset button");
 	if (request_irq(gpio_to_irq(DSMG600_RB_GPIO), &dsmg600_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"DSM-G600 reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "DSM-G600 reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(DSMG600_RB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 429966b7..5c4b0c4 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -208,16 +208,14 @@
 	platform_add_devices(fsg_devices, ARRAY_SIZE(fsg_devices));
 
 	if (request_irq(gpio_to_irq(FSG_RB_GPIO), &fsg_reset_handler,
-			IRQF_DISABLED | IRQF_TRIGGER_LOW,
-			"FSG reset button", NULL) < 0) {
+			IRQF_TRIGGER_LOW, "FSG reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(FSG_RB_GPIO));
 	}
 
 	if (request_irq(gpio_to_irq(FSG_SB_GPIO), &fsg_power_handler,
-			IRQF_DISABLED | IRQF_TRIGGER_LOW,
-			"FSG power button", NULL) < 0) {
+			IRQF_TRIGGER_LOW, "FSG power button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
 			gpio_to_irq(FSG_SB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index 507cb52..4e0f762 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -295,8 +295,7 @@
 	pm_power_off = nas100d_power_off;
 
 	if (request_irq(gpio_to_irq(NAS100D_RB_GPIO), &nas100d_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"NAS100D reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "NAS100D reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(NAS100D_RB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index ba5f1cd..88c025f 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -265,16 +265,14 @@
 	pm_power_off = nslu2_power_off;
 
 	if (request_irq(gpio_to_irq(NSLU2_RB_GPIO), &nslu2_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"NSLU2 reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "NSLU2 reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(NSLU2_RB_GPIO));
 	}
 
 	if (request_irq(gpio_to_irq(NSLU2_PB_GPIO), &nslu2_power_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_HIGH,
-		"NSLU2 power button", NULL) < 0) {
+		IRQF_TRIGGER_HIGH, "NSLU2 power button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
 			gpio_to_irq(NSLU2_PB_GPIO));
diff --git a/arch/arm/mach-ks8695/time.c b/arch/arm/mach-ks8695/time.c
index 426c976..a197874 100644
--- a/arch/arm/mach-ks8695/time.c
+++ b/arch/arm/mach-ks8695/time.c
@@ -122,7 +122,7 @@
 
 static struct irqaction ks8695_timer_irq = {
 	.name		= "ks8695_tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
+	.flags		= IRQF_TIMER,
 	.handler	= ks8695_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c
index 20eab63..4e583729 100644
--- a/arch/arm/mach-lpc32xx/timer.c
+++ b/arch/arm/mach-lpc32xx/timer.c
@@ -90,7 +90,7 @@
 
 static struct irqaction lpc32xx_timer_irq = {
 	.name		= "LPC32XX Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= lpc32xx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 024022d..bbcd232 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -186,7 +186,7 @@
 
 static struct irqaction timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= timer_interrupt,
 	.dev_id		= &ckevt,
 };
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index 6df42e6..3177c7a 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -99,7 +99,7 @@
 
 static struct irqaction netx_timer_irq = {
 	.name		= "NetX Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= netx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index 5bb8ce8..4be601b6 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -32,55 +32,51 @@
 
 #define OMAP1_DMA_BASE			(0xfffed800)
 #define OMAP1_LOGICAL_DMA_CH_COUNT	17
-#define OMAP1_DMA_STRIDE		0x40
 
-static u32 errata;
 static u32 enable_1510_mode;
-static u8 dma_stride;
-static enum omap_reg_offsets dma_common_ch_start, dma_common_ch_end;
 
-static u16 reg_map[] = {
-	[GCR]		= 0x400,
-	[GSCR]		= 0x404,
-	[GRST1]		= 0x408,
-	[HW_ID]		= 0x442,
-	[PCH2_ID]	= 0x444,
-	[PCH0_ID]	= 0x446,
-	[PCH1_ID]	= 0x448,
-	[PCHG_ID]	= 0x44a,
-	[PCHD_ID]	= 0x44c,
-	[CAPS_0]	= 0x44e,
-	[CAPS_1]	= 0x452,
-	[CAPS_2]	= 0x456,
-	[CAPS_3]	= 0x458,
-	[CAPS_4]	= 0x45a,
-	[PCH2_SR]	= 0x460,
-	[PCH0_SR]	= 0x480,
-	[PCH1_SR]	= 0x482,
-	[PCHD_SR]	= 0x4c0,
+static const struct omap_dma_reg reg_map[] = {
+	[GCR]		= { 0x0400, 0x00, OMAP_DMA_REG_16BIT },
+	[GSCR]		= { 0x0404, 0x00, OMAP_DMA_REG_16BIT },
+	[GRST1]		= { 0x0408, 0x00, OMAP_DMA_REG_16BIT },
+	[HW_ID]		= { 0x0442, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH2_ID]	= { 0x0444, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH0_ID]	= { 0x0446, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH1_ID]	= { 0x0448, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHG_ID]	= { 0x044a, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHD_ID]	= { 0x044c, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_0]	= { 0x044e, 0x00, OMAP_DMA_REG_2X16BIT },
+	[CAPS_1]	= { 0x0452, 0x00, OMAP_DMA_REG_2X16BIT },
+	[CAPS_2]	= { 0x0456, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_3]	= { 0x0458, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_4]	= { 0x045a, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH2_SR]	= { 0x0460, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH0_SR]	= { 0x0480, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH1_SR]	= { 0x0482, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHD_SR]	= { 0x04c0, 0x00, OMAP_DMA_REG_16BIT },
 
 	/* Common Registers */
-	[CSDP]		= 0x00,
-	[CCR]		= 0x02,
-	[CICR]		= 0x04,
-	[CSR]		= 0x06,
-	[CEN]		= 0x10,
-	[CFN]		= 0x12,
-	[CSFI]		= 0x14,
-	[CSEI]		= 0x16,
-	[CPC]		= 0x18,	/* 15xx only */
-	[CSAC]		= 0x18,
-	[CDAC]		= 0x1a,
-	[CDEI]		= 0x1c,
-	[CDFI]		= 0x1e,
-	[CLNK_CTRL]	= 0x28,
+	[CSDP]		= { 0x0000, 0x40, OMAP_DMA_REG_16BIT },
+	[CCR]		= { 0x0002, 0x40, OMAP_DMA_REG_16BIT },
+	[CICR]		= { 0x0004, 0x40, OMAP_DMA_REG_16BIT },
+	[CSR]		= { 0x0006, 0x40, OMAP_DMA_REG_16BIT },
+	[CEN]		= { 0x0010, 0x40, OMAP_DMA_REG_16BIT },
+	[CFN]		= { 0x0012, 0x40, OMAP_DMA_REG_16BIT },
+	[CSFI]		= { 0x0014, 0x40, OMAP_DMA_REG_16BIT },
+	[CSEI]		= { 0x0016, 0x40, OMAP_DMA_REG_16BIT },
+	[CPC]		= { 0x0018, 0x40, OMAP_DMA_REG_16BIT },	/* 15xx only */
+	[CSAC]		= { 0x0018, 0x40, OMAP_DMA_REG_16BIT },
+	[CDAC]		= { 0x001a, 0x40, OMAP_DMA_REG_16BIT },
+	[CDEI]		= { 0x001c, 0x40, OMAP_DMA_REG_16BIT },
+	[CDFI]		= { 0x001e, 0x40, OMAP_DMA_REG_16BIT },
+	[CLNK_CTRL]	= { 0x0028, 0x40, OMAP_DMA_REG_16BIT },
 
 	/* Channel specific register offsets */
-	[CSSA]		= 0x08,
-	[CDSA]		= 0x0c,
-	[COLOR]		= 0x20,
-	[CCR2]		= 0x24,
-	[LCH_CTRL]	= 0x2a,
+	[CSSA]		= { 0x0008, 0x40, OMAP_DMA_REG_2X16BIT },
+	[CDSA]		= { 0x000c, 0x40, OMAP_DMA_REG_2X16BIT },
+	[COLOR]		= { 0x0020, 0x40, OMAP_DMA_REG_2X16BIT },
+	[CCR2]		= { 0x0024, 0x40, OMAP_DMA_REG_16BIT },
+	[LCH_CTRL]	= { 0x002a, 0x40, OMAP_DMA_REG_16BIT },
 };
 
 static struct resource res[] __initdata = {
@@ -181,44 +177,36 @@
 static void __iomem *dma_base;
 static inline void dma_write(u32 val, int reg, int lch)
 {
-	u8  stride;
-	u32 offset;
+	void __iomem *addr = dma_base;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
 
-	__raw_writew(val, dma_base + offset);
-	if ((reg > CLNK_CTRL && reg < CCEN) ||
-			(reg > PCHD_ID && reg < CAPS_2)) {
-		u32 offset2 = reg_map[reg] + 2 + (stride * lch);
-		__raw_writew(val >> 16, dma_base + offset2);
-	}
+	__raw_writew(val, addr);
+	if (reg_map[reg].type == OMAP_DMA_REG_2X16BIT)
+		__raw_writew(val >> 16, addr + 2);
 }
 
 static inline u32 dma_read(int reg, int lch)
 {
-	u8 stride;
-	u32 offset, val;
+	void __iomem *addr = dma_base;
+	uint32_t val;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
 
-	val = __raw_readw(dma_base + offset);
-	if ((reg > CLNK_CTRL && reg < CCEN) ||
-			(reg > PCHD_ID && reg < CAPS_2)) {
-		u16 upper;
-		u32 offset2 = reg_map[reg] + 2 + (stride * lch);
-		upper = __raw_readw(dma_base + offset2);
-		val |= (upper << 16);
-	}
+	val = __raw_readw(addr);
+	if (reg_map[reg].type == OMAP_DMA_REG_2X16BIT)
+		val |= __raw_readw(addr + 2) << 16;
+
 	return val;
 }
 
 static void omap1_clear_lch_regs(int lch)
 {
-	int i = dma_common_ch_start;
+	int i;
 
-	for (; i <= dma_common_ch_end; i += 1)
+	for (i = CPC; i <= COLOR; i += 1)
 		dma_write(0, i, lch);
 }
 
@@ -255,8 +243,9 @@
 	return;
 }
 
-static u32 configure_dma_errata(void)
+static unsigned configure_dma_errata(void)
 {
+	unsigned errata = 0;
 
 	/*
 	 * Erratum 3.2/3.3: sometimes 0 is returned if CSAC/CDAC is
@@ -272,11 +261,23 @@
 	.name = "omap-dma-engine",
 	.id = -1,
 	.dma_mask = DMA_BIT_MASK(32),
+	.res = res,
+	.num_res = 1,
+};
+
+static struct omap_system_dma_plat_info dma_plat_info __initdata = {
+	.reg_map	= reg_map,
+	.channel_stride	= 0x40,
+	.show_dma_caps	= omap1_show_dma_caps,
+	.clear_lch_regs	= omap1_clear_lch_regs,
+	.clear_dma	= omap1_clear_dma,
+	.dma_write	= dma_write,
+	.dma_read	= dma_read,
 };
 
 static int __init omap1_system_dma_init(void)
 {
-	struct omap_system_dma_plat_info	*p;
+	struct omap_system_dma_plat_info	p;
 	struct omap_dma_dev_attr		*d;
 	struct platform_device			*pdev, *dma_pdev;
 	int ret;
@@ -302,20 +303,12 @@
 		goto exit_iounmap;
 	}
 
-	p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL);
-	if (!p) {
-		dev_err(&pdev->dev, "%s: Unable to allocate 'p' for %s\n",
-			__func__, pdev->name);
-		ret = -ENOMEM;
-		goto exit_iounmap;
-	}
-
 	d = kzalloc(sizeof(struct omap_dma_dev_attr), GFP_KERNEL);
 	if (!d) {
 		dev_err(&pdev->dev, "%s: Unable to allocate 'd' for %s\n",
 			__func__, pdev->name);
 		ret = -ENOMEM;
-		goto exit_release_p;
+		goto exit_iounmap;
 	}
 
 	d->lch_count		= OMAP1_LOGICAL_DMA_CH_COUNT;
@@ -336,17 +329,6 @@
 	d->dev_caps		|= CLEAR_CSR_ON_READ;
 	d->dev_caps		|= IS_WORD_16;
 
-
-	d->chan = kzalloc(sizeof(struct omap_dma_lch) *
-					(d->lch_count), GFP_KERNEL);
-	if (!d->chan) {
-		dev_err(&pdev->dev,
-			"%s: Memory allocation failed for d->chan!\n",
-			__func__);
-		ret = -ENOMEM;
-		goto exit_release_d;
-	}
-
 	if (cpu_is_omap15xx())
 		d->chan_count = 9;
 	else if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
@@ -356,35 +338,24 @@
 			d->chan_count = 9;
 	}
 
-	p->dma_attr = d;
+	p = dma_plat_info;
+	p.dma_attr = d;
+	p.errata = configure_dma_errata();
 
-	p->show_dma_caps	= omap1_show_dma_caps;
-	p->clear_lch_regs	= omap1_clear_lch_regs;
-	p->clear_dma		= omap1_clear_dma;
-	p->dma_write		= dma_write;
-	p->dma_read		= dma_read;
-	p->disable_irq_lch	= NULL;
-
-	p->errata = configure_dma_errata();
-
-	ret = platform_device_add_data(pdev, p, sizeof(*p));
+	ret = platform_device_add_data(pdev, &p, sizeof(p));
 	if (ret) {
 		dev_err(&pdev->dev, "%s: Unable to add resources for %s%d\n",
 			__func__, pdev->name, pdev->id);
-		goto exit_release_chan;
+		goto exit_release_d;
 	}
 
 	ret = platform_device_add(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: Unable to add resources for %s%d\n",
 			__func__, pdev->name, pdev->id);
-		goto exit_release_chan;
+		goto exit_release_d;
 	}
 
-	dma_stride		= OMAP1_DMA_STRIDE;
-	dma_common_ch_start	= CPC;
-	dma_common_ch_end	= COLOR;
-
 	dma_pdev = platform_device_register_full(&omap_dma_dev_info);
 	if (IS_ERR(dma_pdev)) {
 		ret = PTR_ERR(dma_pdev);
@@ -395,12 +366,8 @@
 
 exit_release_pdev:
 	platform_device_del(pdev);
-exit_release_chan:
-	kfree(d->chan);
 exit_release_d:
 	kfree(d);
-exit_release_p:
-	kfree(p);
 exit_iounmap:
 	iounmap(dma_base);
 exit_device_put:
diff --git a/arch/arm/mach-omap2/am35xx-emac.c b/arch/arm/mach-omap2/am35xx-emac.c
index 25b79a29..6a6935c 100644
--- a/arch/arm/mach-omap2/am35xx-emac.c
+++ b/arch/arm/mach-omap2/am35xx-emac.c
@@ -17,7 +17,6 @@
 
 #include <linux/err.h>
 #include <linux/davinci_emac.h>
-#include <asm/system.h>
 #include "omap_device.h"
 #include "am35xx.h"
 #include "control.h"
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index 49fd0d5..5689c88 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -35,97 +35,80 @@
 #include "omap_hwmod.h"
 #include "omap_device.h"
 
-#define OMAP2_DMA_STRIDE	0x60
+static enum omap_reg_offsets dma_common_ch_end;
 
-static u32 errata;
-static u8 dma_stride;
-
-static struct omap_dma_dev_attr *d;
-
-static enum omap_reg_offsets dma_common_ch_start, dma_common_ch_end;
-
-static u16 reg_map[] = {
-	[REVISION]		= 0x00,
-	[GCR]			= 0x78,
-	[IRQSTATUS_L0]		= 0x08,
-	[IRQSTATUS_L1]		= 0x0c,
-	[IRQSTATUS_L2]		= 0x10,
-	[IRQSTATUS_L3]		= 0x14,
-	[IRQENABLE_L0]		= 0x18,
-	[IRQENABLE_L1]		= 0x1c,
-	[IRQENABLE_L2]		= 0x20,
-	[IRQENABLE_L3]		= 0x24,
-	[SYSSTATUS]		= 0x28,
-	[OCP_SYSCONFIG]		= 0x2c,
-	[CAPS_0]		= 0x64,
-	[CAPS_2]		= 0x6c,
-	[CAPS_3]		= 0x70,
-	[CAPS_4]		= 0x74,
+static const struct omap_dma_reg reg_map[] = {
+	[REVISION]	= { 0x0000, 0x00, OMAP_DMA_REG_32BIT },
+	[GCR]		= { 0x0078, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L0]	= { 0x0008, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L1]	= { 0x000c, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L2]	= { 0x0010, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L3]	= { 0x0014, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L0]	= { 0x0018, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L1]	= { 0x001c, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L2]	= { 0x0020, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L3]	= { 0x0024, 0x00, OMAP_DMA_REG_32BIT },
+	[SYSSTATUS]	= { 0x0028, 0x00, OMAP_DMA_REG_32BIT },
+	[OCP_SYSCONFIG]	= { 0x002c, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_0]	= { 0x0064, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_2]	= { 0x006c, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_3]	= { 0x0070, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_4]	= { 0x0074, 0x00, OMAP_DMA_REG_32BIT },
 
 	/* Common register offsets */
-	[CCR]			= 0x80,
-	[CLNK_CTRL]		= 0x84,
-	[CICR]			= 0x88,
-	[CSR]			= 0x8c,
-	[CSDP]			= 0x90,
-	[CEN]			= 0x94,
-	[CFN]			= 0x98,
-	[CSEI]			= 0xa4,
-	[CSFI]			= 0xa8,
-	[CDEI]			= 0xac,
-	[CDFI]			= 0xb0,
-	[CSAC]			= 0xb4,
-	[CDAC]			= 0xb8,
+	[CCR]		= { 0x0080, 0x60, OMAP_DMA_REG_32BIT },
+	[CLNK_CTRL]	= { 0x0084, 0x60, OMAP_DMA_REG_32BIT },
+	[CICR]		= { 0x0088, 0x60, OMAP_DMA_REG_32BIT },
+	[CSR]		= { 0x008c, 0x60, OMAP_DMA_REG_32BIT },
+	[CSDP]		= { 0x0090, 0x60, OMAP_DMA_REG_32BIT },
+	[CEN]		= { 0x0094, 0x60, OMAP_DMA_REG_32BIT },
+	[CFN]		= { 0x0098, 0x60, OMAP_DMA_REG_32BIT },
+	[CSEI]		= { 0x00a4, 0x60, OMAP_DMA_REG_32BIT },
+	[CSFI]		= { 0x00a8, 0x60, OMAP_DMA_REG_32BIT },
+	[CDEI]		= { 0x00ac, 0x60, OMAP_DMA_REG_32BIT },
+	[CDFI]		= { 0x00b0, 0x60, OMAP_DMA_REG_32BIT },
+	[CSAC]		= { 0x00b4, 0x60, OMAP_DMA_REG_32BIT },
+	[CDAC]		= { 0x00b8, 0x60, OMAP_DMA_REG_32BIT },
 
 	/* Channel specific register offsets */
-	[CSSA]			= 0x9c,
-	[CDSA]			= 0xa0,
-	[CCEN]			= 0xbc,
-	[CCFN]			= 0xc0,
-	[COLOR]			= 0xc4,
+	[CSSA]		= { 0x009c, 0x60, OMAP_DMA_REG_32BIT },
+	[CDSA]		= { 0x00a0, 0x60, OMAP_DMA_REG_32BIT },
+	[CCEN]		= { 0x00bc, 0x60, OMAP_DMA_REG_32BIT },
+	[CCFN]		= { 0x00c0, 0x60, OMAP_DMA_REG_32BIT },
+	[COLOR]		= { 0x00c4, 0x60, OMAP_DMA_REG_32BIT },
 
 	/* OMAP4 specific registers */
-	[CDP]			= 0xd0,
-	[CNDP]			= 0xd4,
-	[CCDN]			= 0xd8,
+	[CDP]		= { 0x00d0, 0x60, OMAP_DMA_REG_32BIT },
+	[CNDP]		= { 0x00d4, 0x60, OMAP_DMA_REG_32BIT },
+	[CCDN]		= { 0x00d8, 0x60, OMAP_DMA_REG_32BIT },
 };
 
 static void __iomem *dma_base;
 static inline void dma_write(u32 val, int reg, int lch)
 {
-	u8  stride;
-	u32 offset;
+	void __iomem *addr = dma_base;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
-	__raw_writel(val, dma_base + offset);
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
+
+	__raw_writel(val, addr);
 }
 
 static inline u32 dma_read(int reg, int lch)
 {
-	u8 stride;
-	u32 offset, val;
+	void __iomem *addr = dma_base;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
-	val = __raw_readl(dma_base + offset);
-	return val;
-}
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
 
-static inline void omap2_disable_irq_lch(int lch)
-{
-	u32 val;
-
-	val = dma_read(IRQENABLE_L0, lch);
-	val &= ~(1 << lch);
-	dma_write(val, IRQENABLE_L0, lch);
+	return __raw_readl(addr);
 }
 
 static void omap2_clear_dma(int lch)
 {
-	int i = dma_common_ch_start;
+	int i;
 
-	for (; i <= dma_common_ch_end; i += 1)
+	for (i = CSDP; i <= dma_common_ch_end; i += 1)
 		dma_write(0, i, lch);
 }
 
@@ -137,8 +120,9 @@
 	return;
 }
 
-static u32 configure_dma_errata(void)
+static unsigned configure_dma_errata(void)
 {
+	unsigned errata = 0;
 
 	/*
 	 * Errata applicable for OMAP2430ES1.0 and all omap2420
@@ -220,48 +204,50 @@
 	return errata;
 }
 
+static struct omap_system_dma_plat_info dma_plat_info __initdata = {	/* init-time template */
+	.reg_map	= reg_map,
+	.channel_stride	= 0x60,	/* same stride as the per-channel reg_map entries */
+	.show_dma_caps	= omap2_show_dma_caps,
+	.clear_dma	= omap2_clear_dma,
+	.dma_write	= dma_write,
+	.dma_read	= dma_read,
+};
+
+static struct platform_device_info omap_dma_dev_info = {	/* non-const: .res/.num_res set at init */
+	.name = "omap-dma-engine",
+	.id = -1,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
 /* One time initializations */
 static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
 {
 	struct platform_device			*pdev;
-	struct omap_system_dma_plat_info	*p;
+	struct omap_system_dma_plat_info	p;
+	struct omap_dma_dev_attr		*d;
 	struct resource				*mem;
 	char					*name = "omap_dma_system";
 
-	dma_stride		= OMAP2_DMA_STRIDE;
-	dma_common_ch_start	= CSDP;
+	p = dma_plat_info;
+	p.dma_attr = (struct omap_dma_dev_attr *)oh->dev_attr;
+	p.errata = configure_dma_errata();
 
-	p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL);
-	if (!p) {
-		pr_err("%s: Unable to allocate pdata for %s:%s\n",
-			__func__, name, oh->name);
-		return -ENOMEM;
-	}
-
-	p->dma_attr		= (struct omap_dma_dev_attr *)oh->dev_attr;
-	p->disable_irq_lch	= omap2_disable_irq_lch;
-	p->show_dma_caps	= omap2_show_dma_caps;
-	p->clear_dma		= omap2_clear_dma;
-	p->dma_write		= dma_write;
-	p->dma_read		= dma_read;
-
-	p->clear_lch_regs	= NULL;
-
-	p->errata		= configure_dma_errata();
-
-	pdev = omap_device_build(name, 0, oh, p, sizeof(*p));
-	kfree(p);
+	pdev = omap_device_build(name, 0, oh, &p, sizeof(p));
 	if (IS_ERR(pdev)) {
 		pr_err("%s: Can't build omap_device for %s:%s.\n",
 			__func__, name, oh->name);
 		return PTR_ERR(pdev);
 	}
 
+	omap_dma_dev_info.res = pdev->resource;
+	omap_dma_dev_info.num_res = pdev->num_resources;
+
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem) {
 		dev_err(&pdev->dev, "%s: no mem resource\n", __func__);
 		return -EINVAL;
 	}
+
 	dma_base = ioremap(mem->start, resource_size(mem));
 	if (!dma_base) {
 		dev_err(&pdev->dev, "%s: ioremap fail\n", __func__);
@@ -269,13 +255,6 @@
 	}
 
 	d = oh->dev_attr;
-	d->chan = kzalloc(sizeof(struct omap_dma_lch) *
-					(d->lch_count), GFP_KERNEL);
-
-	if (!d->chan) {
-		dev_err(&pdev->dev, "%s: kzalloc fail\n", __func__);
-		return -ENOMEM;
-	}
 
 	if (cpu_is_omap34xx() && (omap_type() != OMAP2_DEVICE_TYPE_GP))
 		d->dev_caps |= HS_CHANNELS_RESERVED;
@@ -289,12 +268,6 @@
 	return 0;
 }
 
-static const struct platform_device_info omap_dma_dev_info = {
-	.name = "omap-dma-engine",
-	.id = -1,
-	.dma_mask = DMA_BIT_MASK(32),
-};
-
 static int __init omap2_system_dma_init(void)
 {
 	struct platform_device *pdev;
diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index 85883b2..6d3517d 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -141,7 +141,7 @@
 	struct iomd_dma *idma = container_of(dma, struct iomd_dma, dma);
 
 	return request_irq(idma->irq, iomd_dma_handle,
-			   IRQF_DISABLED, idma->dma.device_id, idma);
+			   0, idma->dma.device_id, idma);
 }
 
 static void iomd_free_dma(unsigned int chan, dma_t *dma)
diff --git a/arch/arm/mach-rpc/time.c b/arch/arm/mach-rpc/time.c
index 9a6def1..9a51588 100644
--- a/arch/arm/mach-rpc/time.c
+++ b/arch/arm/mach-rpc/time.c
@@ -75,7 +75,6 @@
 
 static struct irqaction ioc_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED,
 	.handler	= ioc_timer_interrupt
 };
 
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 831a158..f9874ba 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -43,6 +43,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
+#include <asm/mach/irda.h>
 
 #include <asm/hardware/scoop.h>
 #include <asm/mach/sharpsl_param.h>
@@ -96,6 +97,37 @@
 	.codec_pdata	= &collie_ucb1x00_data,
 };
 
+/* Claim the IrDA GPIO and make it an output at 1; unclaim it on error. */
+static int collie_ir_startup(struct device *dev)
+{
+	int err = gpio_request(COLLIE_GPIO_IR_ON, "IrDA");
+
+	if (err)
+		return err;
+
+	err = gpio_direction_output(COLLIE_GPIO_IR_ON, 1);
+	if (err)
+		gpio_free(COLLIE_GPIO_IR_ON);
+	return err;
+}
+
+static void collie_ir_shutdown(struct device *dev)
+{
+	gpio_free(COLLIE_GPIO_IR_ON);	/* undo collie_ir_startup()'s request */
+}
+
+static int collie_ir_set_power(struct device *dev, unsigned int state)
+{
+	gpio_set_value(COLLIE_GPIO_IR_ON, !state);	/* GPIO gets the inverse of state */
+	return 0;	/* always reports success */
+}
+
+static struct irda_platform_data collie_ir_data = {	/* passed to sa11x0_register_irda() */
+	.startup = collie_ir_startup,
+	.shutdown = collie_ir_shutdown,
+	.set_power = collie_ir_set_power,
+};
+
 /*
  * Collie AC IN
  */
@@ -400,6 +432,7 @@
 	sa11x0_register_mtd(&collie_flash_data, collie_flash_resources,
 			    ARRAY_SIZE(collie_flash_resources));
 	sa11x0_register_mcp(&collie_mcp_data);
+	sa11x0_register_irda(&collie_ir_data);
 
 	sharpsl_save_param();
 }
diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c
index daa27c4..3c43219 100644
--- a/arch/arm/mach-sa1100/h3100.c
+++ b/arch/arm/mach-sa1100/h3100.c
@@ -122,15 +122,8 @@
 	.shutdown	= h3100_irda_shutdown,
 };
 
-static struct gpio_default_state h3100_default_gpio[] = {
-	{ H3XXX_GPIO_COM_DCD,	GPIO_MODE_IN,	"COM DCD" },
-	{ H3XXX_GPIO_COM_CTS,	GPIO_MODE_IN,	"COM CTS" },
-	{ H3XXX_GPIO_COM_RTS,	GPIO_MODE_OUT0,	"COM RTS" },
-};
-
 static void __init h3100_mach_init(void)
 {
-	h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio));
 	h3xxx_mach_init();
 
 	sa11x0_register_lcd(&h3100_lcd_info);
diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c
index a663e72..5be54c2 100644
--- a/arch/arm/mach-sa1100/h3600.c
+++ b/arch/arm/mach-sa1100/h3600.c
@@ -130,15 +130,8 @@
 	.shutdown	= h3600_irda_shutdown,
 };
 
-static struct gpio_default_state h3600_default_gpio[] = {
-	{ H3XXX_GPIO_COM_DCD,	GPIO_MODE_IN,	"COM DCD" },
-	{ H3XXX_GPIO_COM_CTS,	GPIO_MODE_IN,	"COM CTS" },
-	{ H3XXX_GPIO_COM_RTS,	GPIO_MODE_OUT0,	"COM RTS" },
-};
-
 static void __init h3600_mach_init(void)
 {
-	h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio));
 	h3xxx_mach_init();
 
 	sa11x0_register_lcd(&h3600_lcd_info);
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index f17e738..c79bf46 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -28,37 +28,6 @@
 
 #include "generic.h"
 
-void h3xxx_init_gpio(struct gpio_default_state *s, size_t n)
-{
-	while (n--) {
-		const char *name = s->name;
-		int err;
-
-		if (!name)
-			name = "[init]";
-		err = gpio_request(s->gpio, name);
-		if (err) {
-			printk(KERN_ERR "gpio%u: unable to request: %d\n",
-				s->gpio, err);
-			continue;
-		}
-		if (s->mode >= 0) {
-			err = gpio_direction_output(s->gpio, s->mode);
-		} else {
-			err = gpio_direction_input(s->gpio);
-		}
-		if (err) {
-			printk(KERN_ERR "gpio%u: unable to set direction: %d\n",
-				s->gpio, err);
-			continue;
-		}
-		if (!s->name)
-			gpio_free(s->gpio);
-		s++;
-	}
-}
-
-
 /*
  * H3xxx flash support
  */
@@ -116,9 +85,34 @@
 /*
  * H3xxx uart support
  */
+static struct gpio h3xxx_uart_gpio[] = {	/* claimed by h3xxx_uart_request_gpios() */
+	{ H3XXX_GPIO_COM_DCD,	GPIOF_IN,		"COM DCD" },
+	{ H3XXX_GPIO_COM_CTS,	GPIOF_IN,		"COM CTS" },
+	{ H3XXX_GPIO_COM_RTS,	GPIOF_OUT_INIT_LOW,	"COM RTS" },
+};
+
+/* Request the COM GPIOs until it succeeds once; true when they are held. */
+static bool h3xxx_uart_request_gpios(void)
+{
+	static bool requested;
+	int err;
+
+	if (!requested) {
+		err = gpio_request_array(h3xxx_uart_gpio,
+					 ARRAY_SIZE(h3xxx_uart_gpio));
+		if (err)
+			pr_err("h3xxx_uart_request_gpios: error %d\n", err);
+		requested = !err;
+	}
+
+	return requested;
+}
+
 static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl)
 {
 	if (port->mapbase == _Ser3UTCR0) {
+		if (!h3xxx_uart_request_gpios())
+			return;
 		gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS));
 	}
 }
@@ -128,6 +122,8 @@
 	u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR;
 
 	if (port->mapbase == _Ser3UTCR0) {
+		if (!h3xxx_uart_request_gpios())
+			return ret;
 		/*
 		 * DCD and CTS bits are inverted in GPLR by RS232 transceiver
 		 */
diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h
index f33679d..b478ca1 100644
--- a/arch/arm/mach-sa1100/include/mach/collie.h
+++ b/arch/arm/mach-sa1100/include/mach/collie.h
@@ -13,6 +13,8 @@
 #ifndef __ASM_ARCH_COLLIE_H
 #define __ASM_ARCH_COLLIE_H
 
+#include "hardware.h" /* Gives GPIO_MAX */
+
 extern void locomolcd_power(int on);
 
 #define COLLIE_SCOOP_GPIO_BASE	(GPIO_MAX + 1)
@@ -78,7 +80,7 @@
 #define COLLIE_TC35143_GPIO_VERSION0    UCB_IO_0
 #define COLLIE_TC35143_GPIO_TBL_CHK     UCB_IO_1
 #define COLLIE_TC35143_GPIO_VPEN_ON     UCB_IO_2
-#define COLLIE_TC35143_GPIO_IR_ON       UCB_IO_3
+#define COLLIE_GPIO_IR_ON		(COLLIE_TC35143_GPIO_BASE + 3)
 #define COLLIE_TC35143_GPIO_AMP_ON      UCB_IO_4
 #define COLLIE_TC35143_GPIO_VERSION1    UCB_IO_5
 #define COLLIE_TC35143_GPIO_FS8KLPF     UCB_IO_5
diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h
index c810620..603d434 100644
--- a/arch/arm/mach-sa1100/include/mach/h3xxx.h
+++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h
@@ -79,17 +79,6 @@
 #define H3600_EGPIO_LCD_5V_ON		(H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */
 #define H3600_EGPIO_LVDD_ON		(H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */
 
-struct gpio_default_state {
-	int gpio;
-	int mode;
-	const char *name;
-};
-
-#define GPIO_MODE_IN	-1
-#define GPIO_MODE_OUT0	0
-#define GPIO_MODE_OUT1	1
-
-void h3xxx_init_gpio(struct gpio_default_state *s, size_t n);
 void __init h3xxx_map_io(void);
 void __init h3xxx_mach_init(void);
 
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 6fd4acb..4852c08 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -112,7 +112,7 @@
 
 static struct irqaction sa1100_timer_irq = {
 	.name		= "ost0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= sa1100_ost0_interrupt,
 	.dev_id		= &ckevt_sa1100_osmr0,
 };
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index d449673..218ba5b 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -172,7 +172,7 @@
 
 static struct irqaction spear_timer_irq = {
 	.name = "timer",
-	.flags = IRQF_DISABLED | IRQF_TIMER,
+	.flags = IRQF_TIMER,
 	.handler = spear_timer_interrupt
 };
 
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index fe08fd3..de52cb3 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -337,7 +337,7 @@
 
 static struct irqaction u300_timer_irq = {
 	.name		= "U300 Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= u300_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 0997e0b..fc649bc 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -8,8 +8,11 @@
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
 obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o	dcscb_setup.o
 CFLAGS_dcscb.o				+= -march=armv7-a
+CFLAGS_REMOVE_dcscb.o			= -pg
 obj-$(CONFIG_ARCH_VEXPRESS_SPC)		+= spc.o
+CFLAGS_REMOVE_spc.o			= -pg
 obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM)	+= tc2_pm.o
 CFLAGS_tc2_pm.o				+= -march=armv7-a
+CFLAGS_REMOVE_tc2_pm.o			= -pg
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c
index 30fbca8..9230d37 100644
--- a/arch/arm/mach-w90x900/time.c
+++ b/arch/arm/mach-w90x900/time.c
@@ -111,7 +111,7 @@
 
 static struct irqaction nuc900_timer0_irq = {
 	.name		= "nuc900-timer0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= nuc900_timer0_interrupt,
 };
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 1f8fed9..ca8ecde 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -446,7 +446,6 @@
 
 config CPU_32v6
 	bool
-	select CPU_USE_DOMAINS if CPU_V6 && MMU
 	select TLS_REG_EMUL if !CPU_32v6K && !MMU
 
 config CPU_32v6K
@@ -671,7 +670,7 @@
 
 config SWP_EMULATE
 	bool "Emulate SWP/SWPB instructions"
-	depends on !CPU_USE_DOMAINS && CPU_V7
+	depends on CPU_V7
 	default y if SMP
 	select HAVE_PROC_CPU if PROC_FS
 	help
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 48bc3c0..aae8918 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -331,7 +331,9 @@
 			enable_icache();
 		if (d)
 			enable_dcache();
-	}
+	} else
+		pr_err(FW_BUG
+		       "Feroceon L2: bootloader left the L2 cache on!\n");
 }
 
 void __init feroceon_l2_init(int __l2_wt_override)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1a77450..5bb4e00 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -284,9 +284,6 @@
 }
 
 #ifdef CONFIG_MMU
-#ifdef CONFIG_HUGETLB_PAGE
-#warning ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 2b3a564..ef69152 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -264,6 +264,9 @@
 			note_page(st, addr, 3, pmd_val(*pmd));
 		else
 			walk_pte(st, pmd, addr);
+
+		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1]))
+			note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1]));
 	}
 }
 
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index d5a982d..7ea641b 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -38,6 +38,7 @@
 
 struct mem_type {
 	pteval_t prot_pte;
+	pteval_t prot_pte_s2;
 	pmdval_t prot_l1;
 	pmdval_t prot_sect;
 	unsigned int domain;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4f08c13..b68c6b2 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -232,12 +232,16 @@
 #endif /* ifdef CONFIG_CPU_CP15 / else */
 
 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
+#define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
 static struct mem_type mem_types[] = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 				  L_PTE_SHARED,
+		.prot_pte_s2	= s2_policy(PROT_PTE_S2_DEVICE) |
+				  s2_policy(L_PTE_S2_MT_DEV_SHARED) |
+				  L_PTE_SHARED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
 		.domain		= DOMAIN_IO,
@@ -508,7 +512,18 @@
 	cp = &cache_policies[cachepolicy];
 	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
 	s2_pgprot = cp->pte_s2;
-	hyp_device_pgprot = s2_device_pgprot = mem_types[MT_DEVICE].prot_pte;
+	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
+	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
+
+	/*
+	 * We don't use domains on ARMv6 (since this causes problems with
+	 * v6/v7 kernels), so we must use a separate memory type for user
+	 * r/o, kernel r/w to map the vectors page.
+	 */
+#ifndef CONFIG_ARM_LPAE
+	if (cpu_arch == CPU_ARCH_ARMv6)
+		vecs_pgprot |= L_PTE_MT_VECTORS;
+#endif
 
 	/*
 	 * ARMv6 and above have extended page tables.
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e3c48a3..ee1d805 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -112,13 +112,9 @@
  *  100x   1   0   1	r/o	no acc
  *  10x0   1   0   1	r/o	no acc
  *  1011   0   0   1	r/w	no acc
- *  110x   0   1   0	r/w	r/o
- *  11x0   0   1   0	r/w	r/o
- *  1111   0   1   1	r/w	r/w
- *
- * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:
  *  110x   1   1   1	r/o	r/o
  *  11x0   1   1   1	r/o	r/o
+ *  1111   0   1   1	r/w	r/w
  */
 	.macro	armv6_mt_table pfx
 \pfx\()_mt_table:
@@ -137,7 +133,7 @@
 	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
 	.long	0x00						@ unused
 	.long	0x00						@ unused
-	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS
 	.endm
 
 	.macro	armv6_set_pte_ext pfx
@@ -158,24 +154,21 @@
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
-#ifdef CONFIG_CPU_USE_DOMAINS
-	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
-	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
-#endif
+
+	@ user read-only -> kernel read-only
+	bicne	r3, r3, #PTE_EXT_AP0
 
 	tst	r1, #L_PTE_XN
 	orrne	r3, r3, #PTE_EXT_XN
 
-	orr	r3, r3, r2
+	eor	r3, r3, r2
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
-#ifndef CONFIG_CPU_USE_DOMAINS
 	tstne	r1, #L_PTE_NONE
 	movne	r3, #0
-#endif
 
 	str	r3, [r0]
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 45dc29f..32b3558 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -208,7 +208,6 @@
 	mcr	p15, 0, r0, c7, c14, 0		@ clean+invalidate D cache
 	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, r0, c7, c15, 0		@ clean+invalidate cache
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r0, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r0, c2, c0, 2		@ TTB control register
@@ -218,6 +217,8 @@
 	ALT_UP(orr	r8, r8, #TTB_FLAGS_UP)
 	mcr	p15, 0, r8, c2, c0, 1		@ load TTB1
 #endif /* CONFIG_MMU */
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer and
+						@ complete invalidations
 	adr	r5, v6_crval
 	ldmia	r5, {r5, r6}
  ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index bdd3be4..1f52915f 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -90,21 +90,14 @@
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
-#ifdef CONFIG_CPU_USE_DOMAINS
-	@ allow kernel read/write access to read-only user pages
-	tstne	r3, #PTE_EXT_APX
-	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
-#endif
 
 	tst	r1, #L_PTE_XN
 	orrne	r3, r3, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_VALID
-#ifndef CONFIG_CPU_USE_DOMAINS
 	eorne	r1, r1, #L_PTE_NONE
 	tstne	r1, #L_PTE_NONE
-#endif
 	moveq	r3, #0
 
  ARM(	str	r3, [r0, #2048]! )
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index bd17819..195731d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -192,6 +192,7 @@
 	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
 	b	1f
 __v7_ca7mp_setup:
+__v7_ca12mp_setup:
 __v7_ca15mp_setup:
 	mov	r10, #0
 1:
@@ -351,7 +352,6 @@
 
 4:	mov	r10, #0
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
-	dsb
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
 	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
@@ -360,6 +360,7 @@
 	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 #endif
+	dsb					@ Complete invalidations
 #ifndef CONFIG_ARM_THUMBEE
 	mrc	p15, 0, r0, c0, c1, 0		@ read ID_PFR0 for ThumbEE
 	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
@@ -484,6 +485,16 @@
 	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
 
 	/*
+	 * ARM Ltd. Cortex A12 processor.
+	 */
+	.type	__v7_ca12mp_proc_info, #object
+__v7_ca12mp_proc_info:
+	.long	0x410fc0d0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca12mp_setup
+	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
+
+	/*
 	 * ARM Ltd. Cortex A15 processor.
 	 */
 	.type	__v7_ca15mp_proc_info, #object
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index d70b733..6ad65d8 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -127,7 +127,7 @@
 static struct irqaction iop_timer_irq = {
 	.name		= "IOP Timer Tick",
 	.handler	= iop_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &iop_clockevent,
 };
 
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index 01619c2..5f5b975 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -2000,6 +2000,12 @@
 			omap_clear_dma(ch);
 }
 
+struct omap_system_dma_plat_info *omap_get_plat_info(void)
+{
+	return p;	/* p is not local; it is declared outside this function */
+}
+EXPORT_SYMBOL_GPL(omap_get_plat_info);
+
 static int omap_system_dma_probe(struct platform_device *pdev)
 {
 	int ch, ret = 0;
@@ -2024,9 +2030,16 @@
 
 	dma_lch_count		= d->lch_count;
 	dma_chan_count		= dma_lch_count;
-	dma_chan		= d->chan;
 	enable_1510_mode	= d->dev_caps & ENABLE_1510_MODE;
 
+	dma_chan = devm_kcalloc(&pdev->dev, dma_lch_count,
+				sizeof(struct omap_dma_lch), GFP_KERNEL);
+	if (!dma_chan) {
+		dev_err(&pdev->dev, "%s: kzalloc fail\n", __func__);
+		return -ENOMEM;
+	}
+
+
 	if (dma_omap2plus()) {
 		dma_linked_lch = kzalloc(sizeof(struct dma_link_info) *
 						dma_lch_count, GFP_KERNEL);
@@ -2111,7 +2124,6 @@
 	}
 
 exit_dma_lch_fail:
-	kfree(dma_chan);
 	return ret;
 }
 
@@ -2131,7 +2143,6 @@
 			free_irq(dma_irq, (void *)(irq_rel + 1));
 		}
 	}
-	kfree(dma_chan);
 	return 0;
 }
 
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 9e60291..3cf61a1 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -83,7 +83,7 @@
 	__ATTR_NULL,
 };
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 /*
  * Hooks to provide runtime PM of the pclk (bus clock).  It is safe to
  * enable/disable the bus clock at runtime PM suspend/resume as this
@@ -123,7 +123,7 @@
 	.thaw		= pm_generic_thaw,
 	.poweroff	= pm_generic_poweroff,
 	.restore	= pm_generic_restore,
-	SET_RUNTIME_PM_OPS(
+	SET_PM_RUNTIME_PM_OPS(
 		amba_pm_runtime_suspend,
 		amba_pm_runtime_resume,
 		NULL
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 362e7c4..64ceca29 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -5,6 +5,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
@@ -26,11 +27,21 @@
 	spinlock_t lock;
 	struct tasklet_struct task;
 	struct list_head pending;
+	void __iomem *base;
+	const struct omap_dma_reg *reg_map;
+	struct omap_system_dma_plat_info *plat;
+	bool legacy;
+	spinlock_t irq_lock;
+	uint32_t irq_enable_mask;
+	struct omap_chan *lch_map[32];
 };
 
 struct omap_chan {
 	struct virt_dma_chan vc;
 	struct list_head node;
+	void __iomem *channel_base;
+	const struct omap_dma_reg *reg_map;
+	uint32_t ccr;
 
 	struct dma_slave_config	cfg;
 	unsigned dma_sig;
@@ -54,19 +65,93 @@
 	dma_addr_t dev_addr;
 
 	int16_t fi;		/* for OMAP_DMA_SYNC_PACKET */
-	uint8_t es;		/* OMAP_DMA_DATA_TYPE_xxx */
-	uint8_t sync_mode;	/* OMAP_DMA_SYNC_xxx */
-	uint8_t sync_type;	/* OMAP_DMA_xxx_SYNC* */
-	uint8_t periph_port;	/* Peripheral port */
+	uint8_t es;		/* CSDP_DATA_TYPE_xxx */
+	uint32_t ccr;		/* CCR value */
+	uint16_t clnk_ctrl;	/* CLNK_CTRL value */
+	uint16_t cicr;		/* CICR value */
+	uint32_t csdp;		/* CSDP value */
 
 	unsigned sglen;
 	struct omap_sg sg[0];
 };
 
+enum {	/* CCR_*: bits and fields of a channel's CCR value */
+	CCR_FS			= BIT(5),
+	CCR_READ_PRIORITY	= BIT(6),
+	CCR_ENABLE		= BIT(7),
+	CCR_AUTO_INIT		= BIT(8),	/* OMAP1 only */
+	CCR_REPEAT		= BIT(9),	/* OMAP1 only */
+	CCR_OMAP31_DISABLE	= BIT(10),	/* OMAP1 only */
+	CCR_SUSPEND_SENSITIVE	= BIT(8),	/* OMAP2+ only */
+	CCR_RD_ACTIVE		= BIT(9),	/* OMAP2+ only */
+	CCR_WR_ACTIVE		= BIT(10),	/* OMAP2+ only */
+	CCR_SRC_AMODE_CONSTANT	= 0 << 12,
+	CCR_SRC_AMODE_POSTINC	= 1 << 12,
+	CCR_SRC_AMODE_SGLIDX	= 2 << 12,
+	CCR_SRC_AMODE_DBLIDX	= 3 << 12,
+	CCR_DST_AMODE_CONSTANT	= 0 << 14,
+	CCR_DST_AMODE_POSTINC	= 1 << 14,
+	CCR_DST_AMODE_SGLIDX	= 2 << 14,
+	CCR_DST_AMODE_DBLIDX	= 3 << 14,
+	CCR_CONSTANT_FILL	= BIT(16),
+	CCR_TRANSPARENT_COPY	= BIT(17),
+	CCR_BS			= BIT(18),
+	CCR_SUPERVISOR		= BIT(22),
+	CCR_PREFETCH		= BIT(23),
+	CCR_TRIGGER_SRC		= BIT(24),
+	CCR_BUFFERING_DISABLE	= BIT(25),
+	CCR_WRITE_PRIORITY	= BIT(26),
+	CCR_SYNC_ELEMENT	= 0,	/* sync modes combine the FS and BS bits */
+	CCR_SYNC_FRAME		= CCR_FS,
+	CCR_SYNC_BLOCK		= CCR_BS,
+	CCR_SYNC_PACKET		= CCR_BS | CCR_FS,
+	/* CSDP_*: fields of the CSDP value; data types index es_bytes[] */
+	CSDP_DATA_TYPE_8	= 0,
+	CSDP_DATA_TYPE_16	= 1,
+	CSDP_DATA_TYPE_32	= 2,
+	CSDP_SRC_PORT_EMIFF	= 0 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_EMIFS	= 1 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_OCP_T1	= 2 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_TIPB	= 3 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_OCP_T2	= 4 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_MPUI	= 5 << 2, /* OMAP1 only */
+	CSDP_SRC_PACKED		= BIT(6),
+	CSDP_SRC_BURST_1	= 0 << 7,
+	CSDP_SRC_BURST_16	= 1 << 7,
+	CSDP_SRC_BURST_32	= 2 << 7,
+	CSDP_SRC_BURST_64	= 3 << 7,
+	CSDP_DST_PORT_EMIFF	= 0 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_EMIFS	= 1 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_OCP_T1	= 2 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_TIPB	= 3 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_OCP_T2	= 4 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_MPUI	= 5 << 9, /* OMAP1 only */
+	CSDP_DST_PACKED		= BIT(13),
+	CSDP_DST_BURST_1	= 0 << 14,
+	CSDP_DST_BURST_16	= 1 << 14,
+	CSDP_DST_BURST_32	= 2 << 14,
+	CSDP_DST_BURST_64	= 3 << 14,
+	/* CICR_*: per-channel interrupt enable bits written to CICR */
+	CICR_TOUT_IE		= BIT(0),	/* OMAP1 only */
+	CICR_DROP_IE		= BIT(1),
+	CICR_HALF_IE		= BIT(2),
+	CICR_FRAME_IE		= BIT(3),
+	CICR_LAST_IE		= BIT(4),
+	CICR_BLOCK_IE		= BIT(5),
+	CICR_PKT_IE		= BIT(7),	/* OMAP2+ only */
+	CICR_TRANS_ERR_IE	= BIT(8),	/* OMAP2+ only */
+	CICR_SUPERVISOR_ERR_IE	= BIT(10),	/* OMAP2+ only */
+	CICR_MISALIGNED_ERR_IE	= BIT(11),	/* OMAP2+ only */
+	CICR_DRAIN_IE		= BIT(12),	/* OMAP2+ only */
+	CICR_SUPER_BLOCK_IE	= BIT(14),	/* OMAP2+ only */
+	/* CLNK_CTRL_*: bits of a channel's CLNK_CTRL value */
+	CLNK_CTRL_ENABLE_LNK	= BIT(15),
+};
+
 static const unsigned es_bytes[] = {
-	[OMAP_DMA_DATA_TYPE_S8] = 1,
-	[OMAP_DMA_DATA_TYPE_S16] = 2,
-	[OMAP_DMA_DATA_TYPE_S32] = 4,
+	[CSDP_DATA_TYPE_8] = 1,
+	[CSDP_DATA_TYPE_16] = 2,
+	[CSDP_DATA_TYPE_32] = 4,
 };
 
 static struct of_dma_filter_info omap_dma_info = {
@@ -93,28 +178,214 @@
 	kfree(container_of(vd, struct omap_desc, vd));
 }
 
+/*
+ * Store @val at @addr using the register access width selected by @type.
+ */
+static void omap_dma_write(uint32_t val, unsigned type, void __iomem *addr)
+{
+	if (type == OMAP_DMA_REG_32BIT) {
+		writel_relaxed(val, addr);
+	} else if (type == OMAP_DMA_REG_16BIT) {
+		writew_relaxed(val, addr);
+	} else if (type == OMAP_DMA_REG_2X16BIT) {
+		/* low half at @addr, high half two bytes further on */
+		writew_relaxed(val, addr);
+		writew_relaxed(val >> 16, addr + 2);
+	} else {
+		WARN_ON(1);
+	}
+}
+
+/*
+ * Read the register at @addr with the width selected by @type (0 if unknown).
+ */
+static unsigned omap_dma_read(unsigned type, void __iomem *addr)
+{
+	unsigned val;
+
+	switch (type) {
+	case OMAP_DMA_REG_16BIT:
+		return readw_relaxed(addr);
+	case OMAP_DMA_REG_2X16BIT:
+		val = readw_relaxed(addr);
+		/* widen first: a 16-bit value promoted to int overflows */
+		val |= (unsigned)readw_relaxed(addr + 2) << 16;
+		return val;
+	case OMAP_DMA_REG_32BIT:
+		return readl_relaxed(addr);
+	default:
+		WARN_ON(1);
+		return 0;
+	}
+}
+
+/* Write @val to global register @reg (WARNs if its map entry has a stride). */
+static void omap_dma_glbl_write(struct omap_dmadev *od, unsigned reg, unsigned val)
+{
+	const struct omap_dma_reg *entry = &od->reg_map[reg];
+
+	WARN_ON(entry->stride);
+	omap_dma_write(val, entry->type, od->base + entry->offset);
+}
+
+/* Read global register @reg (WARNs if its map entry has a stride). */
+static unsigned omap_dma_glbl_read(struct omap_dmadev *od, unsigned reg)
+{
+	const struct omap_dma_reg *entry = &od->reg_map[reg];
+
+	WARN_ON(entry->stride);
+	return omap_dma_read(entry->type, od->base + entry->offset);
+}
+
+/* Write @val to register @reg relative to this channel's register base. */
+static void omap_dma_chan_write(struct omap_chan *c, unsigned reg, unsigned val)
+{
+	omap_dma_write(val, c->reg_map[reg].type,
+		       c->channel_base + c->reg_map[reg].offset);
+}
+
+/* Read register @reg relative to this channel's register base. */
+static unsigned omap_dma_chan_read(struct omap_chan *c, unsigned reg)
+{
+	return omap_dma_read(c->reg_map[reg].type,
+			     c->channel_base + c->reg_map[reg].offset);
+}
+
+static void omap_dma_clear_csr(struct omap_chan *c)
+{
+	if (!dma_omap1())
+		omap_dma_chan_write(c, CSR, ~0);	/* write all-ones */
+	else
+		omap_dma_chan_read(c, CSR);	/* OMAP1: only read it back */
+}
+
+/* Fetch CSR; on non-OMAP1 parts write the value back to the register. */
+static unsigned omap_dma_get_csr(struct omap_chan *c)
+{
+	unsigned status = omap_dma_chan_read(c, CSR);
+
+	if (!dma_omap1())
+		omap_dma_chan_write(c, CSR, status);
+	return status;
+}
+
+/* Bind @c to logical channel @lch: set its base and record it in lch_map. */
+static void omap_dma_assign(struct omap_dmadev *od, struct omap_chan *c,
+	unsigned lch)
+{
+	od->lch_map[lch] = c;
+	c->channel_base = od->base + od->plat->channel_stride * lch;
+}
+
+/* Zero the position register, clear status, then arm IRQs and the channel. */
+static void omap_dma_start(struct omap_chan *c, struct omap_desc *d)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	unsigned pos_reg;
+
+	/* 15xx parts get CPC zeroed, everything else CDAC */
+	pos_reg = __dma_omap15xx(od->plat->dma_attr) ? CPC : CDAC;
+	omap_dma_chan_write(c, pos_reg, 0);
+
+	omap_dma_clear_csr(c);
+
+	/* Program the interrupt enables requested by the descriptor */
+	omap_dma_chan_write(c, CICR, d->cicr);
+	/* Set the enable bit on top of the descriptor's CCR value */
+	omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE);
+}
+
+static void omap_dma_stop(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	uint32_t val;
+
+	/* Stop the channel: first mask all of its interrupt sources */
+	omap_dma_chan_write(c, CICR, 0);
+
+	omap_dma_clear_csr(c);
+
+	val = omap_dma_chan_read(c, CCR);
+	if (od->plat->errata & DMA_ERRATA_i541 && val & CCR_TRIGGER_SRC) {
+		uint32_t sysconfig;
+		unsigned i;
+		/* i541: keep MIDLEMODE at no-idle until the drain is done */
+		sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG);
+		val = sysconfig & ~DMA_SYSCONFIG_MIDLEMODE_MASK;
+		val |= DMA_SYSCONFIG_MIDLEMODE(DMA_IDLEMODE_NO_IDLE);
+		omap_dma_glbl_write(od, OCP_SYSCONFIG, val);
+
+		val = omap_dma_chan_read(c, CCR);
+		val &= ~CCR_ENABLE;
+		omap_dma_chan_write(c, CCR, val);
+
+		/* Wait for sDMA FIFO to drain */
+		for (i = 0; ; i++) {
+			val = omap_dma_chan_read(c, CCR);
+			if (!(val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE)))
+				break;
+
+			if (i > 100)	/* bounded wait: stop polling eventually */
+				break;
+
+			udelay(5);
+		}
+
+		if (val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE))
+			dev_err(c->vc.chan.device->dev,
+				"DMA drain did not complete on lch %d\n",
+			        c->dma_ch);
+		/* Put back the OCP_SYSCONFIG value saved above */
+		omap_dma_glbl_write(od, OCP_SYSCONFIG, sysconfig);
+	} else {
+		val &= ~CCR_ENABLE;
+		omap_dma_chan_write(c, CCR, val);
+	}
+
+	mb();
+	/* Cyclic on non-15xx: also stop the channel link set up at prep time */
+	if (!__dma_omap15xx(od->plat->dma_attr) && c->cyclic) {
+		val = omap_dma_chan_read(c, CLNK_CTRL);
+
+		if (dma_omap1())
+			val |= 1 << 14; /* set the STOP_LNK bit */
+		else
+			val &= ~CLNK_CTRL_ENABLE_LNK;
+
+		omap_dma_chan_write(c, CLNK_CTRL, val);
+	}
+}
+
 static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d,
 	unsigned idx)
 {
 	struct omap_sg *sg = d->sg + idx;
+	unsigned cxsa, cxei, cxfi;
 
-	if (d->dir == DMA_DEV_TO_MEM)
-		omap_set_dma_dest_params(c->dma_ch, OMAP_DMA_PORT_EMIFF,
-			OMAP_DMA_AMODE_POST_INC, sg->addr, 0, 0);
-	else
-		omap_set_dma_src_params(c->dma_ch, OMAP_DMA_PORT_EMIFF,
-			OMAP_DMA_AMODE_POST_INC, sg->addr, 0, 0);
+	if (d->dir == DMA_DEV_TO_MEM) {
+		cxsa = CDSA;
+		cxei = CDEI;
+		cxfi = CDFI;
+	} else {
+		cxsa = CSSA;
+		cxei = CSEI;
+		cxfi = CSFI;
+	}
 
-	omap_set_dma_transfer_params(c->dma_ch, d->es, sg->en, sg->fn,
-		d->sync_mode, c->dma_sig, d->sync_type);
+	omap_dma_chan_write(c, cxsa, sg->addr);
+	omap_dma_chan_write(c, cxei, 0);
+	omap_dma_chan_write(c, cxfi, 0);
+	omap_dma_chan_write(c, CEN, sg->en);
+	omap_dma_chan_write(c, CFN, sg->fn);
 
-	omap_start_dma(c->dma_ch);
+	omap_dma_start(c, d);
 }
 
 static void omap_dma_start_desc(struct omap_chan *c)
 {
 	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
 	struct omap_desc *d;
+	unsigned cxsa, cxei, cxfi;
 
 	if (!vd) {
 		c->desc = NULL;
@@ -126,12 +397,32 @@
 	c->desc = d = to_omap_dma_desc(&vd->tx);
 	c->sgidx = 0;
 
-	if (d->dir == DMA_DEV_TO_MEM)
-		omap_set_dma_src_params(c->dma_ch, d->periph_port,
-			OMAP_DMA_AMODE_CONSTANT, d->dev_addr, 0, d->fi);
-	else
-		omap_set_dma_dest_params(c->dma_ch, d->periph_port,
-			OMAP_DMA_AMODE_CONSTANT, d->dev_addr, 0, d->fi);
+	/*
+	 * This provides the necessary barrier to ensure data held in
+	 * DMA coherent memory is visible to the DMA engine prior to
+	 * the transfer starting.
+	 */
+	mb();
+
+	omap_dma_chan_write(c, CCR, d->ccr);
+	if (dma_omap1())
+		omap_dma_chan_write(c, CCR2, d->ccr >> 16);
+
+	if (d->dir == DMA_DEV_TO_MEM) {
+		cxsa = CSSA;
+		cxei = CSEI;
+		cxfi = CSFI;
+	} else {
+		cxsa = CDSA;
+		cxei = CDEI;
+		cxfi = CDFI;
+	}
+
+	omap_dma_chan_write(c, cxsa, d->dev_addr);
+	omap_dma_chan_write(c, cxei, 0);
+	omap_dma_chan_write(c, cxfi, d->fi);
+	omap_dma_chan_write(c, CSDP, d->csdp);
+	omap_dma_chan_write(c, CLNK_CTRL, d->clnk_ctrl);
 
 	omap_dma_start_sg(c, d, 0);
 }
@@ -186,24 +477,118 @@
 	}
 }
 
+static irqreturn_t omap_dma_irq(int irq, void *devid)
+{
+	struct omap_dmadev *od = devid;
+	unsigned status, channel;
+
+	spin_lock(&od->irq_lock);
+	/* L1 status, limited to the channels recorded in irq_enable_mask */
+	status = omap_dma_glbl_read(od, IRQSTATUS_L1);
+	status &= od->irq_enable_mask;
+	if (status == 0) {
+		spin_unlock(&od->irq_lock);
+		return IRQ_NONE;
+	}
+	/* Handle each pending channel in turn, lowest set bit first */
+	while ((channel = ffs(status)) != 0) {
+		unsigned mask, csr;
+		struct omap_chan *c;
+
+		channel -= 1;	/* ffs() result is 1-based */
+		mask = BIT(channel);
+		status &= ~mask;
+
+		c = od->lch_map[channel];
+		if (c == NULL) {
+			/* This should never happen */
+			dev_err(od->ddev.dev, "invalid channel %u\n", channel);
+			continue;
+		}
+		/* Fetch channel status, then write its bit to IRQSTATUS_L1 */
+		csr = omap_dma_get_csr(c);
+		omap_dma_glbl_write(od, IRQSTATUS_L1, mask);
+
+		omap_dma_callback(channel, csr, c);
+	}
+
+	spin_unlock(&od->irq_lock);
+
+	return IRQ_HANDLED;
+}
+
 static int omap_dma_alloc_chan_resources(struct dma_chan *chan)
 {
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
+	int ret;
 
-	dev_dbg(c->vc.chan.device->dev, "allocating channel for %u\n", c->dma_sig);
+	if (od->legacy) {
+		ret = omap_request_dma(c->dma_sig, "DMA engine",
+				       omap_dma_callback, c, &c->dma_ch);
+	} else {
+		ret = omap_request_dma(c->dma_sig, "DMA engine", NULL, NULL,
+				       &c->dma_ch);
+	}
 
-	return omap_request_dma(c->dma_sig, "DMA engine",
-		omap_dma_callback, c, &c->dma_ch);
+	dev_dbg(od->ddev.dev, "allocating channel %u for %u\n",
+		c->dma_ch, c->dma_sig);
+
+	if (ret >= 0) {
+		omap_dma_assign(od, c, c->dma_ch);
+
+		if (!od->legacy) {
+			unsigned val;
+
+			spin_lock_irq(&od->irq_lock);
+			val = BIT(c->dma_ch);
+			omap_dma_glbl_write(od, IRQSTATUS_L1, val);
+			od->irq_enable_mask |= val;
+			omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+
+			val = omap_dma_glbl_read(od, IRQENABLE_L0);
+			val &= ~BIT(c->dma_ch);
+			omap_dma_glbl_write(od, IRQENABLE_L0, val);
+			spin_unlock_irq(&od->irq_lock);
+		}
+	}
+
+	if (dma_omap1()) {
+		if (__dma_omap16xx(od->plat->dma_attr)) {
+			c->ccr = CCR_OMAP31_DISABLE;
+			/* Duplicate what plat-omap/dma.c does */
+			c->ccr |= c->dma_ch + 1;
+		} else {
+			c->ccr = c->dma_sig & 0x1f;
+		}
+	} else {
+		c->ccr = c->dma_sig & 0x1f;
+		c->ccr |= (c->dma_sig & ~0x1f) << 14;
+	}
+	if (od->plat->errata & DMA_ERRATA_IFRAME_BUFFERING)
+		c->ccr |= CCR_BUFFERING_DISABLE;
+
+	return ret;
 }
 
 static void omap_dma_free_chan_resources(struct dma_chan *chan)
 {
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
 
+	if (!od->legacy) {
+		spin_lock_irq(&od->irq_lock);
+		od->irq_enable_mask &= ~BIT(c->dma_ch);
+		omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+		spin_unlock_irq(&od->irq_lock);
+	}
+
+	c->channel_base = NULL;
+	od->lch_map[c->dma_ch] = NULL;
 	vchan_free_chan_resources(&c->vc);
 	omap_free_dma(c->dma_ch);
 
-	dev_dbg(c->vc.chan.device->dev, "freeing channel for %u\n", c->dma_sig);
+	dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig);
 }
 
 static size_t omap_dma_sg_size(struct omap_sg *sg)
@@ -239,6 +624,74 @@
 	return size;
 }
 
+/*
+ * Read @reg, retrying once when 0 comes back on parts flagged with
+ * DMA_ERRATA_3_3 (OMAP 3.2/3.3: CSAC/CDAC may read as 0 if sampled
+ * before the DMA controller has finished disabling the channel).
+ */
+static uint32_t omap_dma_chan_read_3_3(struct omap_chan *c, unsigned reg)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	uint32_t first = omap_dma_chan_read(c, reg);
+
+	if (first != 0 || !(od->plat->errata & DMA_ERRATA_3_3))
+		return first;
+
+	return omap_dma_chan_read(c, reg);
+}
+
+/* Current source address of @c, taken from the channel registers. */
+static dma_addr_t omap_dma_get_src_pos(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	dma_addr_t pos;
+
+	if (__dma_omap15xx(od->plat->dma_attr)) {
+		pos = omap_dma_chan_read(c, CPC);
+	} else {
+		pos = omap_dma_chan_read_3_3(c, CSAC);
+
+		/*
+		 * A zero CDAC means the transfer has not started yet (no
+		 * data moved so far): report the programmed source start
+		 * address instead of CSAC.
+		 */
+		if (omap_dma_chan_read_3_3(c, CDAC) == 0)
+			pos = omap_dma_chan_read(c, CSSA);
+	}
+
+	if (dma_omap1())
+		pos |= omap_dma_chan_read(c, CSSA) & 0xffff0000;
+
+	return pos;
+}
+
+/* Current destination address of @c, taken from the channel registers. */
+static dma_addr_t omap_dma_get_dst_pos(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	dma_addr_t pos;
+
+	if (__dma_omap15xx(od->plat->dma_attr)) {
+		pos = omap_dma_chan_read(c, CPC);
+	} else {
+		pos = omap_dma_chan_read_3_3(c, CDAC);
+
+		/*
+		 * A zero CDAC means the transfer has not started yet (no
+		 * data moved so far): report the programmed destination
+		 * start address instead.
+		 */
+		if (!pos)
+			pos = omap_dma_chan_read(c, CDSA);
+	}
+
+	if (dma_omap1())
+		pos |= omap_dma_chan_read(c, CDSA) & 0xffff0000;
+
+	return pos;
+}
+
 static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
 	dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
@@ -260,9 +713,9 @@
 		dma_addr_t pos;
 
 		if (d->dir == DMA_MEM_TO_DEV)
-			pos = omap_get_dma_src_pos(c->dma_ch);
+			pos = omap_dma_get_src_pos(c);
 		else if (d->dir == DMA_DEV_TO_MEM)
-			pos = omap_get_dma_dst_pos(c->dma_ch);
+			pos = omap_dma_get_dst_pos(c);
 		else
 			pos = 0;
 
@@ -304,24 +757,23 @@
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned sglen,
 	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
 {
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
 	enum dma_slave_buswidth dev_width;
 	struct scatterlist *sgent;
 	struct omap_desc *d;
 	dma_addr_t dev_addr;
-	unsigned i, j = 0, es, en, frame_bytes, sync_type;
+	unsigned i, j = 0, es, en, frame_bytes;
 	u32 burst;
 
 	if (dir == DMA_DEV_TO_MEM) {
 		dev_addr = c->cfg.src_addr;
 		dev_width = c->cfg.src_addr_width;
 		burst = c->cfg.src_maxburst;
-		sync_type = OMAP_DMA_SRC_SYNC;
 	} else if (dir == DMA_MEM_TO_DEV) {
 		dev_addr = c->cfg.dst_addr;
 		dev_width = c->cfg.dst_addr_width;
 		burst = c->cfg.dst_maxburst;
-		sync_type = OMAP_DMA_DST_SYNC;
 	} else {
 		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
 		return NULL;
@@ -330,13 +782,13 @@
 	/* Bus width translates to the element size (ES) */
 	switch (dev_width) {
 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		es = OMAP_DMA_DATA_TYPE_S8;
+		es = CSDP_DATA_TYPE_8;
 		break;
 	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		es = OMAP_DMA_DATA_TYPE_S16;
+		es = CSDP_DATA_TYPE_16;
 		break;
 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		es = OMAP_DMA_DATA_TYPE_S32;
+		es = CSDP_DATA_TYPE_32;
 		break;
 	default: /* not reached */
 		return NULL;
@@ -350,9 +802,31 @@
 	d->dir = dir;
 	d->dev_addr = dev_addr;
 	d->es = es;
-	d->sync_mode = OMAP_DMA_SYNC_FRAME;
-	d->sync_type = sync_type;
-	d->periph_port = OMAP_DMA_PORT_TIPB;
+
+	d->ccr = c->ccr | CCR_SYNC_FRAME;
+	if (dir == DMA_DEV_TO_MEM)
+		d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
+	else
+		d->ccr |= CCR_DST_AMODE_CONSTANT | CCR_SRC_AMODE_POSTINC;
+
+	d->cicr = CICR_DROP_IE | CICR_BLOCK_IE;
+	d->csdp = es;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+
+		if (dir == DMA_DEV_TO_MEM)
+			d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_TIPB;
+		else
+			d->csdp |= CSDP_DST_PORT_TIPB | CSDP_SRC_PORT_EMIFF;
+	} else {
+		if (dir == DMA_DEV_TO_MEM)
+			d->ccr |= CCR_TRIGGER_SRC;
+
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+	}
+	if (od->plat->errata & DMA_ERRATA_PARALLEL_CHANNELS)
+		d->clnk_ctrl = c->dma_ch;
 
 	/*
 	 * Build our scatterlist entries: each contains the address,
@@ -382,23 +856,22 @@
 	size_t period_len, enum dma_transfer_direction dir, unsigned long flags,
 	void *context)
 {
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
 	enum dma_slave_buswidth dev_width;
 	struct omap_desc *d;
 	dma_addr_t dev_addr;
-	unsigned es, sync_type;
+	unsigned es;
 	u32 burst;
 
 	if (dir == DMA_DEV_TO_MEM) {
 		dev_addr = c->cfg.src_addr;
 		dev_width = c->cfg.src_addr_width;
 		burst = c->cfg.src_maxburst;
-		sync_type = OMAP_DMA_SRC_SYNC;
 	} else if (dir == DMA_MEM_TO_DEV) {
 		dev_addr = c->cfg.dst_addr;
 		dev_width = c->cfg.dst_addr_width;
 		burst = c->cfg.dst_maxburst;
-		sync_type = OMAP_DMA_DST_SYNC;
 	} else {
 		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
 		return NULL;
@@ -407,13 +880,13 @@
 	/* Bus width translates to the element size (ES) */
 	switch (dev_width) {
 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		es = OMAP_DMA_DATA_TYPE_S8;
+		es = CSDP_DATA_TYPE_8;
 		break;
 	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		es = OMAP_DMA_DATA_TYPE_S16;
+		es = CSDP_DATA_TYPE_16;
 		break;
 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		es = OMAP_DMA_DATA_TYPE_S32;
+		es = CSDP_DATA_TYPE_32;
 		break;
 	default: /* not reached */
 		return NULL;
@@ -428,31 +901,50 @@
 	d->dev_addr = dev_addr;
 	d->fi = burst;
 	d->es = es;
-	if (burst)
-		d->sync_mode = OMAP_DMA_SYNC_PACKET;
-	else
-		d->sync_mode = OMAP_DMA_SYNC_ELEMENT;
-	d->sync_type = sync_type;
-	d->periph_port = OMAP_DMA_PORT_MPUI;
 	d->sg[0].addr = buf_addr;
 	d->sg[0].en = period_len / es_bytes[es];
 	d->sg[0].fn = buf_len / period_len;
 	d->sglen = 1;
 
-	if (!c->cyclic) {
-		c->cyclic = true;
-		omap_dma_link_lch(c->dma_ch, c->dma_ch);
+	d->ccr = c->ccr;
+	if (dir == DMA_DEV_TO_MEM)
+		d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
+	else
+		d->ccr |= CCR_DST_AMODE_CONSTANT | CCR_SRC_AMODE_POSTINC;
 
-		if (flags & DMA_PREP_INTERRUPT)
-			omap_enable_dma_irq(c->dma_ch, OMAP_DMA_FRAME_IRQ);
+	d->cicr = CICR_DROP_IE;
+	if (flags & DMA_PREP_INTERRUPT)
+		d->cicr |= CICR_FRAME_IE;
 
-		omap_disable_dma_irq(c->dma_ch, OMAP_DMA_BLOCK_IRQ);
+	d->csdp = es;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+
+		if (dir == DMA_DEV_TO_MEM)
+			d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_MPUI;
+		else
+			d->csdp |= CSDP_DST_PORT_MPUI | CSDP_SRC_PORT_EMIFF;
+	} else {
+		if (burst)
+			d->ccr |= CCR_SYNC_PACKET;
+		else
+			d->ccr |= CCR_SYNC_ELEMENT;
+
+		if (dir == DMA_DEV_TO_MEM)
+			d->ccr |= CCR_TRIGGER_SRC;
+
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+
+		d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
 	}
 
-	if (dma_omap2plus()) {
-		omap_set_dma_src_burst_mode(c->dma_ch, OMAP_DMA_DATA_BURST_16);
-		omap_set_dma_dest_burst_mode(c->dma_ch, OMAP_DMA_DATA_BURST_16);
-	}
+	if (__dma_omap15xx(od->plat->dma_attr))
+		d->ccr |= CCR_AUTO_INIT | CCR_REPEAT;
+	else
+		d->clnk_ctrl = c->dma_ch | CLNK_CTRL_ENABLE_LNK;
+
+	c->cyclic = true;
 
 	return vchan_tx_prep(&c->vc, &d->vd, flags);
 }
@@ -483,20 +975,19 @@
 
 	/*
 	 * Stop DMA activity: we assume the callback will not be called
-	 * after omap_stop_dma() returns (even if it does, it will see
+	 * after omap_dma_stop() returns (even if it does, it will see
 	 * c->desc is NULL and exit.)
 	 */
 	if (c->desc) {
 		c->desc = NULL;
 		/* Avoid stopping the dma twice */
 		if (!c->paused)
-			omap_stop_dma(c->dma_ch);
+			omap_dma_stop(c);
 	}
 
 	if (c->cyclic) {
 		c->cyclic = false;
 		c->paused = false;
-		omap_dma_unlink_lch(c->dma_ch, c->dma_ch);
 	}
 
 	vchan_get_all_descriptors(&c->vc, &head);
@@ -513,7 +1004,7 @@
 		return -EINVAL;
 
 	if (!c->paused) {
-		omap_stop_dma(c->dma_ch);
+		omap_dma_stop(c);
 		c->paused = true;
 	}
 
@@ -527,7 +1018,7 @@
 		return -EINVAL;
 
 	if (c->paused) {
-		omap_start_dma(c->dma_ch);
+		omap_dma_start(c, c->desc);
 		c->paused = false;
 	}
 
@@ -573,6 +1064,7 @@
 	if (!c)
 		return -ENOMEM;
 
+	c->reg_map = od->reg_map;
 	c->dma_sig = dma_sig;
 	c->vc.desc_free = omap_dma_desc_free;
 	vchan_init(&c->vc, &od->ddev);
@@ -594,18 +1086,29 @@
 		tasklet_kill(&c->vc.task);
 		kfree(c);
 	}
-	kfree(od);
 }
 
 static int omap_dma_probe(struct platform_device *pdev)
 {
 	struct omap_dmadev *od;
-	int rc, i;
+	struct resource *res;
+	int rc, i, irq;
 
-	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
 	if (!od)
 		return -ENOMEM;
 
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	od->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(od->base))
+		return PTR_ERR(od->base);
+
+	od->plat = omap_get_plat_info();
+	if (!od->plat)
+		return -EPROBE_DEFER;
+
+	od->reg_map = od->plat->reg_map;
+
 	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
 	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
 	od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources;
@@ -619,6 +1122,7 @@
 	INIT_LIST_HEAD(&od->ddev.channels);
 	INIT_LIST_HEAD(&od->pending);
 	spin_lock_init(&od->lock);
+	spin_lock_init(&od->irq_lock);
 
 	tasklet_init(&od->task, omap_dma_sched, (unsigned long)od);
 
@@ -630,6 +1134,21 @@
 		}
 	}
 
+	irq = platform_get_irq(pdev, 1);
+	if (irq <= 0) {
+		dev_info(&pdev->dev, "failed to get L1 IRQ: %d\n", irq);
+		od->legacy = true;
+	} else {
+		/* Disable all interrupts */
+		od->irq_enable_mask = 0;
+		omap_dma_glbl_write(od, IRQENABLE_L1, 0);
+
+		rc = devm_request_irq(&pdev->dev, irq, omap_dma_irq,
+				      IRQF_SHARED, "omap-dma-engine", od);
+		if (rc)
+			return rc;
+	}
+
 	rc = dma_async_device_register(&od->ddev);
 	if (rc) {
 		pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
@@ -666,6 +1185,12 @@
 		of_dma_controller_free(pdev->dev.of_node);
 
 	dma_async_device_unregister(&od->ddev);
+
+	if (!od->legacy) {
+		/* Disable all interrupts */
+		omap_dma_glbl_write(od, IRQENABLE_L0, 0);
+	}
+
 	omap_dma_free(od);
 
 	return 0;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 357bbc5..3e049c1 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -197,7 +197,7 @@
 	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
-		limit = dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
+		limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
 
 	mq->card = card;
 	mq->queue = blk_init_queue(mmc_request_fn, lock);
diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 09ba186..059ff47 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c
@@ -2971,7 +2971,7 @@
 	ec->irqaddr	= ashost->fast + INT_REG;
 	ec->irqmask	= 0x0a;
 
-	ret = request_irq(host->irq, acornscsi_intr, IRQF_DISABLED, "acornscsi", ashost);
+	ret = request_irq(host->irq, acornscsi_intr, 0, "acornscsi", ashost);
 	if (ret) {
 		printk(KERN_CRIT "scsi%d: IRQ%d not free: %d\n",
 			host->host_no, ashost->scsi.irq, ret);
diff --git a/drivers/scsi/arm/cumana_1.c b/drivers/scsi/arm/cumana_1.c
index b679778..f8e0609 100644
--- a/drivers/scsi/arm/cumana_1.c
+++ b/drivers/scsi/arm/cumana_1.c
@@ -262,7 +262,7 @@
 		goto out_unmap;
 	}
 
-	ret = request_irq(host->irq, cumanascsi_intr, IRQF_DISABLED,
+	ret = request_irq(host->irq, cumanascsi_intr, 0,
 			  "CumanaSCSI-1", host);
 	if (ret) {
 		printk("scsi%d: IRQ%d not free: %d\n",
diff --git a/drivers/scsi/arm/cumana_2.c b/drivers/scsi/arm/cumana_2.c
index 58915f2..abc66f5 100644
--- a/drivers/scsi/arm/cumana_2.c
+++ b/drivers/scsi/arm/cumana_2.c
@@ -431,7 +431,7 @@
 		goto out_free;
 
 	ret = request_irq(ec->irq, cumanascsi_2_intr,
-			  IRQF_DISABLED, "cumanascsi2", info);
+			  0, "cumanascsi2", info);
 	if (ret) {
 		printk("scsi%d: IRQ%d not free: %d\n",
 		       host->host_no, ec->irq, ret);
diff --git a/drivers/scsi/arm/powertec.c b/drivers/scsi/arm/powertec.c
index abc9593..5e1b73e 100644
--- a/drivers/scsi/arm/powertec.c
+++ b/drivers/scsi/arm/powertec.c
@@ -358,7 +358,7 @@
 		goto out_free;
 
 	ret = request_irq(ec->irq, powertecscsi_intr,
-			  IRQF_DISABLED, "powertec", info);
+			  0, "powertec", info);
 	if (ret) {
 		printk("scsi%d: IRQ%d not free: %d\n",
 		       host->host_no, ec->irq, ret);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7bd7f0d..62ec84b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1684,7 +1684,7 @@
 
 	host_dev = scsi_get_device(shost);
 	if (host_dev && host_dev->dma_mask)
-		bounce_limit = dma_max_pfn(host_dev) << PAGE_SHIFT;
+		bounce_limit = (u64)dma_max_pfn(host_dev) << PAGE_SHIFT;
 
 	return bounce_limit;
 }
diff --git a/drivers/usb/gadget/lpc32xx_udc.c b/drivers/usb/gadget/lpc32xx_udc.c
index 049ebab..a94bb10 100644
--- a/drivers/usb/gadget/lpc32xx_udc.c
+++ b/drivers/usb/gadget/lpc32xx_udc.c
@@ -55,7 +55,6 @@
 #include <mach/hardware.h>
 #include <linux/io.h>
 #include <asm/irq.h>
-#include <asm/system.h>
 
 #include <mach/platform.h>
 #include <mach/irqs.h>
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1ef6636..8a20a51 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -252,6 +252,8 @@
 void memblock_set_current_limit(phys_addr_t limit);
 
 
+phys_addr_t memblock_get_current_limit(void);
+
 /*
  * pfn conversion functions
  *
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 7af25a9..41a13e7 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -268,14 +268,27 @@
 	u32 dev_caps;
 	u16 lch_count;
 	u16 chan_count;
-	struct omap_dma_lch *chan;
+};
+
+enum {	/* register access types stored in omap_dma_reg.type */
+	OMAP_DMA_REG_NONE,	/* no access width: omap-dma accessors WARN */
+	OMAP_DMA_REG_16BIT,	/* one 16-bit access */
+	OMAP_DMA_REG_2X16BIT,	/* two 16-bit accesses, low half first */
+	OMAP_DMA_REG_32BIT,	/* one 32-bit access */
+};
+
+struct omap_dma_reg {
+	u16	offset;	/* added to the global or per-channel base */
+	u8	stride;	/* omap-dma WARNs if set on a global register */
+	u8	type;	/* one of OMAP_DMA_REG_* */
 };
 
 /* System DMA platform data structure */
 struct omap_system_dma_plat_info {
+	const struct omap_dma_reg *reg_map;
+	unsigned channel_stride;
 	struct omap_dma_dev_attr *dma_attr;
 	u32 errata;
-	void (*disable_irq_lch)(int lch);
 	void (*show_dma_caps)(void);
 	void (*clear_lch_regs)(int lch);
 	void (*clear_dma)(int lch);
@@ -289,8 +302,12 @@
 #define dma_omap2plus()	0
 #endif
 #define dma_omap1()	(!dma_omap2plus())
-#define dma_omap15xx()	((dma_omap1() && (d->dev_caps & ENABLE_1510_MODE)))
-#define dma_omap16xx()	((dma_omap1() && (d->dev_caps & ENABLE_16XX_MODE)))
+#define __dma_omap15xx(d) (dma_omap1() && (d)->dev_caps & ENABLE_1510_MODE)
+#define __dma_omap16xx(d) (dma_omap1() && (d)->dev_caps & ENABLE_16XX_MODE)
+#define dma_omap15xx()	__dma_omap15xx(d)
+#define dma_omap16xx()	__dma_omap16xx(d)
+
+extern struct omap_system_dma_plat_info *omap_get_plat_info(void);
 
 extern void omap_set_dma_priority(int lch, int dst_port, int priority);
 extern int omap_request_dma(int dev_id, const char *dev_name,
diff --git a/mm/memblock.c b/mm/memblock.c
index 39a31e7..7fe5354 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1407,6 +1407,11 @@
 	memblock.current_limit = limit;
 }
 
+phys_addr_t __init_memblock memblock_get_current_limit(void)
+{
+	return memblock.current_limit;	/* as last stored in memblock */
+}
+
 static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
 {
 	unsigned long long base, size;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4061098..99a45fd 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1502,6 +1502,16 @@
 #define R_ARM_JUMP24	29
 #endif
 
+#ifndef	R_ARM_THM_CALL
+#define	R_ARM_THM_CALL		10
+#endif
+#ifndef	R_ARM_THM_JUMP24
+#define	R_ARM_THM_JUMP24	30
+#endif
+#ifndef	R_ARM_THM_JUMP19
+#define	R_ARM_THM_JUMP19	51
+#endif
+
 static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
 {
 	unsigned int r_typ = ELF_R_TYPE(r->r_info);
@@ -1515,6 +1525,9 @@
 	case R_ARM_PC24:
 	case R_ARM_CALL:
 	case R_ARM_JUMP24:
+	case R_ARM_THM_CALL:
+	case R_ARM_THM_JUMP24:
+	case R_ARM_THM_JUMP19:
 		/* From ARM ABI: ((S + A) | T) - P */
 		r->r_addend = (int)(long)(elf->hdr +
 		              sechdr->sh_offset +