Merge branch 'devel-stable' into for-next

Conflicts:
	arch/arm/kernel/entry-armv.S
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
index 4e686a2..a341d87 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/Booting
@@ -164,3 +164,8 @@
 - The boot loader is expected to call the kernel image by jumping
   directly to the first instruction of the kernel image.
 
+  On CPUs supporting the ARM instruction set, the entry must be
+  made in ARM state, even for a Thumb-2 kernel.
+
+  On CPUs supporting only the Thumb instruction set such as
+  Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt
new file mode 100644
index 0000000..4419598
--- /dev/null
+++ b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt
@@ -0,0 +1,42 @@
+ROM-able zImage boot from eSD
+-----------------------------
+
+An ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and
+SuperH Mobile ARM will to boot directly from the SDHI hardware block.
+
+This is achieved by the mask ROM loading the first portion of the image into
+MERAM and then jumping to it. This portion contains loader code which
+copies the entire image to SDRAM and jumps to it. From there the zImage
+boot code proceeds as normal, uncompressing the image into its final
+location and then jumping to it.
+
+This code has been tested on an mackerel board using the developer 1A eSD
+boot mode which is configured using the following jumper settings.
+
+   8 7 6 5 4 3 2 1
+   x|x|x|x| |x|x|
+S4 -+-+-+-+-+-+-+-
+    | | | |x| | |x on
+
+The eSD card needs to be present in SDHI slot 1 (CN7).
+As such S1 and S33 also need to be configured as per
+the notes in arch/arm/mach-shmobile/board-mackerel.c.
+
+A partial zImage must be written to physical partition #1 (boot)
+of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to
+accomplish this.
+
+e.g.
+	vrl4 < zImage | dd of=/dev/sdX bs=512 count=17
+
+A full copy of _the same_ zImage should be written to physical partition #1
+(boot) of the eSD at sector 0. This should _not_ be in vrl4 format.
+
+	vrl4 < zImage | dd of=/dev/sdX bs=512
+
+Note: The commands above assume that the physical partition has been
+switched. No such facility currently exists in the Linux Kernel.
+
+Physical partitions are described in the eSD specification.  At the time of
+writing they are not the same as partitions that are typically configured
+using fdisk and visible through /proc/partitions
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
new file mode 100644
index 0000000..1c044eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -0,0 +1,21 @@
+* ARM Performance Monitor Units
+
+ARM cores often have a PMU for counting cpu and cache events like cache misses
+and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
+representation in the device tree should be done as under:-
+
+Required properties:
+
+- compatible : should be one of
+	"arm,cortex-a9-pmu"
+	"arm,cortex-a8-pmu"
+	"arm,arm1176-pmu"
+	"arm,arm1136-pmu"
+- interrupts : 1 combined interrupt or 1 per core.
+
+Example:
+
+pmu {
+        compatible = "arm,cortex-a9-pmu";
+        interrupts = <100 101>;
+};
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index d5c0cef..873a2ab 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -40,7 +40,6 @@
 	- POSIX ACLs
 	- quotas
 	- fsck
-	- resize
 	- defragmentation
 
 Mount options
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d3d653a..bfe9242 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -346,7 +346,7 @@
 	when RTO retransmissions remain unacknowledged.
 	See tcp_retries2 for more details.
 
-	The default value is 7.
+	The default value is 8.
 	If your machine is a loaded WEB server,
 	you should think about lowering this value, such sockets
 	may consume significant resources. Cf. tcp_max_orphans.
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9b7221a..7c3a880 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -674,7 +674,7 @@
 
 Field name:	init_size
 Type:		read
-Offset/size:	0x25c/4
+Offset/size:	0x260/4
 
   This field indicates the amount of linear contiguous memory starting
   at the kernel runtime start address that the kernel needs before it
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 68b4561..8944874 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -37,6 +37,9 @@
 	  Europe.  There is an ARM Linux project with a web page at
 	  <http://www.arm.linux.org.uk/>.
 
+config ARM_HAS_SG_CHAIN
+	bool
+
 config HAVE_PWM
 	bool
 
@@ -1346,7 +1349,6 @@
 
 config HAVE_ARM_SCU
 	bool
-	depends on SMP
 	help
 	  This option enables support for the ARM system coherency unit
 
@@ -1715,17 +1717,34 @@
 	  Say Y here if you intend to execute your compressed kernel image
 	  (zImage) directly from ROM or flash.  If unsure, say N.
 
+choice
+	prompt "Include SD/MMC loader in zImage (EXPERIMENTAL)"
+	depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
+	default ZBOOT_ROM_NONE
+	help
+	  Include experimental SD/MMC loading code in the ROM-able zImage.
+	  With this enabled it is possible to write the the ROM-able zImage
+	  kernel image to an MMC or SD card and boot the kernel straight
+	  from the reset vector. At reset the processor Mask ROM will load
+	  the first part of the the ROM-able zImage which in turn loads the
+	  rest the kernel image to RAM.
+
+config ZBOOT_ROM_NONE
+	bool "No SD/MMC loader in zImage (EXPERIMENTAL)"
+	help
+	  Do not load image from SD or MMC
+
 config ZBOOT_ROM_MMCIF
 	bool "Include MMCIF loader in zImage (EXPERIMENTAL)"
-	depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
 	help
-	  Say Y here to include experimental MMCIF loading code in the
-	  ROM-able zImage. With this enabled it is possible to write the
-	  the ROM-able zImage kernel image to an MMC card and boot the
-	  kernel straight from the reset vector. At reset the processor
-	  Mask ROM will load the first part of the the ROM-able zImage
-	  which in turn loads the rest the kernel image to RAM using the
-	  MMCIF hardware block.
+	  Load image from MMCIF hardware block.
+
+config ZBOOT_ROM_SH_MOBILE_SDHI
+	bool "Include SuperH Mobile SDHI loader in zImage (EXPERIMENTAL)"
+	help
+	  Load image from SDHI hardware block
+
+endchoice
 
 config CMDLINE
 	string "Default kernel command string"
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 23aad07..0c74a6f 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -6,13 +6,19 @@
 
 OBJS		=
 
-# Ensure that mmcif loader code appears early in the image
+# Ensure that MMCIF loader code appears early in the image
 # to minimise that number of bocks that have to be read in
 # order to load it.
 ifeq ($(CONFIG_ZBOOT_ROM_MMCIF),y)
-ifeq ($(CONFIG_ARCH_SH7372),y)
 OBJS		+= mmcif-sh7372.o
 endif
+
+# Ensure that SDHI loader code appears early in the image
+# to minimise that number of bocks that have to be read in
+# order to load it.
+ifeq ($(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI),y)
+OBJS		+= sdhi-shmobile.o
+OBJS		+= sdhi-sh7372.o
 endif
 
 AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
diff --git a/arch/arm/boot/compressed/head-shmobile.S b/arch/arm/boot/compressed/head-shmobile.S
index c943d2e..fe3719b 100644
--- a/arch/arm/boot/compressed/head-shmobile.S
+++ b/arch/arm/boot/compressed/head-shmobile.S
@@ -25,14 +25,14 @@
 	/* load board-specific initialization code */
 #include <mach/zboot.h>
 
-#ifdef CONFIG_ZBOOT_ROM_MMCIF
-	/* Load image from MMC */
-	adr	sp, __tmp_stack + 128
+#if defined(CONFIG_ZBOOT_ROM_MMCIF) || defined(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI)
+	/* Load image from MMC/SD */
+	adr	sp, __tmp_stack + 256
 	ldr	r0, __image_start
 	ldr	r1, __image_end
 	subs	r1, r1, r0
 	ldr	r0, __load_base
-	bl	mmcif_loader
+	bl	mmc_loader
 
 	/* Jump to loaded code */
 	ldr	r0, __loaded
@@ -51,9 +51,9 @@
 	.long	__continue
 	.align
 __tmp_stack:
-	.space	128
+	.space	256
 __continue:
-#endif /* CONFIG_ZBOOT_ROM_MMCIF */
+#endif /* CONFIG_ZBOOT_ROM_MMC || CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI */
 
 	b	1f
 __atags:@ tag #1
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 940b201..e95a598 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -353,7 +353,8 @@
 		mov	r0, #0			@ must be zero
 		mov	r1, r7			@ restore architecture number
 		mov	r2, r8			@ restore atags pointer
-		mov	pc, r4			@ call kernel
+ ARM(		mov	pc, r4	)		@ call kernel
+ THUMB(		bx	r4	)		@ entry point is always ARM
 
 		.align	2
 		.type	LC0, #object
diff --git a/arch/arm/boot/compressed/mmcif-sh7372.c b/arch/arm/boot/compressed/mmcif-sh7372.c
index 7453c83..b6f61d9 100644
--- a/arch/arm/boot/compressed/mmcif-sh7372.c
+++ b/arch/arm/boot/compressed/mmcif-sh7372.c
@@ -40,7 +40,7 @@
  * to an MMC card
  * # dd if=vrl4.out of=/dev/sdx bs=512 seek=1
  */
-asmlinkage void mmcif_loader(unsigned char *buf, unsigned long len)
+asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
 {
 	mmc_init_progress();
 	mmc_update_progress(MMC_PROGRESS_ENTER);
diff --git a/arch/arm/boot/compressed/sdhi-sh7372.c b/arch/arm/boot/compressed/sdhi-sh7372.c
new file mode 100644
index 0000000..d403a8b
--- /dev/null
+++ b/arch/arm/boot/compressed/sdhi-sh7372.c
@@ -0,0 +1,95 @@
+/*
+ * SuperH Mobile SDHI
+ *
+ * Copyright (C) 2010 Magnus Damm
+ * Copyright (C) 2010 Kuninori Morimoto
+ * Copyright (C) 2010 Simon Horman
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Parts inspired by u-boot
+ */
+
+#include <linux/io.h>
+#include <mach/mmc.h>
+#include <linux/mmc/boot.h>
+#include <linux/mmc/tmio.h>
+
+#include "sdhi-shmobile.h"
+
+#define PORT179CR       0xe60520b3
+#define PORT180CR       0xe60520b4
+#define PORT181CR       0xe60520b5
+#define PORT182CR       0xe60520b6
+#define PORT183CR       0xe60520b7
+#define PORT184CR       0xe60520b8
+
+#define SMSTPCR3        0xe615013c
+
+#define CR_INPUT_ENABLE 0x10
+#define CR_FUNCTION1    0x01
+
+#define SDHI1_BASE	(void __iomem *)0xe6860000
+#define SDHI_BASE	SDHI1_BASE
+
+/*  SuperH Mobile SDHI loader
+ *
+ * loads the zImage from an SD card starting from block 0
+ * on physical partition 1
+ *
+ * The image must be start with a vrl4 header and
+ * the zImage must start at offset 512 of the image. That is,
+ * at block 1 (=byte 512) of physical partition 1
+ *
+ * Use the following line to write the vrl4 formated zImage
+ * to an SD card
+ * # dd if=vrl4.out of=/dev/sdx bs=512
+ */
+asmlinkage void mmc_loader(unsigned short *buf, unsigned long len)
+{
+	int high_capacity;
+
+	mmc_init_progress();
+
+	mmc_update_progress(MMC_PROGRESS_ENTER);
+        /* Initialise SDHI1 */
+        /* PORT184CR: GPIO_FN_SDHICMD1 Control */
+        __raw_writeb(CR_FUNCTION1, PORT184CR);
+        /* PORT179CR: GPIO_FN_SDHICLK1 Control */
+        __raw_writeb(CR_INPUT_ENABLE|CR_FUNCTION1, PORT179CR);
+        /* PORT181CR: GPIO_FN_SDHID1_3 Control */
+        __raw_writeb(CR_FUNCTION1, PORT183CR);
+        /* PORT182CR: GPIO_FN_SDHID1_2 Control */
+        __raw_writeb(CR_FUNCTION1, PORT182CR);
+        /* PORT183CR: GPIO_FN_SDHID1_1 Control */
+        __raw_writeb(CR_FUNCTION1, PORT181CR);
+        /* PORT180CR: GPIO_FN_SDHID1_0 Control */
+        __raw_writeb(CR_FUNCTION1, PORT180CR);
+
+        /* Enable clock to SDHI1 hardware block */
+        __raw_writel(__raw_readl(SMSTPCR3) & ~(1 << 13), SMSTPCR3);
+
+	/* setup SDHI hardware */
+	mmc_update_progress(MMC_PROGRESS_INIT);
+	high_capacity = sdhi_boot_init(SDHI_BASE);
+	if (high_capacity < 0)
+		goto err;
+
+	mmc_update_progress(MMC_PROGRESS_LOAD);
+	/* load kernel */
+	if (sdhi_boot_do_read(SDHI_BASE, high_capacity,
+			      0, /* Kernel is at block 1 */
+			      (len + TMIO_BBS - 1) / TMIO_BBS, buf))
+		goto err;
+
+        /* Disable clock to SDHI1 hardware block */
+        __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3);
+
+	mmc_update_progress(MMC_PROGRESS_DONE);
+
+	return;
+err:
+	for(;;);
+}
diff --git a/arch/arm/boot/compressed/sdhi-shmobile.c b/arch/arm/boot/compressed/sdhi-shmobile.c
new file mode 100644
index 0000000..bd3d469
--- /dev/null
+++ b/arch/arm/boot/compressed/sdhi-shmobile.c
@@ -0,0 +1,449 @@
+/*
+ * SuperH Mobile SDHI
+ *
+ * Copyright (C) 2010 Magnus Damm
+ * Copyright (C) 2010 Kuninori Morimoto
+ * Copyright (C) 2010 Simon Horman
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Parts inspired by u-boot
+ */
+
+#include <linux/io.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/tmio.h>
+#include <mach/sdhi.h>
+
+#define OCR_FASTBOOT		(1<<29)
+#define OCR_HCS			(1<<30)
+#define OCR_BUSY		(1<<31)
+
+#define RESP_CMD12		0x00000030
+
+static inline u16 sd_ctrl_read16(void __iomem *base, int addr)
+{
+        return __raw_readw(base + addr);
+}
+
+static inline u32 sd_ctrl_read32(void __iomem *base, int addr)
+{
+	return __raw_readw(base + addr) |
+	       __raw_readw(base + addr + 2) << 16;
+}
+
+static inline void sd_ctrl_write16(void __iomem *base, int addr, u16 val)
+{
+	__raw_writew(val, base + addr);
+}
+
+static inline void sd_ctrl_write32(void __iomem *base, int addr, u32 val)
+{
+	__raw_writew(val, base + addr);
+	__raw_writew(val >> 16, base + addr + 2);
+}
+
+#define ALL_ERROR (TMIO_STAT_CMD_IDX_ERR | TMIO_STAT_CRCFAIL |		\
+		   TMIO_STAT_STOPBIT_ERR | TMIO_STAT_DATATIMEOUT |	\
+		   TMIO_STAT_RXOVERFLOW | TMIO_STAT_TXUNDERRUN |	\
+		   TMIO_STAT_CMDTIMEOUT | TMIO_STAT_ILL_ACCESS |	\
+		   TMIO_STAT_ILL_FUNC)
+
+static int sdhi_intr(void __iomem *base)
+{
+	unsigned long state = sd_ctrl_read32(base, CTL_STATUS);
+
+	if (state & ALL_ERROR) {
+		sd_ctrl_write32(base, CTL_STATUS, ~ALL_ERROR);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				ALL_ERROR |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return -EINVAL;
+	}
+	if (state & TMIO_STAT_CMDRESPEND) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_CMDRESPEND);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_CMDRESPEND |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+	if (state & TMIO_STAT_RXRDY) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_RXRDY);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_RXRDY | TMIO_STAT_TXUNDERRUN |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+	if (state & TMIO_STAT_DATAEND) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_DATAEND);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_DATAEND |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+static int sdhi_boot_wait_resp_end(void __iomem *base)
+{
+	int err = -EAGAIN, timeout = 10000000;
+
+	while (timeout--) {
+		err = sdhi_intr(base);
+		if (err != -EAGAIN)
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+
+/* SDHI_CLK_CTRL */
+#define CLK_MMC_ENABLE                 (1 << 8)
+#define CLK_MMC_INIT                   (1 << 6)        /* clk / 256 */
+
+static void sdhi_boot_mmc_clk_stop(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_CLK_AND_WAIT_CTL, 0x0000);
+	msleep(10);
+	sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL, ~CLK_MMC_ENABLE &
+		sd_ctrl_read16(base, CTL_SD_CARD_CLK_CTL));
+	msleep(10);
+}
+
+static void sdhi_boot_mmc_clk_start(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL, CLK_MMC_ENABLE |
+		sd_ctrl_read16(base, CTL_SD_CARD_CLK_CTL));
+	msleep(10);
+	sd_ctrl_write16(base, CTL_CLK_AND_WAIT_CTL, CLK_MMC_ENABLE);
+	msleep(10);
+}
+
+static void sdhi_boot_reset(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_RESET_SD, 0x0000);
+	msleep(10);
+	sd_ctrl_write16(base, CTL_RESET_SD, 0x0001);
+	msleep(10);
+}
+
+/* Set MMC clock / power.
+ * Note: This controller uses a simple divider scheme therefore it cannot
+ * run a MMC card at full speed (20MHz). The max clock is 24MHz on SD, but as
+ * MMC wont run that fast, it has to be clocked at 12MHz which is the next
+ * slowest setting.
+ */
+static int sdhi_boot_mmc_set_ios(void __iomem *base, struct mmc_ios *ios)
+{
+	if (sd_ctrl_read32(base, CTL_STATUS) & TMIO_STAT_CMD_BUSY)
+		return -EBUSY;
+
+	if (ios->clock)
+		sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL,
+				ios->clock | CLK_MMC_ENABLE);
+
+	/* Power sequence - OFF -> ON -> UP */
+	switch (ios->power_mode) {
+	case MMC_POWER_OFF: /* power down SD bus */
+		sdhi_boot_mmc_clk_stop(base);
+		break;
+	case MMC_POWER_ON: /* power up SD bus */
+		break;
+	case MMC_POWER_UP: /* start bus clock */
+		sdhi_boot_mmc_clk_start(base);
+		break;
+	}
+
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		sd_ctrl_write16(base, CTL_SD_MEM_CARD_OPT, 0x80e0);
+	break;
+	case MMC_BUS_WIDTH_4:
+		sd_ctrl_write16(base, CTL_SD_MEM_CARD_OPT, 0x00e0);
+	break;
+	}
+
+	/* Let things settle. delay taken from winCE driver */
+	udelay(140);
+
+	return 0;
+}
+
+/* These are the bitmasks the tmio chip requires to implement the MMC response
+ * types. Note that R1 and R6 are the same in this scheme. */
+#define RESP_NONE      0x0300
+#define RESP_R1        0x0400
+#define RESP_R1B       0x0500
+#define RESP_R2        0x0600
+#define RESP_R3        0x0700
+#define DATA_PRESENT   0x0800
+#define TRANSFER_READ  0x1000
+
+static int sdhi_boot_request(void __iomem *base, struct mmc_command *cmd)
+{
+	int err, c = cmd->opcode;
+
+	switch (mmc_resp_type(cmd)) {
+	case MMC_RSP_NONE: c |= RESP_NONE; break;
+	case MMC_RSP_R1:   c |= RESP_R1;   break;
+	case MMC_RSP_R1B:  c |= RESP_R1B;  break;
+	case MMC_RSP_R2:   c |= RESP_R2;   break;
+	case MMC_RSP_R3:   c |= RESP_R3;   break;
+	default:
+		return -EINVAL;
+	}
+
+	/* No interrupts so this may not be cleared */
+	sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_CMDRESPEND);
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK, TMIO_STAT_CMDRESPEND |
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+	sd_ctrl_write32(base, CTL_ARG_REG, cmd->arg);
+	sd_ctrl_write16(base, CTL_SD_CMD, c);
+
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK,
+			~(TMIO_STAT_CMDRESPEND | ALL_ERROR) &
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	cmd->resp[0] = sd_ctrl_read32(base, CTL_RESPONSE);
+
+	return 0;
+}
+
+static int sdhi_boot_do_read_single(void __iomem *base, int high_capacity,
+				    unsigned long block, unsigned short *buf)
+{
+	int err, i;
+
+	/* CMD17 - Read */
+	{
+		struct mmc_command cmd;
+
+		cmd.opcode = MMC_READ_SINGLE_BLOCK | \
+			     TRANSFER_READ | DATA_PRESENT;
+		if (high_capacity)
+			cmd.arg = block;
+		else
+			cmd.arg = block * TMIO_BBS;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK,
+			~(TMIO_STAT_DATAEND | TMIO_STAT_RXRDY |
+			  TMIO_STAT_TXUNDERRUN) &
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	sd_ctrl_write16(base, CTL_SD_XFER_LEN, TMIO_BBS);
+	for (i = 0; i < TMIO_BBS / sizeof(*buf); i++)
+		*buf++ = sd_ctrl_read16(base, RESP_CMD12);
+
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int sdhi_boot_do_read(void __iomem *base, int high_capacity,
+		      unsigned long offset, unsigned short count,
+		      unsigned short *buf)
+{
+	unsigned long i;
+	int err = 0;
+
+	for (i = 0; i < count; i++) {
+		err = sdhi_boot_do_read_single(base, high_capacity, offset + i,
+					       buf + (i * TMIO_BBS /
+						      sizeof(*buf)));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#define VOLTAGES (MMC_VDD_32_33 | MMC_VDD_33_34)
+
+int sdhi_boot_init(void __iomem *base)
+{
+	bool sd_v2 = false, sd_v1_0 = false;
+	unsigned short cid;
+	int err, high_capacity = 0;
+
+	sdhi_boot_mmc_clk_stop(base);
+	sdhi_boot_reset(base);
+
+	/* mmc0: clock 400000Hz busmode 1 powermode 2 cs 0 Vdd 21 width 0 timing 0 */
+	{
+		struct mmc_ios ios;
+		ios.power_mode = MMC_POWER_ON;
+		ios.bus_width = MMC_BUS_WIDTH_1;
+		ios.clock = CLK_MMC_INIT;
+		err = sdhi_boot_mmc_set_ios(base, &ios);
+		if (err)
+			return err;
+	}
+
+	/* CMD0 */
+	{
+		struct mmc_command cmd;
+		msleep(1);
+		cmd.opcode = MMC_GO_IDLE_STATE;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_NONE;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+		msleep(2);
+	}
+
+	/* CMD8 - Test for SD version 2 */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = SD_SEND_IF_COND;
+		cmd.arg = (VOLTAGES != 0) << 8 | 0xaa;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd); /* Ignore error */
+		if ((cmd.resp[0] & 0xff) == 0xaa)
+			sd_v2 = true;
+	}
+
+	/* CMD55 - Get OCR (SD) */
+	{
+		int timeout = 1000;
+		struct mmc_command cmd;
+
+		cmd.arg = 0;
+
+		do {
+			cmd.opcode = MMC_APP_CMD;
+			cmd.flags = MMC_RSP_R1;
+			cmd.arg = 0;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				break;
+
+			cmd.opcode = SD_APP_OP_COND;
+			cmd.flags = MMC_RSP_R3;
+			cmd.arg = (VOLTAGES & 0xff8000);
+			if (sd_v2)
+				cmd.arg |= OCR_HCS;
+			cmd.arg |= OCR_FASTBOOT;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				break;
+
+			msleep(1);
+		} while((!(cmd.resp[0] & OCR_BUSY)) && --timeout);
+
+		if (!err && timeout) {
+			if (!sd_v2)
+				sd_v1_0 = true;
+			high_capacity = (cmd.resp[0] & OCR_HCS) == OCR_HCS;
+		}
+	}
+
+	/* CMD1 - Get OCR (MMC) */
+	if (!sd_v2 && !sd_v1_0) {
+		int timeout = 1000;
+		struct mmc_command cmd;
+
+		do {
+			cmd.opcode = MMC_SEND_OP_COND;
+			cmd.arg = VOLTAGES | OCR_HCS;
+			cmd.flags = MMC_RSP_R3;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				return err;
+
+			msleep(1);
+		} while((!(cmd.resp[0] & OCR_BUSY)) && --timeout);
+
+		if (!timeout)
+			return -EAGAIN;
+
+		high_capacity = (cmd.resp[0] & OCR_HCS) == OCR_HCS;
+	}
+
+	/* CMD2 - Get CID */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_ALL_SEND_CID;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R2;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD3
+	 * MMC: Set the relative address
+	 * SD:  Get the relative address
+	 * Also puts the card into the standby state
+	 */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SET_RELATIVE_ADDR;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+		cid = cmd.resp[0] >> 16;
+	}
+
+	/* CMD9 - Get CSD */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SEND_CSD;
+		cmd.arg = cid << 16;
+		cmd.flags = MMC_RSP_R2;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD7 - Select the card */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SELECT_CARD;
+		//cmd.arg = rca << 16;
+		cmd.arg = cid << 16;
+		//cmd.flags = MMC_RSP_R1B;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD16 - Set the block size */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SET_BLOCKLEN;
+		cmd.arg = TMIO_BBS;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	return high_capacity;
+}
diff --git a/arch/arm/boot/compressed/sdhi-shmobile.h b/arch/arm/boot/compressed/sdhi-shmobile.h
new file mode 100644
index 0000000..92eaa09
--- /dev/null
+++ b/arch/arm/boot/compressed/sdhi-shmobile.h
@@ -0,0 +1,11 @@
+#ifndef SDHI_MOBILE_H
+#define SDHI_MOBILE_H
+
+#include <linux/compiler.h>
+
+int sdhi_boot_do_read(void __iomem *base, int high_capacity,
+		      unsigned long offset, unsigned short count,
+		      unsigned short *buf);
+int sdhi_boot_init(void __iomem *base);
+
+#endif
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index ea80abe..4e72883 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -33,20 +33,24 @@
     *(.text.*)
     *(.fixup)
     *(.gnu.warning)
+    *(.glue_7t)
+    *(.glue_7)
+  }
+  .rodata : {
     *(.rodata)
     *(.rodata.*)
-    *(.glue_7)
-    *(.glue_7t)
+  }
+  .piggydata : {
     *(.piggydata)
-    . = ALIGN(4);
   }
 
+  . = ALIGN(4);
   _etext = .;
 
+  .got.plt		: { *(.got.plt) }
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
-  .got.plt		: { *(.got.plt) }
   _edata = .;
 
   . = BSS_START;
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 4ddd0a6..7bdd917 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -179,22 +179,21 @@
 {
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
 	unsigned int shift = (d->irq % 4) * 8;
-	unsigned int cpu = cpumask_first(mask_val);
+	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
 	u32 val, mask, bit;
 
-	if (cpu >= 8)
+	if (cpu >= 8 || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
 	mask = 0xff << shift;
 	bit = 1 << (cpu + shift);
 
 	spin_lock(&irq_controller_lock);
-	d->node = cpu;
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	spin_unlock(&irq_controller_lock);
 
-	return 0;
+	return IRQ_SET_MASK_OK;
 }
 #endif
 
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index b4892a0..f428059 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -26,8 +26,8 @@
 #include <linux/compiler.h>
 #include <asm/system.h>
 
-#define smp_mb__before_clear_bit()	mb()
-#define smp_mb__after_clear_bit()	mb()
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
 
 /*
  * These functions are the basis of our bit ops.
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 2da8547..2f1e209 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -4,8 +4,8 @@
  * Interrupt handling.  Preserves r7, r8, r9
  */
 	.macro	arch_irq_handler_default
-	get_irqnr_preamble r5, lr
-1:	get_irqnr_and_base r0, r6, r5, lr
+	get_irqnr_preamble r6, lr
+1:	get_irqnr_and_base r0, r2, r6, lr
 	movne	r1, sp
 	@
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
@@ -17,17 +17,17 @@
 	/*
 	 * XXX
 	 *
-	 * this macro assumes that irqstat (r6) and base (r5) are
+	 * this macro assumes that irqstat (r2) and base (r6) are
 	 * preserved from get_irqnr_and_base above
 	 */
-	ALT_SMP(test_for_ipi r0, r6, r5, lr)
+	ALT_SMP(test_for_ipi r0, r2, r6, lr)
 	ALT_UP_B(9997f)
 	movne	r1, sp
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
 
 #ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r6, r5, lr
+	test_for_ltirq r0, r2, r6, lr
 	movne	r0, sp
 	adrne	lr, BSYM(1b)
 	bne	do_local_timer
@@ -40,7 +40,7 @@
 	.align	5
 	.global \symbol_name
 \symbol_name:
-	mov	r4, lr
+	mov	r8, lr
 	arch_irq_handler_default
-	mov     pc, r4
+	mov     pc, r8
 	.endm
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 7544ce6..67c70a3 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -52,7 +52,7 @@
  * a cookie.
  */
 extern int
-release_pmu(struct platform_device *pdev);
+release_pmu(enum arm_pmu_type type);
 
 /**
  * init_pmu() - Initialise the PMU.
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 8ec535e..633d1cb 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -82,13 +82,13 @@
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 #else
-#define cpu_proc_init()			processor._proc_init()
-#define cpu_proc_fin()			processor._proc_fin()
-#define cpu_reset(addr)			processor.reset(addr)
-#define cpu_do_idle()			processor._do_idle()
-#define cpu_dcache_clean_area(addr,sz)	processor.dcache_clean_area(addr,sz)
-#define cpu_set_pte_ext(ptep,pte,ext)	processor.set_pte_ext(ptep,pte,ext)
-#define cpu_do_switch_mm(pgd,mm)	processor.switch_mm(pgd,mm)
+#define cpu_proc_init			processor._proc_init
+#define cpu_proc_fin			processor._proc_fin
+#define cpu_reset			processor.reset
+#define cpu_do_idle			processor._do_idle
+#define cpu_dcache_clean_area		processor.dcache_clean_area
+#define cpu_set_pte_ext			processor.set_pte_ext
+#define cpu_do_switch_mm		processor.switch_mm
 #endif
 
 extern void cpu_resume(void);
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
index 2f87870..cefdb8f 100644
--- a/arch/arm/include/asm/scatterlist.h
+++ b/arch/arm/include/asm/scatterlist.h
@@ -1,6 +1,10 @@
 #ifndef _ASMARM_SCATTERLIST_H
 #define _ASMARM_SCATTERLIST_H
 
+#ifdef CONFIG_ARM_HAS_SG_CHAIN
+#define ARCH_HAS_SG_CHAIN
+#endif
+
 #include <asm/memory.h>
 #include <asm/types.h>
 #include <asm-generic/scatterlist.h>
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index ee2ad8a..915696d 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -187,12 +187,16 @@
 
 #define __tag __used __attribute__((__section__(".taglist.init")))
 #define __tagtable(tag, fn) \
-static struct tagtable __tagtable_##fn __tag = { tag, fn }
+static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
 /*
  * Memory map description
  */
-#define NR_BANKS 8
+#ifdef CONFIG_ARCH_EP93XX
+# define NR_BANKS 16
+#else
+# define NR_BANKS 8
+#endif
 
 struct membank {
 	phys_addr_t start;
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
new file mode 100644
index 0000000..b0e4e1a
--- /dev/null
+++ b/arch/arm/include/asm/suspend.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_ARM_SUSPEND_H
+#define __ASM_ARM_SUSPEND_H
+
+#include <asm/memory.h>
+#include <asm/tlbflush.h>
+
+extern void cpu_resume(void);
+
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	extern int __cpu_suspend(int, long, unsigned long,
+				 int (*)(unsigned long));
+	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
+	flush_tlb_all();
+	return ret;
+}
+
+#endif
diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h
index 5929ef5..8578d72 100644
--- a/arch/arm/include/asm/tcm.h
+++ b/arch/arm/include/asm/tcm.h
@@ -27,5 +27,7 @@
 
 void *tcm_alloc(size_t len);
 void tcm_free(void *addr, size_t len);
+bool tcm_dtcm_present(void);
+bool tcm_itcm_present(void);
 
 #endif
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index d2005de..8077145 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -34,16 +34,12 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 
-#define TLB_BTB		(1 << 28)
-
 /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
 #define TLB_V7_UIS_PAGE	(1 << 19)
 #define TLB_V7_UIS_FULL (1 << 20)
 #define TLB_V7_UIS_ASID (1 << 21)
 
-/* Inner Shareable BTB operation (ARMv7 MP extensions) */
-#define TLB_V7_IS_BTB	(1 << 22)
-
+#define TLB_BARRIER	(1 << 28)
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
@@ -58,7 +54,7 @@
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
  *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
- *	  fa    - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB))
+ *	  fa    - Faraday (v4 with write buffer with UTLB)
  *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
  *	  v7wbi - identical to v6wbi
  */
@@ -99,7 +95,7 @@
 # define v4_always_flags	(-1UL)
 #endif
 
-#define fa_tlb_flags	(TLB_WB | TLB_BTB | TLB_DCLEAN | \
+#define fa_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V4_U_FULL | TLB_V4_U_PAGE)
 
 #ifdef CONFIG_CPU_TLB_FA
@@ -166,7 +162,7 @@
 # define v4wb_always_flags	(-1UL)
 #endif
 
-#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
 			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
 			 TLB_V6_I_ASID | TLB_V6_D_ASID)
@@ -184,9 +180,9 @@
 # define v6wbi_always_flags	(-1UL)
 #endif
 
-#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
+#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
-#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
 
 #ifdef CONFIG_CPU_TLB_V7
@@ -341,15 +337,7 @@
 	if (tlb_flag(TLB_V7_UIS_FULL))
 		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		isb();
 	}
@@ -389,17 +377,8 @@
 		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
 #endif
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER))
 		dsb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
 }
 
 static inline void
@@ -439,17 +418,8 @@
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
 #endif
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER))
 		dsb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
 }
 
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
@@ -482,15 +452,7 @@
 	if (tlb_flag(TLB_V7_UIS_PAGE))
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		isb();
 	}
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f90756d..5b29a66 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -3,6 +3,9 @@
 
 #include <linux/list.h>
 
+struct pt_regs;
+struct task_struct;
+
 struct undef_hook {
 	struct list_head node;
 	u32 instr_mask;
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 927522c..16baba2 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -59,6 +59,9 @@
   DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
   DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
   DEFINE(TI_VFPSTATE,		offsetof(struct thread_info, vfpstate));
+#ifdef CONFIG_SMP
+  DEFINE(VFP_CPU,		offsetof(union vfp_state, hard.cpu));
+#endif
 #ifdef CONFIG_ARM_THUMBEE
   DEFINE(TI_THUMBEE_STATE,	offsetof(struct thread_info, thumbee_state));
 #endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 8f9ab5c..a87cbf8 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -29,21 +29,53 @@
 #include <asm/entry-macro-multi.S>
 
 /*
- * Interrupt handling.  Preserves r7, r8, r9
+ * Interrupt handling.
  */
 	.macro	irq_handler
 #ifdef CONFIG_MULTI_IRQ_HANDLER
-	ldr	r5, =handle_arch_irq
+	ldr	r1, =handle_arch_irq
 	mov	r0, sp
-	ldr	r5, [r5]
+	ldr	r1, [r1]
 	adr	lr, BSYM(9997f)
-	teq	r5, #0
-	movne	pc, r5
+	teq	r1, #0
+	movne	pc, r1
 #endif
 	arch_irq_handler_default
 9997:
 	.endm
 
+	.macro	pabt_helper
+	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
+#ifdef MULTI_PABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+#else
+	bl	CPU_PABORT_HANDLER
+#endif
+	.endm
+
+	.macro	dabt_helper
+
+	@
+	@ Call the processor-specific abort handler:
+	@
+	@  r2 - pt_regs
+	@  r4 - aborted context pc
+	@  r5 - aborted context psr
+	@
+	@ The abort handler must return the aborted address in r0, and
+	@ the fault status register in r1.  r9 must be preserved.
+	@
+#ifdef MULTI_DABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+#else
+	bl	CPU_DABORT_HANDLER
+#endif
+	.endm
+
 #ifdef CONFIG_KPROBES
 	.section	.kprobes.text,"ax",%progbits
 #else
@@ -126,106 +158,74 @@
  SPFIX(	subeq	sp, sp, #4	)
 	stmia	sp, {r1 - r12}
 
-	ldmia	r0, {r1 - r3}
-	add	r5, sp, #S_SP - 4	@ here for interlock avoidance
-	mov	r4, #-1			@  ""  ""      ""       ""
-	add	r0, sp, #(S_FRAME_SIZE + \stack_hole - 4)
- SPFIX(	addeq	r0, r0, #4	)
-	str	r1, [sp, #-4]!		@ save the "real" r0 copied
+	ldmia	r0, {r3 - r5}
+	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
+	mov	r6, #-1			@  ""  ""      ""       ""
+	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+ SPFIX(	addeq	r2, r2, #4	)
+	str	r3, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
 
-	mov	r1, lr
+	mov	r3, lr
 
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
-	@  r0 - sp_svc
-	@  r1 - lr_svc
-	@  r2 - lr_<exception>, already fixed up for correct return/restart
-	@  r3 - spsr_<exception>
-	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r2 - sp_svc
+	@  r3 - lr_svc
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
-	stmia	r5, {r0 - r4}
+	stmia	r7, {r2 - r6}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
 	.endm
 
 	.align	5
 __dabt_svc:
 	svc_entry
-
-	@
-	@ get ready to re-enable interrupts if appropriate
-	@
-	mrs	r9, cpsr
-	tst	r3, #PSR_I_BIT
-	biceq	r9, r9, #PSR_I_BIT
-
-	@
-	@ Call the processor-specific abort handler:
-	@
-	@  r2 - aborted context pc
-	@  r3 - aborted context cpsr
-	@
-	@ The abort handler must return the aborted address in r0, and
-	@ the fault status register in r1.  r9 must be preserved.
-	@
-#ifdef MULTI_DABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
-#else
-	bl	CPU_DABORT_HANDLER
-#endif
-
-	@
-	@ set desired IRQ state, then call main handler
-	@
-	debug_entry r1
-	msr	cpsr_c, r9
 	mov	r2, sp
-	bl	do_DataAbort
+	dabt_helper
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	disable_irq_notrace
 
-	@
-	@ restore SPSR and restart the instruction
-	@
-	ldr	r2, [sp, #S_PSR]
-	svc_exit r2				@ return from exception
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__dabt_svc)
 
 	.align	5
 __irq_svc:
 	svc_entry
+	irq_handler
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
 #ifdef CONFIG_PREEMPT
 	get_thread_info tsk
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
-	add	r7, r8, #1			@ increment it
-	str	r7, [tsk, #TI_PREEMPT]
-#endif
-
-	irq_handler
-#ifdef CONFIG_PREEMPT
-	str	r8, [tsk, #TI_PREEMPT]		@ restore preempt count
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
 	movne	r0, #0				@ force flags to 0
 	tst	r0, #_TIF_NEED_RESCHED
 	blne	svc_preempt
 #endif
-	ldr	r4, [sp, #S_PSR]		@ irqs are already disabled
+
 #ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r4, #PSR_I_BIT
-	bleq	trace_hardirqs_on
+	@ The parent context IRQs must have been enabled to get here in
+	@ the first place, so there's no point checking the PSR I bit.
+	bl	trace_hardirqs_on
 #endif
-	svc_exit r4				@ return from exception
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__irq_svc)
 
@@ -251,7 +251,6 @@
 #else
 	svc_entry
 #endif
-
 	@
 	@ call emulation code, which returns using r9 if it has emulated
 	@ the instruction, or the more conventional lr if we are to treat
@@ -260,15 +259,16 @@
 	@  r0 - instruction
 	@
 #ifndef	CONFIG_THUMB2_KERNEL
-	ldr	r0, [r2, #-4]
+	ldr	r0, [r4, #-4]
 #else
-	ldrh	r0, [r2, #-2]			@ Thumb instruction at LR - 2
+	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
 	and	r9, r0, #0xf800
 	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
-	ldrhhs	r9, [r2]			@ bottom 16 bits
+	ldrhhs	r9, [r4]			@ bottom 16 bits
 	orrhs	r0, r9, r0, lsl #16
 #endif
 	adr	r9, BSYM(1f)
+	mov	r2, r4
 	bl	call_fpe
 
 	mov	r0, sp				@ struct pt_regs *regs
@@ -282,45 +282,35 @@
 	@
 	@ restore SPSR and restart the instruction
 	@
-	ldr	r2, [sp, #S_PSR]		@ Get SVC cpsr
-	svc_exit r2				@ return from exception
+	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__und_svc)
 
 	.align	5
 __pabt_svc:
 	svc_entry
-
-	@
-	@ re-enable interrupts if appropriate
-	@
-	mrs	r9, cpsr
-	tst	r3, #PSR_I_BIT
-	biceq	r9, r9, #PSR_I_BIT
-
-	mov	r0, r2			@ pass address of aborted instruction.
-#ifdef MULTI_PABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
-#else
-	bl	CPU_PABORT_HANDLER
-#endif
-	debug_entry r1
-	msr	cpsr_c, r9			@ Maybe enable interrupts
 	mov	r2, sp				@ regs
-	bl	do_PrefetchAbort		@ call abort handler
+	pabt_helper
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	disable_irq_notrace
 
-	@
-	@ restore SPSR and restart the instruction
-	@
-	ldr	r2, [sp, #S_PSR]
-	svc_exit r2				@ return from exception
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__pabt_svc)
 
@@ -351,23 +341,23 @@
  ARM(	stmib	sp, {r1 - r12}	)
  THUMB(	stmia	sp, {r0 - r12}	)
 
-	ldmia	r0, {r1 - r3}
+	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
-	mov	r4, #-1			@  ""  ""     ""        ""
+	mov	r6, #-1			@  ""  ""     ""        ""
 
-	str	r1, [sp]		@ save the "real" r0 copied
+	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
-	@  r2 - lr_<exception>, already fixed up for correct return/restart
-	@  r3 - spsr_<exception>
-	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
 	@ Also, separately save sp_usr and lr_usr
 	@
-	stmia	r0, {r2 - r4}
+	stmia	r0, {r4 - r6}
  ARM(	stmdb	r0, {sp, lr}^			)
  THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
@@ -380,6 +370,10 @@
 	@ Clear FP to mark the first stack frame
 	@
 	zero_fp
+
+#ifdef CONFIG_IRQSOFF_TRACER
+	bl	trace_hardirqs_off
+#endif
 	.endm
 
 	.macro	kuser_cmpxchg_check
@@ -391,7 +385,7 @@
 	@ if it was interrupted in a critical region.  Here we
 	@ perform a quick test inline since it should be false
 	@ 99.9999% of the time.  The rest is done out of line.
-	cmp	r2, #TASK_SIZE
+	cmp	r4, #TASK_SIZE
 	blhs	kuser_cmpxchg64_fixup
 #endif
 #endif
@@ -401,32 +395,9 @@
 __dabt_usr:
 	usr_entry
 	kuser_cmpxchg_check
-
-	@
-	@ Call the processor-specific abort handler:
-	@
-	@  r2 - aborted context pc
-	@  r3 - aborted context cpsr
-	@
-	@ The abort handler must return the aborted address in r0, and
-	@ the fault status register in r1.
-	@
-#ifdef MULTI_DABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
-#else
-	bl	CPU_DABORT_HANDLER
-#endif
-
-	@
-	@ IRQs on, then call the main handler
-	@
-	debug_entry r1
-	enable_irq
 	mov	r2, sp
-	adr	lr, BSYM(ret_from_exception)
-	b	do_DataAbort
+	dabt_helper
+	b	ret_from_exception
  UNWIND(.fnend		)
 ENDPROC(__dabt_usr)
 
@@ -434,28 +405,8 @@
 __irq_usr:
 	usr_entry
 	kuser_cmpxchg_check
-
-#ifdef CONFIG_IRQSOFF_TRACER
-	bl	trace_hardirqs_off
-#endif
-
-	get_thread_info tsk
-#ifdef CONFIG_PREEMPT
-	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
-	add	r7, r8, #1			@ increment it
-	str	r7, [tsk, #TI_PREEMPT]
-#endif
-
 	irq_handler
-#ifdef CONFIG_PREEMPT
-	ldr	r0, [tsk, #TI_PREEMPT]
-	str	r8, [tsk, #TI_PREEMPT]
-	teq	r0, r7
- ARM(	strne	r0, [r0, -r0]	)
- THUMB(	movne	r0, #0		)
- THUMB(	strne	r0, [r0]	)
-#endif
-
+	get_thread_info tsk
 	mov	why, #0
 	b	ret_to_user_from_irq
  UNWIND(.fnend		)
@@ -467,6 +418,9 @@
 __und_usr:
 	usr_entry
 
+	mov	r2, r4
+	mov	r3, r5
+
 	@
 	@ fall through to the emulation code, which returns using r9 if
 	@ it has emulated the instruction, or the more conventional lr
@@ -682,19 +636,8 @@
 	.align	5
 __pabt_usr:
 	usr_entry
-
-	mov	r0, r2			@ pass address of aborted instruction.
-#ifdef MULTI_PABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
-#else
-	bl	CPU_PABORT_HANDLER
-#endif
-	debug_entry r1
-	enable_irq				@ Enable interrupts
 	mov	r2, sp				@ regs
-	bl	do_PrefetchAbort		@ call abort handler
+	pabt_helper
  UNWIND(.fnend		)
 	/* fall through */
 /*
@@ -839,13 +782,13 @@
 	.text
 kuser_cmpxchg64_fixup:
 	@ Called from kuser_cmpxchg_fixup.
-	@ r2 = address of interrupted insn (must be preserved).
+	@ r4 = address of interrupted insn (must be preserved).
 	@ sp = saved regs. r7 and r8 are clobbered.
 	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
 	mov	r7, #0xffff0fff
 	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
-	subs	r8, r2, r7
+	subs	r8, r4, r7
 	rsbcss	r8, r8, #(2b - 1b)
 	strcs	r7, [sp, #S_PC]
 #if __LINUX_ARM_ARCH__ < 6
@@ -915,13 +858,13 @@
 	.text
 kuser_cmpxchg32_fixup:
 	@ Called from kuser_cmpxchg_check macro.
-	@ r2 = address of interrupted insn (must be preserved).
+	@ r4 = address of interrupted insn (must be preserved).
 	@ sp = saved regs. r7 and r8 are clobbered.
 	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
 	mov	r7, #0xffff0fff
 	sub	r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
-	subs	r8, r2, r7
+	subs	r8, r4, r7
 	rsbcss	r8, r8, #(2b - 1b)
 	strcs	r7, [sp, #S_PC]
 	mov	pc, lr
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 83e29ad..9a8531e 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -163,25 +163,6 @@
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
-	@
-	@ Debug exceptions are taken as prefetch or data aborts.
-	@ We must disable preemption during the handler so that
-	@ we can access the debug registers safely.
-	@
-	.macro	debug_entry, fsr
-#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
-	ldr	r4, =0x40f		@ mask out fsr.fs
-	and	r5, r4, \fsr
-	cmp	r5, #2			@ debug exception
-	bne	1f
-	get_thread_info r10
-	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
-	add	r11, r6, #1		@ increment it
-	str	r11, [r10, #TI_PREEMPT]
-1:
-#endif
-	.endm
-
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 6b1e0ad..d46f259 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -32,8 +32,16 @@
  * numbers for r1.
  *
  */
+	.arm
+
 	__HEAD
 ENTRY(stext)
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 #ifndef CONFIG_CPU_CP15
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 278c1b0..742b610 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -71,8 +71,16 @@
  * crap here - that's what the boot loader (or in extreme, well justified
  * circumstances, zImage) is for.
  */
+	.arm
+
 	__HEAD
 ENTRY(stext)
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 	mrc	p15, 0, r9, c0, c0		@ get processor id
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 87acc25..a927ca1 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -796,7 +796,7 @@
 
 /*
  * Called from either the Data Abort Handler [watchpoint] or the
- * Prefetch Abort Handler [breakpoint] with preemption disabled.
+ * Prefetch Abort Handler [breakpoint] with interrupts disabled.
  */
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 				 struct pt_regs *regs)
@@ -804,8 +804,10 @@
 	int ret = 0;
 	u32 dscr;
 
-	/* We must be called with preemption disabled. */
-	WARN_ON(preemptible());
+	preempt_disable();
+
+	if (interrupts_enabled(regs))
+		local_irq_enable();
 
 	/* We only handle watchpoints and hardware breakpoints. */
 	ARM_DBG_READ(c1, 0, dscr);
@@ -824,10 +826,6 @@
 		ret = 1; /* Unhandled fault. */
 	}
 
-	/*
-	 * Re-enable preemption after it was disabled in the
-	 * low-level exception handling code.
-	 */
 	preempt_enable();
 
 	return ret;
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 83bbad0..0f928a1 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -131,54 +131,63 @@
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static bool migrate_one_irq(struct irq_data *d)
+static bool migrate_one_irq(struct irq_desc *desc)
 {
-	unsigned int cpu = cpumask_any_and(d->affinity, cpu_online_mask);
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = d->affinity;
+	struct irq_chip *c;
 	bool ret = false;
 
-	if (cpu >= nr_cpu_ids) {
-		cpu = cpumask_any(cpu_online_mask);
+	/*
+	 * If this is a per-CPU interrupt, or the affinity does not
+	 * include this CPU, then we have nothing to do.
+	 */
+	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+		return false;
+
+	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		affinity = cpu_online_mask;
 		ret = true;
 	}
 
-	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", d->irq, d->node, cpu);
-
-	d->chip->irq_set_affinity(d, cpumask_of(cpu), true);
+	c = irq_data_get_irq_chip(d);
+	if (c->irq_set_affinity)
+		c->irq_set_affinity(d, affinity, true);
+	else
+		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 
 	return ret;
 }
 
 /*
- * The CPU has been marked offline.  Migrate IRQs off this CPU.  If
- * the affinity settings do not allow other CPUs, force them onto any
+ * The current CPU has been marked offline.  Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
  * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
  */
 void migrate_irqs(void)
 {
-	unsigned int i, cpu = smp_processor_id();
+	unsigned int i;
 	struct irq_desc *desc;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	for_each_irq_desc(i, desc) {
-		struct irq_data *d = &desc->irq_data;
 		bool affinity_broken = false;
 
+		if (!desc)
+			continue;
+
 		raw_spin_lock(&desc->lock);
-		do {
-			if (desc->action == NULL)
-				break;
-
-			if (d->node != cpu)
-				break;
-
-			affinity_broken = migrate_one_irq(d);
-		} while (0);
+		affinity_broken = migrate_one_irq(desc);
 		raw_spin_unlock(&desc->lock);
 
 		if (affinity_broken && printk_ratelimit())
-			pr_warning("IRQ%u no longer affine to CPU%u\n", i, cpu);
+			pr_warning("IRQ%u no longer affine to CPU%u\n", i,
+				smp_processor_id());
 	}
 
 	local_irq_restore(flags);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 3132699..53c9c26 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -435,7 +435,7 @@
 			if (irq >= 0)
 				free_irq(irq, NULL);
 		}
-		release_pmu(pmu_device);
+		release_pmu(ARM_PMU_DEVICE_CPU);
 		pmu_device = NULL;
 	}
 
@@ -454,7 +454,7 @@
 	}
 	armpmu->stop();
 
-	release_pmu(pmu_device);
+	release_pmu(ARM_PMU_DEVICE_CPU);
 	pmu_device = NULL;
 }
 
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index 2c79eec..2b70709 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #include <asm/pmu.h>
@@ -25,36 +26,88 @@
 
 static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
 
-static int __devinit pmu_device_probe(struct platform_device *pdev)
+static int __devinit pmu_register(struct platform_device *pdev,
+					enum arm_pmu_type type)
 {
-
-	if (pdev->id < 0 || pdev->id >= ARM_NUM_PMU_DEVICES) {
+	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
 		pr_warning("received registration request for unknown "
-				"device %d\n", pdev->id);
+				"device %d\n", type);
 		return -EINVAL;
 	}
 
-	if (pmu_devices[pdev->id])
-		pr_warning("registering new PMU device type %d overwrites "
-				"previous registration!\n", pdev->id);
-	else
-		pr_info("registered new PMU device of type %d\n",
-				pdev->id);
+	if (pmu_devices[type]) {
+		pr_warning("rejecting duplicate registration of PMU device "
+			"type %d.", type);
+		return -ENOSPC;
+	}
 
-	pmu_devices[pdev->id] = pdev;
+	pr_info("registered new PMU device of type %d\n", type);
+	pmu_devices[type] = pdev;
 	return 0;
 }
 
-static struct platform_driver pmu_driver = {
+#define OF_MATCH_PMU(_name, _type) {	\
+	.compatible = _name,		\
+	.data = (void *)_type,		\
+}
+
+#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
+
+static struct of_device_id armpmu_of_device_ids[] = {
+	OF_MATCH_CPU("arm,cortex-a9-pmu"),
+	OF_MATCH_CPU("arm,cortex-a8-pmu"),
+	OF_MATCH_CPU("arm,arm1136-pmu"),
+	OF_MATCH_CPU("arm,arm1176-pmu"),
+	{},
+};
+
+#define PLAT_MATCH_PMU(_name, _type) {	\
+	.name		= _name,	\
+	.driver_data	= _type,	\
+}
+
+#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	PLAT_MATCH_CPU("arm-pmu"),
+	{},
+};
+
+enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
+{
+	const struct of_device_id	*of_id;
+	const struct platform_device_id *pdev_id;
+
+	/* provided by of_device_id table */
+	if (pdev->dev.of_node) {
+		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
+		BUG_ON(!of_id);
+		return (enum arm_pmu_type)of_id->data;
+	}
+
+	/* Provided by platform_device_id table */
+	pdev_id = platform_get_device_id(pdev);
+	BUG_ON(!pdev_id);
+	return pdev_id->driver_data;
+}
+
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
+{
+	return pmu_register(pdev, armpmu_device_type(pdev));
+}
+
+static struct platform_driver armpmu_driver = {
 	.driver		= {
 		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
 	},
-	.probe		= pmu_device_probe,
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
 };
 
 static int __init register_pmu_driver(void)
 {
-	return platform_driver_register(&pmu_driver);
+	return platform_driver_register(&armpmu_driver);
 }
 device_initcall(register_pmu_driver);
 
@@ -77,11 +130,11 @@
 EXPORT_SYMBOL_GPL(reserve_pmu);
 
 int
-release_pmu(struct platform_device *pdev)
+release_pmu(enum arm_pmu_type device)
 {
-	if (WARN_ON(pdev != pmu_devices[pdev->id]))
+	if (WARN_ON(!pmu_devices[device]))
 		return -EINVAL;
-	clear_bit_unlock(pdev->id, &pmu_lock);
+	clear_bit_unlock(device, &pmu_lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(release_pmu);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f59653d..70bca64 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -343,6 +343,59 @@
 		elf_hwcap &= ~HWCAP_TLS;
 }
 
+/*
+ * cpu_init - initialise one CPU.
+ *
+ * cpu_init sets up the per-CPU stacks.
+ */
+void cpu_init(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct stack *stk = &stacks[cpu];
+
+	if (cpu >= NR_CPUS) {
+		printk(KERN_CRIT "CPU%u: bad primary CPU number\n", cpu);
+		BUG();
+	}
+
+	cpu_proc_init();
+
+	/*
+	 * Define the placement constraint for the inline asm directive below.
+	 * In Thumb-2, msr with an immediate value is not allowed.
+	 */
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLC	"r"
+#else
+#define PLC	"I"
+#endif
+
+	/*
+	 * setup stacks for re-entrant exception handlers
+	 */
+	__asm__ (
+	"msr	cpsr_c, %1\n\t"
+	"add	r14, %0, %2\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %3\n\t"
+	"add	r14, %0, %4\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %5\n\t"
+	"add	r14, %0, %6\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %7"
+	    :
+	    : "r" (stk),
+	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+	      "I" (offsetof(struct stack, irq[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+	      "I" (offsetof(struct stack, abt[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+	      "I" (offsetof(struct stack, und[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+	    : "r14");
+}
+
 static void __init setup_processor(void)
 {
 	struct proc_info_list *list;
@@ -388,58 +441,7 @@
 	feat_v6_fixup();
 
 	cacheid_init();
-	cpu_proc_init();
-}
-
-/*
- * cpu_init - initialise one CPU.
- *
- * cpu_init sets up the per-CPU stacks.
- */
-void cpu_init(void)
-{
-	unsigned int cpu = smp_processor_id();
-	struct stack *stk = &stacks[cpu];
-
-	if (cpu >= NR_CPUS) {
-		printk(KERN_CRIT "CPU%u: bad primary CPU number\n", cpu);
-		BUG();
-	}
-
-	/*
-	 * Define the placement constraint for the inline asm directive below.
-	 * In Thumb-2, msr with an immediate value is not allowed.
-	 */
-#ifdef CONFIG_THUMB2_KERNEL
-#define PLC	"r"
-#else
-#define PLC	"I"
-#endif
-
-	/*
-	 * setup stacks for re-entrant exception handlers
-	 */
-	__asm__ (
-	"msr	cpsr_c, %1\n\t"
-	"add	r14, %0, %2\n\t"
-	"mov	sp, r14\n\t"
-	"msr	cpsr_c, %3\n\t"
-	"add	r14, %0, %4\n\t"
-	"mov	sp, r14\n\t"
-	"msr	cpsr_c, %5\n\t"
-	"add	r14, %0, %6\n\t"
-	"mov	sp, r14\n\t"
-	"msr	cpsr_c, %7"
-	    :
-	    : "r" (stk),
-	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
-	      "I" (offsetof(struct stack, irq[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
-	      "I" (offsetof(struct stack, abt[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
-	      "I" (offsetof(struct stack, und[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
-	    : "r14");
+	cpu_init();
 }
 
 void __init dump_machine_table(void)
@@ -915,7 +917,6 @@
 #endif
 	reserve_crashkernel();
 
-	cpu_init();
 	tcm_init();
 
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 6398ead..dc902f2 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -10,64 +10,61 @@
 /*
  * Save CPU state for a suspend
  *  r1 = v:p offset
- *  r3 = virtual return function
- * Note: sp is decremented to allocate space for CPU state on stack
- * r0-r3,r9,r10,lr corrupted
+ *  r2 = suspend function arg0
+ *  r3 = suspend function
  */
-ENTRY(cpu_suspend)
-	mov	r9, lr
+ENTRY(__cpu_suspend)
+	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 	ldr	r10, =processor
-	mov	r2, sp			@ current virtual SP
-	ldr	r0, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
+	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
-	sub	sp, sp, r0		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer
+#else
+	ldr	r5, =cpu_suspend_size
+	ldr	ip, =cpu_do_resume
+#endif
+	mov	r6, sp			@ current virtual SP
+	sub	sp, sp, r5		@ allocate CPU state on stack
+	mov	r0, sp			@ save pointer to CPU save block
 	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r2, r3, ip}	@ save v:p, virt SP, retfn, phys resume fn
-	ldr	r3, =sleep_save_sp
-	add	r2, sp, r1		@ convert SP to phys
+	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
+	ldr	r5, =sleep_save_sp
+	add	r6, sp, r1		@ convert SP to phys
+	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
-	str	r2, [r3, lr, lsl #2]	@ save phys SP
+	str	r6, [r5, lr, lsl #2]	@ save phys SP
 #else
-	str	r2, [r3]		@ save phys SP
+	str	r6, [r5]		@ save phys SP
 #endif
+#ifdef MULTI_CPU
 	mov	lr, pc
 	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
 #else
-	mov	r2, sp			@ current virtual SP
-	ldr	r0, =cpu_suspend_size
-	sub	sp, sp, r0		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer
-	stmfd	sp!, {r1, r2, r3}	@ save v:p, virt SP, return fn
-	ldr	r3, =sleep_save_sp
-	add	r2, sp, r1		@ convert SP to phys
-#ifdef CONFIG_SMP
-	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
-	ALT_UP(mov lr, #0)
-	and	lr, lr, #15
-	str	r2, [r3, lr, lsl #2]	@ save phys SP
-#else
-	str	r2, [r3]		@ save phys SP
-#endif
 	bl	cpu_do_suspend
 #endif
 
 	@ flush data cache
 #ifdef MULTI_CACHE
 	ldr	r10, =cpu_cache
-	mov	lr, r9
+	mov	lr, pc
 	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
 #else
-	mov	lr, r9
-	b	__cpuc_flush_kern_all
+	bl	__cpuc_flush_kern_all
 #endif
-ENDPROC(cpu_suspend)
+	adr	lr, BSYM(cpu_suspend_abort)
+	ldmfd	sp!, {r0, pc}		@ call suspend fn
+ENDPROC(__cpu_suspend)
 	.ltorg
 
+cpu_suspend_abort:
+	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	mov	sp, r2
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_suspend_abort)
+
 /*
  * r0 = control register value
  * r1 = v:p offset (preserved by cpu_do_resume)
@@ -97,7 +94,9 @@
 cpu_resume_after_mmu:
 	str	r5, [r2, r4, lsl #2]	@ restore old mapping
 	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
-	mov	pc, lr
+	bl	cpu_init		@ restore the und/abt/irq banked regs
+	mov	r0, #0			@ return zero on success
+	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_resume_after_mmu)
 
 /*
@@ -120,20 +119,11 @@
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-#ifdef MULTI_CPU
-	@ load v:p, stack, return fn, resume fn
-  ARM(	ldmia	r0!, {r1, sp, lr, pc}	)
-THUMB(	ldmia	r0!, {r1, r2, r3, r4}	)
+	@ load v:p, stack, resume fn
+  ARM(	ldmia	r0!, {r1, sp, pc}	)
+THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)
-THUMB(	mov	lr, r3			)
-THUMB(	bx	r4			)
-#else
-	@ load v:p, stack, return fn
-  ARM(	ldmia	r0!, {r1, sp, lr}	)
-THUMB(	ldmia	r0!, {r1, r2, lr}	)
-THUMB(	mov	sp, r2			)
-	b	cpu_do_resume
-#endif
+THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 
 sleep_save_sp:
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index e7f92a4..167e3cb 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -365,8 +365,7 @@
 	 */
 	if (max_cpus > ncores)
 		max_cpus = ncores;
-
-	if (max_cpus > 1) {
+	if (ncores > 1 && max_cpus) {
 		/*
 		 * Enable the local timer or broadcast device for the
 		 * boot CPU, but only if we have more than one CPU.
@@ -374,6 +373,14 @@
 		percpu_timer_setup();
 
 		/*
+		 * Initialise the present map, which describes the set of CPUs
+		 * actually populated at the present time. A platform should
+		 * re-initialize the map in platform_smp_prepare_cpus() if
+		 * present != possible (e.g. physical hotplug).
+		 */
+		init_cpu_present(&cpu_possible_map);
+
+		/*
 		 * Initialise the SCU if there are more than one CPU
 		 * and let them know where to start.
 		 */
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index a1e757c..79ed5e7 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -20,6 +20,7 @@
 #define SCU_INVALIDATE		0x0c
 #define SCU_FPGA_REVISION	0x10
 
+#ifdef CONFIG_SMP
 /*
  * Get the number of CPU cores from the SCU configuration
  */
@@ -50,6 +51,7 @@
 	 */
 	flush_cache_all();
 }
+#endif
 
 /*
  * Set the executing CPUs power mode as defined.  This will be in
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index f5cf660..30e302d 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -19,6 +19,8 @@
 #include "tcm.h"
 
 static struct gen_pool *tcm_pool;
+static bool dtcm_present;
+static bool itcm_present;
 
 /* TCM section definitions from the linker */
 extern char __itcm_start, __sitcm_text, __eitcm_text;
@@ -90,6 +92,18 @@
 }
 EXPORT_SYMBOL(tcm_free);
 
+bool tcm_dtcm_present(void)
+{
+	return dtcm_present;
+}
+EXPORT_SYMBOL(tcm_dtcm_present);
+
+bool tcm_itcm_present(void)
+{
+	return itcm_present;
+}
+EXPORT_SYMBOL(tcm_itcm_present);
+
 static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 				  u32 *offset)
 {
@@ -134,6 +148,10 @@
 			(tcm_region & 1) ? "" : "not ");
 	}
 
+	/* Not much fun you can do with a size 0 bank */
+	if (tcm_size == 0)
+		return 0;
+
 	/* Force move the TCM bank to where we want it, enable */
 	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
 
@@ -165,12 +183,20 @@
 	u32 tcm_status = read_cpuid_tcmstatus();
 	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
 	u8 itcm_banks = (tcm_status & 0x03);
+	size_t dtcm_code_sz = &__edtcm_data - &__sdtcm_data;
+	size_t itcm_code_sz = &__eitcm_text - &__sitcm_text;
 	char *start;
 	char *end;
 	char *ram;
 	int ret;
 	int i;
 
+	/* Values greater than 2 for D/ITCM banks are "reserved" */
+	if (dtcm_banks > 2)
+		dtcm_banks = 0;
+	if (itcm_banks > 2)
+		itcm_banks = 0;
+
 	/* Setup DTCM if present */
 	if (dtcm_banks > 0) {
 		for (i = 0; i < dtcm_banks; i++) {
@@ -178,6 +204,13 @@
 			if (ret)
 				return;
 		}
+		/* This means you compiled more code than fits into DTCM */
+		if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) {
+			pr_info("CPU DTCM: %u bytes of code compiled to "
+				"DTCM but only %lu bytes of DTCM present\n",
+				dtcm_code_sz, (dtcm_end - DTCM_OFFSET));
+			goto no_dtcm;
+		}
 		dtcm_res.end = dtcm_end - 1;
 		request_resource(&iomem_resource, &dtcm_res);
 		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
@@ -186,12 +219,16 @@
 		start = &__sdtcm_data;
 		end   = &__edtcm_data;
 		ram   = &__dtcm_start;
-		/* This means you compiled more code than fits into DTCM */
-		BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET));
-		memcpy(start, ram, (end-start));
-		pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
+		memcpy(start, ram, dtcm_code_sz);
+		pr_debug("CPU DTCM: copied data from %p - %p\n",
+			 start, end);
+		dtcm_present = true;
+	} else if (dtcm_code_sz) {
+		pr_info("CPU DTCM: %u bytes of code compiled to DTCM but no "
+			"DTCM banks present in CPU\n", dtcm_code_sz);
 	}
 
+no_dtcm:
 	/* Setup ITCM if present */
 	if (itcm_banks > 0) {
 		for (i = 0; i < itcm_banks; i++) {
@@ -199,6 +236,13 @@
 			if (ret)
 				return;
 		}
+		/* This means you compiled more code than fits into ITCM */
+		if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) {
+			pr_info("CPU ITCM: %u bytes of code compiled to "
+				"ITCM but only %lu bytes of ITCM present\n",
+				itcm_code_sz, (itcm_end - ITCM_OFFSET));
+			return;
+		}
 		itcm_res.end = itcm_end - 1;
 		request_resource(&iomem_resource, &itcm_res);
 		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
@@ -207,10 +251,13 @@
 		start = &__sitcm_text;
 		end   = &__eitcm_text;
 		ram   = &__itcm_start;
-		/* This means you compiled more code than fits into ITCM */
-		BUG_ON((end - start) > (itcm_end - ITCM_OFFSET));
-		memcpy(start, ram, (end-start));
-		pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
+		memcpy(start, ram, itcm_code_sz);
+		pr_debug("CPU ITCM: copied code from %p - %p\n",
+			 start, end);
+		itcm_present = true;
+	} else if (itcm_code_sz) {
+		pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no "
+			"ITCM banks present in CPU\n", itcm_code_sz);
 	}
 }
 
@@ -221,7 +268,6 @@
  */
 static int __init setup_tcm_pool(void)
 {
-	u32 tcm_status = read_cpuid_tcmstatus();
 	u32 dtcm_pool_start = (u32) &__edtcm_data;
 	u32 itcm_pool_start = (u32) &__eitcm_text;
 	int ret;
@@ -236,7 +282,7 @@
 	pr_debug("Setting up TCM memory pool\n");
 
 	/* Add the rest of DTCM to the TCM pool */
-	if (tcm_status & (0x03 << 16)) {
+	if (dtcm_present) {
 		if (dtcm_pool_start < dtcm_end) {
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
 					   dtcm_end - dtcm_pool_start, -1);
@@ -253,7 +299,7 @@
 	}
 
 	/* Add the rest of ITCM to the TCM pool */
-	if (tcm_status & 0x03) {
+	if (itcm_present) {
 		if (itcm_pool_start < itcm_end) {
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
 					   itcm_end - itcm_pool_start, -1);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index e5287f2..bf977f8 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -38,57 +38,6 @@
 
 SECTIONS
 {
-#ifdef CONFIG_XIP_KERNEL
-	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
-#else
-	. = PAGE_OFFSET + TEXT_OFFSET;
-#endif
-
-	.init : {			/* Init code and data		*/
-		_stext = .;
-		_sinittext = .;
-			HEAD_TEXT
-			INIT_TEXT
-			ARM_EXIT_KEEP(EXIT_TEXT)
-		_einittext = .;
-		ARM_CPU_DISCARD(PROC_INFO)
-		__arch_info_begin = .;
-			*(.arch.info.init)
-		__arch_info_end = .;
-		__tagtable_begin = .;
-			*(.taglist.init)
-		__tagtable_end = .;
-#ifdef CONFIG_SMP_ON_UP
-		__smpalt_begin = .;
-			*(.alt.smp.init)
-		__smpalt_end = .;
-#endif
-
-		__pv_table_begin = .;
-			*(.pv_table)
-		__pv_table_end = .;
-
-		INIT_SETUP(16)
-
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-
-#ifndef CONFIG_XIP_KERNEL
-		__init_begin = _stext;
-		INIT_DATA
-		ARM_EXIT_KEEP(EXIT_DATA)
-#endif
-	}
-
-	PERCPU_SECTION(32)
-
-#ifndef CONFIG_XIP_KERNEL
-	. = ALIGN(PAGE_SIZE);
-	__init_end = .;
-#endif
-
 	/*
 	 * unwind exit sections must be discarded before the rest of the
 	 * unwind sections get included.
@@ -106,10 +55,22 @@
 		*(.fixup)
 		*(__ex_table)
 #endif
+#ifndef CONFIG_SMP_ON_UP
+		*(.alt.smp.init)
+#endif
 	}
 
+#ifdef CONFIG_XIP_KERNEL
+	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
+#else
+	. = PAGE_OFFSET + TEXT_OFFSET;
+#endif
+	.head.text : {
+		_text = .;
+		HEAD_TEXT
+	}
 	.text : {			/* Real text segment		*/
-		_text = .;		/* Text and read-only data	*/
+		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
 			*(.exception.text)
 			__exception_text_end = .;
@@ -122,8 +83,6 @@
 			*(.fixup)
 #endif
 			*(.gnu.warning)
-			*(.rodata)
-			*(.rodata.*)
 			*(.glue_7)
 			*(.glue_7t)
 		. = ALIGN(4);
@@ -152,10 +111,63 @@
 
 	_etext = .;			/* End of text and rodata section */
 
+#ifndef CONFIG_XIP_KERNEL
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+#endif
+
+	INIT_TEXT_SECTION(8)
+	.exit.text : {
+		ARM_EXIT_KEEP(EXIT_TEXT)
+	}
+	.init.proc.info : {
+		ARM_CPU_DISCARD(PROC_INFO)
+	}
+	.init.arch.info : {
+		__arch_info_begin = .;
+		*(.arch.info.init)
+		__arch_info_end = .;
+	}
+	.init.tagtable : {
+		__tagtable_begin = .;
+		*(.taglist.init)
+		__tagtable_end = .;
+	}
+#ifdef CONFIG_SMP_ON_UP
+	.init.smpalt : {
+		__smpalt_begin = .;
+		*(.alt.smp.init)
+		__smpalt_end = .;
+	}
+#endif
+	.init.pv_table : {
+		__pv_table_begin = .;
+		*(.pv_table)
+		__pv_table_end = .;
+	}
+	.init.data : {
+#ifndef CONFIG_XIP_KERNEL
+		INIT_DATA
+#endif
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+	}
+#ifndef CONFIG_XIP_KERNEL
+	.exit.data : {
+		ARM_EXIT_KEEP(EXIT_DATA)
+	}
+#endif
+
+	PERCPU_SECTION(32)
+
 #ifdef CONFIG_XIP_KERNEL
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
+	__init_end = .;
 	. = ALIGN(THREAD_SIZE);
 	__data_loc = .;
 #endif
@@ -270,12 +282,6 @@
 
 	/* Default discards */
 	DISCARDS
-
-#ifndef CONFIG_SMP_ON_UP
-	/DISCARD/ : {
-		*(.alt.smp.init)
-	}
-#endif
 }
 
 /*
diff --git a/arch/arm/mach-bcmring/include/mach/entry-macro.S b/arch/arm/mach-bcmring/include/mach/entry-macro.S
index 7d393ca..94c950d 100644
--- a/arch/arm/mach-bcmring/include/mach/entry-macro.S
+++ b/arch/arm/mach-bcmring/include/mach/entry-macro.S
@@ -80,7 +80,3 @@
 
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
-
-		.macro	irq_prio_table
-		.endm
-
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 9844fa4..9818f21 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -520,7 +520,7 @@
 	 */
 	if (have_imager()) {
 		label = "HD imager";
-		mux |= 1;
+		mux |= 2;
 
 		/* externally mux MMC1/ENET/AIC33 to imager */
 		mux |= BIT(6) | BIT(5) | BIT(3);
@@ -540,7 +540,7 @@
 		resets &= ~BIT(1);
 
 		if (have_tvp7002()) {
-			mux |= 2;
+			mux |= 1;
 			resets &= ~BIT(2);
 			label = "tvp7002 HD";
 		} else {
diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c
index e722139..cafbe13 100644
--- a/arch/arm/mach-davinci/gpio.c
+++ b/arch/arm/mach-davinci/gpio.c
@@ -254,8 +254,10 @@
 {
 	struct davinci_gpio_regs __iomem *g;
 	u32 mask = 0xffff;
+	struct davinci_gpio_controller *d;
 
-	g = (__force struct davinci_gpio_regs __iomem *) irq_desc_get_handler_data(desc);
+	d = (struct davinci_gpio_controller *)irq_desc_get_handler_data(desc);
+	g = (struct davinci_gpio_regs __iomem *)d->regs;
 
 	/* we only care about one bank */
 	if (irq & 1)
@@ -274,11 +276,14 @@
 		if (!status)
 			break;
 		__raw_writel(status, &g->intstat);
-		if (irq & 1)
-			status >>= 16;
 
 		/* now demux them to the right lowlevel handler */
-		n = (int)irq_get_handler_data(irq);
+		n = d->irq_base;
+		if (irq & 1) {
+			n += 16;
+			status >>= 16;
+		}
+
 		while (status) {
 			res = ffs(status);
 			n += res;
@@ -424,7 +429,13 @@
 
 		/* set up all irqs in this bank */
 		irq_set_chained_handler(bank_irq, gpio_irq_handler);
-		irq_set_handler_data(bank_irq, (__force void *)g);
+
+		/*
+		 * Each chip handles 32 gpios, and each irq bank consists of 16
+		 * gpio irqs. Pass the irq bank's corresponding controller to
+		 * the chained irq handler.
+		 */
+		irq_set_handler_data(bank_irq, &chips[gpio / 32]);
 
 		for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) {
 			irq_set_chip(irq, &gpio_irqchip);
diff --git a/arch/arm/mach-davinci/include/mach/entry-macro.S b/arch/arm/mach-davinci/include/mach/entry-macro.S
index fbdebc7..e14c0dc 100644
--- a/arch/arm/mach-davinci/include/mach/entry-macro.S
+++ b/arch/arm/mach-davinci/include/mach/entry-macro.S
@@ -46,6 +46,3 @@
 #endif
 1002:
 		.endm
-
-		.macro	irq_prio_table
-		.endm
diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c
index bfe68ec..952dc12 100644
--- a/arch/arm/mach-davinci/irq.c
+++ b/arch/arm/mach-davinci/irq.c
@@ -52,8 +52,14 @@
 	struct irq_chip_type *ct;
 
 	gc = irq_alloc_generic_chip("AINTC", 1, irq_start, base, handle_edge_irq);
+	if (!gc) {
+		pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n",
+		       __func__, irq_start);
+		return;
+	}
+
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
 
diff --git a/arch/arm/mach-exynos4/platsmp.c b/arch/arm/mach-exynos4/platsmp.c
index c5e65a0..b68d5bd 100644
--- a/arch/arm/mach-exynos4/platsmp.c
+++ b/arch/arm/mach-exynos4/platsmp.c
@@ -154,14 +154,6 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 	scu_enable(scu_base_addr());
 
diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c
index 8755ca8..533c28f 100644
--- a/arch/arm/mach-exynos4/pm.c
+++ b/arch/arm/mach-exynos4/pm.c
@@ -280,7 +280,7 @@
 	SAVE_ITEM(S5P_VA_L2CC + L2X0_AUX_CTRL),
 };
 
-void exynos4_cpu_suspend(void)
+static int exynos4_cpu_suspend(unsigned long arg)
 {
 	unsigned long tmp;
 	unsigned long mask = 0xFFFFFFFF;
diff --git a/arch/arm/mach-exynos4/sleep.S b/arch/arm/mach-exynos4/sleep.S
index 6b624254..0984078 100644
--- a/arch/arm/mach-exynos4/sleep.S
+++ b/arch/arm/mach-exynos4/sleep.S
@@ -33,28 +33,6 @@
 	.text
 
 	/*
-	 * s3c_cpu_save
-	 *
-	 * entry:
-	 *	r1 = v:p offset
-	 */
-
-ENTRY(s3c_cpu_save)
-
-	stmfd	sp!, { r3 - r12, lr }
-	ldr	r3, =resume_with_mmu
-	bl	cpu_suspend
-
-	ldr	r0, =pm_cpu_sleep
-	ldr	r0, [ r0 ]
-	mov	pc, r0
-
-resume_with_mmu:
-	ldmfd	sp!, { r3 - r12, pc }
-
-	.ltorg
-
-	/*
 	 * sleep magic, to allow the bootloader to check for an valid
 	 * image to resume to. Must be the first word before the
 	 * s3c_cpu_resume entry.
diff --git a/arch/arm/mach-h720x/include/mach/entry-macro.S b/arch/arm/mach-h720x/include/mach/entry-macro.S
index 6d3b917..c3948e5 100644
--- a/arch/arm/mach-h720x/include/mach/entry-macro.S
+++ b/arch/arm/mach-h720x/include/mach/entry-macro.S
@@ -57,9 +57,6 @@
 		tst     \irqstat, #1		       @ bit 0 should be set
 		.endm
 
-		.macro  irq_prio_table
-		.endm
-
 #else
 #error hynix processor selection missmatch
 #endif
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 74ed81a..0777257 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -419,14 +419,20 @@
 /*
  * clocksource
  */
+
+static cycle_t ixp4xx_clocksource_read(struct clocksource *c)
+{
+	return *IXP4XX_OSTS;
+}
+
 unsigned long ixp4xx_timer_freq = IXP4XX_TIMER_FREQ;
 EXPORT_SYMBOL(ixp4xx_timer_freq);
 static void __init ixp4xx_clocksource_init(void)
 {
 	init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
 
-	clocksource_mmio_init(&IXP4XX_OSTS, "OSTS", ixp4xx_timer_freq, 200, 32,
-			clocksource_mmio_readl_up);
+	clocksource_mmio_init(NULL, "OSTS", ixp4xx_timer_freq, 200, 32,
+			ixp4xx_clocksource_read);
 }
 
 /*
diff --git a/arch/arm/mach-lpc32xx/include/mach/entry-macro.S b/arch/arm/mach-lpc32xx/include/mach/entry-macro.S
index 870227c..b725f6c 100644
--- a/arch/arm/mach-lpc32xx/include/mach/entry-macro.S
+++ b/arch/arm/mach-lpc32xx/include/mach/entry-macro.S
@@ -41,7 +41,3 @@
 	rsb	\irqnr, \irqnr, #31
 	teq	\irqstat, #0
 	.endm
-
-	.macro	irq_prio_table
-	.endm
-
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index 72b4e76..ab9f999 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -79,7 +79,7 @@
 static APBC_CLK(ssp5, PXA168_SSP5, 4, 0);
 static APBC_CLK(keypad, PXA168_KPC, 0, 32000);
 
-static APMU_CLK(nand, NAND, 0x01db, 208000000);
+static APMU_CLK(nand, NAND, 0x19b, 156000000);
 static APMU_CLK(lcd, LCD, 0x7f, 312000000);
 
 /* device and clock bindings */
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index 8f92ccd..1464607 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -110,7 +110,7 @@
 static APBC_CLK(pwm3, PXA910_PWM3, 1, 13000000);
 static APBC_CLK(pwm4, PXA910_PWM4, 1, 13000000);
 
-static APMU_CLK(nand, NAND, 0x01db, 208000000);
+static APMU_CLK(nand, NAND, 0x19b, 156000000);
 static APMU_CLK(u2o, USB, 0x1b, 480000000);
 
 /* device and clock bindings */
diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c
index 2034098..315b9f3 100644
--- a/arch/arm/mach-msm/platsmp.c
+++ b/arch/arm/mach-msm/platsmp.c
@@ -157,12 +157,4 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 }
diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index da53ba3..aab884f 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -286,14 +286,15 @@
 	scratchpad_contents.boot_config_ptr = 0x0;
 	if (cpu_is_omap3630())
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_omap3630_restore_pointer());
+			virt_to_phys(omap3_restore_3630);
 	else if (omap_rev() != OMAP3430_REV_ES3_0 &&
 					omap_rev() != OMAP3430_REV_ES3_1)
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_restore_pointer());
+			virt_to_phys(omap3_restore);
 	else
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_es3_restore_pointer());
+			virt_to_phys(omap3_restore_es3);
+
 	if (omap_type() == OMAP2_DEVICE_TYPE_GP)
 		scratchpad_contents.secure_ram_restore_ptr = 0x0;
 	else
diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h
index a016c8b..d4ef75d 100644
--- a/arch/arm/mach-omap2/control.h
+++ b/arch/arm/mach-omap2/control.h
@@ -386,9 +386,9 @@
 
 extern void omap3_save_scratchpad_contents(void);
 extern void omap3_clear_scratchpad_contents(void);
-extern u32 *get_restore_pointer(void);
-extern u32 *get_es3_restore_pointer(void);
-extern u32 *get_omap3630_restore_pointer(void);
+extern void omap3_restore(void);
+extern void omap3_restore_es3(void);
+extern void omap3_restore_3630(void);
 extern u32 omap3_arm_context[128];
 extern void omap3_control_save_context(void);
 extern void omap3_control_restore_context(void);
diff --git a/arch/arm/mach-omap2/include/mach/entry-macro.S b/arch/arm/mach-omap2/include/mach/entry-macro.S
index a48690b..ceb8b7e 100644
--- a/arch/arm/mach-omap2/include/mach/entry-macro.S
+++ b/arch/arm/mach-omap2/include/mach/entry-macro.S
@@ -165,6 +165,3 @@
 #endif
 
 #endif	/* MULTI_OMAP2 */
-
-		.macro	irq_prio_table
-		.endm
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index ecfe93c..ce65e93 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -125,14 +125,6 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 	/*
 	 * Initialise the SCU and wake up the secondary core using
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index 45bcfce..04ee566 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -88,18 +88,28 @@
 #define pm_dbg_regset_init(reg_set) do {} while (0);
 #endif /* CONFIG_PM_DEBUG */
 
+/* 24xx */
 extern void omap24xx_idle_loop_suspend(void);
+extern unsigned int omap24xx_idle_loop_suspend_sz;
 
 extern void omap24xx_cpu_suspend(u32 dll_ctrl, void __iomem *sdrc_dlla_ctrl,
 					void __iomem *sdrc_power);
-extern void omap34xx_cpu_suspend(u32 *addr, int save_state);
-extern int save_secure_ram_context(u32 *addr);
-extern void omap3_save_scratchpad_contents(void);
-
-extern unsigned int omap24xx_idle_loop_suspend_sz;
-extern unsigned int save_secure_ram_context_sz;
 extern unsigned int omap24xx_cpu_suspend_sz;
-extern unsigned int omap34xx_cpu_suspend_sz;
+
+/* 3xxx */
+extern void omap34xx_cpu_suspend(int save_state);
+
+/* omap3_do_wfi function pointer and size, for copy to SRAM */
+extern void omap3_do_wfi(void);
+extern unsigned int omap3_do_wfi_sz;
+/* ... and its pointer from SRAM after copy */
+extern void (*omap3_do_wfi_sram)(void);
+
+/* save_secure_ram_context function pointer and size, for copy to SRAM */
+extern int save_secure_ram_context(u32 *addr);
+extern unsigned int save_secure_ram_context_sz;
+
+extern void omap3_save_scratchpad_contents(void);
 
 #define PM_RTA_ERRATUM_i608		(1 << 0)
 #define PM_SDRC_WAKEUP_ERRATUM_i583	(1 << 1)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index c155c9d..b77d826 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -31,6 +31,8 @@
 #include <linux/console.h>
 #include <trace/events/power.h>
 
+#include <asm/suspend.h>
+
 #include <plat/sram.h>
 #include "clockdomain.h"
 #include "powerdomain.h"
@@ -40,8 +42,6 @@
 #include <plat/gpmc.h>
 #include <plat/dma.h>
 
-#include <asm/tlbflush.h>
-
 #include "cm2xxx_3xxx.h"
 #include "cm-regbits-34xx.h"
 #include "prm-regbits-34xx.h"
@@ -64,11 +64,6 @@
 }
 #endif
 
-/* Scratchpad offsets */
-#define OMAP343X_TABLE_ADDRESS_OFFSET	   0xc4
-#define OMAP343X_TABLE_VALUE_OFFSET	   0xc0
-#define OMAP343X_CONTROL_REG_VALUE_OFFSET  0xc8
-
 /* pm34xx errata defined in pm.h */
 u16 pm34xx_errata;
 
@@ -83,9 +78,8 @@
 
 static LIST_HEAD(pwrst_list);
 
-static void (*_omap_sram_idle)(u32 *addr, int save_state);
-
 static int (*_omap_save_secure_sram)(u32 *addr);
+void (*omap3_do_wfi_sram)(void);
 
 static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
 static struct powerdomain *core_pwrdm, *per_pwrdm;
@@ -312,28 +306,25 @@
 	return IRQ_HANDLED;
 }
 
-/* Function to restore the table entry that was modified for enabling MMU */
-static void restore_table_entry(void)
+static void omap34xx_save_context(u32 *save)
 {
-	void __iomem *scratchpad_address;
-	u32 previous_value, control_reg_value;
-	u32 *address;
+	u32 val;
 
-	scratchpad_address = OMAP2_L4_IO_ADDRESS(OMAP343X_SCRATCHPAD);
+	/* Read Auxiliary Control Register */
+	asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (val));
+	*save++ = 1;
+	*save++ = val;
 
-	/* Get address of entry that was modified */
-	address = (u32 *)__raw_readl(scratchpad_address +
-				     OMAP343X_TABLE_ADDRESS_OFFSET);
-	/* Get the previous value which needs to be restored */
-	previous_value = __raw_readl(scratchpad_address +
-				     OMAP343X_TABLE_VALUE_OFFSET);
-	address = __va(address);
-	*address = previous_value;
-	flush_tlb_all();
-	control_reg_value = __raw_readl(scratchpad_address
-					+ OMAP343X_CONTROL_REG_VALUE_OFFSET);
-	/* This will enable caches and prediction */
-	set_cr(control_reg_value);
+	/* Read L2 AUX ctrl register */
+	asm("mrc p15, 1, %0, c9, c0, 2" : "=r" (val));
+	*save++ = 1;
+	*save++ = val;
+}
+
+static int omap34xx_do_sram_idle(unsigned long save_state)
+{
+	omap34xx_cpu_suspend(save_state);
+	return 0;
 }
 
 void omap_sram_idle(void)
@@ -352,9 +343,6 @@
 	int core_prev_state, per_prev_state;
 	u32 sdrc_pwr = 0;
 
-	if (!_omap_sram_idle)
-		return;
-
 	pwrdm_clear_all_prev_pwrst(mpu_pwrdm);
 	pwrdm_clear_all_prev_pwrst(neon_pwrdm);
 	pwrdm_clear_all_prev_pwrst(core_pwrdm);
@@ -432,12 +420,16 @@
 		sdrc_pwr = sdrc_read_reg(SDRC_POWER);
 
 	/*
-	 * omap3_arm_context is the location where ARM registers
-	 * get saved. The restore path then reads from this
-	 * location and restores them back.
+	 * omap3_arm_context is the location where some ARM context
+	 * get saved. The rest is placed on the stack, and restored
+	 * from there before resuming.
 	 */
-	_omap_sram_idle(omap3_arm_context, save_state);
-	cpu_init();
+	if (save_state)
+		omap34xx_save_context(omap3_arm_context);
+	if (save_state == 1 || save_state == 3)
+		cpu_suspend(save_state, omap34xx_do_sram_idle);
+	else
+		omap34xx_do_sram_idle(save_state);
 
 	/* Restore normal SDRC POWER settings */
 	if (omap_rev() >= OMAP3430_REV_ES3_0 &&
@@ -445,10 +437,6 @@
 	    core_next_state == PWRDM_POWER_OFF)
 		sdrc_write_reg(sdrc_pwr, SDRC_POWER);
 
-	/* Restore table entry modified during MMU restoration */
-	if (pwrdm_read_prev_pwrst(mpu_pwrdm) == PWRDM_POWER_OFF)
-		restore_table_entry();
-
 	/* CORE */
 	if (core_next_state < PWRDM_POWER_ON) {
 		core_prev_state = pwrdm_read_prev_pwrst(core_pwrdm);
@@ -852,10 +840,17 @@
 	return 0;
 }
 
+/*
+ * Push functions to SRAM
+ *
+ * The minimum set of functions is pushed to SRAM for execution:
+ * - omap3_do_wfi for erratum i581 WA,
+ * - save_secure_ram_context for security extensions.
+ */
 void omap_push_sram_idle(void)
 {
-	_omap_sram_idle = omap_sram_push(omap34xx_cpu_suspend,
-					omap34xx_cpu_suspend_sz);
+	omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
+
 	if (omap_type() != OMAP2_DEVICE_TYPE_GP)
 		_omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
 				save_secure_ram_context_sz);
@@ -920,7 +915,6 @@
 	per_clkdm = clkdm_lookup("per_clkdm");
 	core_clkdm = clkdm_lookup("core_clkdm");
 
-	omap_push_sram_idle();
 #ifdef CONFIG_SUSPEND
 	suspend_set_ops(&omap_pm_ops);
 #endif /* CONFIG_SUSPEND */
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index 63f1066..f2ea1bd 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -74,46 +74,6 @@
  * API functions
  */
 
-/*
- * The "get_*restore_pointer" functions are used to provide a
- * physical restore address where the ROM code jumps while waking
- * up from MPU OFF/OSWR state.
- * The restore pointer is stored into the scratchpad.
- */
-
-	.text
-/* Function call to get the restore pointer for resume from OFF */
-ENTRY(get_restore_pointer)
-	stmfd	sp!, {lr}	@ save registers on stack
-	adr	r0, restore
-	ldmfd	sp!, {pc}	@ restore regs and return
-ENDPROC(get_restore_pointer)
-	.align
-ENTRY(get_restore_pointer_sz)
-	.word	. - get_restore_pointer
-
-	.text
-/* Function call to get the restore pointer for 3630 resume from OFF */
-ENTRY(get_omap3630_restore_pointer)
-	stmfd	sp!, {lr}	@ save registers on stack
-	adr	r0, restore_3630
-	ldmfd	sp!, {pc}	@ restore regs and return
-ENDPROC(get_omap3630_restore_pointer)
-	.align
-ENTRY(get_omap3630_restore_pointer_sz)
-	.word	. - get_omap3630_restore_pointer
-
-	.text
-/* Function call to get the restore pointer for ES3 to resume from OFF */
-ENTRY(get_es3_restore_pointer)
-	stmfd	sp!, {lr}	@ save registers on stack
-	adr	r0, restore_es3
-	ldmfd	sp!, {pc}	@ restore regs and return
-ENDPROC(get_es3_restore_pointer)
-	.align
-ENTRY(get_es3_restore_pointer_sz)
-	.word	. - get_es3_restore_pointer
-
 	.text
 /*
  * L2 cache needs to be toggled for stable OFF mode functionality on 3630.
@@ -133,7 +93,7 @@
 /* Function to call rom code to save secure ram context */
 	.align	3
 ENTRY(save_secure_ram_context)
-	stmfd	sp!, {r1-r12, lr}	@ save registers on stack
+	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 	adr	r3, api_params		@ r3 points to parameters
 	str	r0, [r3,#0x4]		@ r0 has sdram address
 	ldr	r12, high_mask
@@ -152,7 +112,7 @@
 	nop
 	nop
 	nop
-	ldmfd	sp!, {r1-r12, pc}
+	ldmfd	sp!, {r4 - r11, pc}
 	.align
 sram_phy_addr_mask:
 	.word	SRAM_BASE_P
@@ -179,69 +139,38 @@
  *
  *
  * Notes:
- * - this code gets copied to internal SRAM at boot and after wake-up
- *   from OFF mode. The execution pointer in SRAM is _omap_sram_idle.
+ * - only the minimum set of functions gets copied to internal SRAM at boot
+ *   and after wake-up from OFF mode, cf. omap_push_sram_idle. The function
+ *   pointers in SDRAM or SRAM are called depending on the desired low power
+ *   target state.
  * - when the OMAP wakes up it continues at different execution points
  *   depending on the low power mode (non-OFF vs OFF modes),
  *   cf. 'Resume path for xxx mode' comments.
  */
 	.align	3
 ENTRY(omap34xx_cpu_suspend)
-	stmfd	sp!, {r0-r12, lr}	@ save registers on stack
+	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 
 	/*
-	 * r0 contains CPU context save/restore pointer in sdram
-	 * r1 contains information about saving context:
+	 * r0 contains information about saving context:
 	 *   0 - No context lost
 	 *   1 - Only L1 and logic lost
 	 *   2 - Only L2 lost (Even L1 is retained we clean it along with L2)
 	 *   3 - Both L1 and L2 lost and logic lost
 	 */
 
-	/* Directly jump to WFI is the context save is not required */
-	cmp	r1, #0x0
-	beq	omap3_do_wfi
+	/*
+	 * For OFF mode: save context and jump to WFI in SDRAM (omap3_do_wfi)
+	 * For non-OFF modes: jump to the WFI code in SRAM (omap3_do_wfi_sram)
+	 */
+	ldr	r4, omap3_do_wfi_sram_addr
+	ldr	r5, [r4]
+	cmp	r0, #0x0		@ If no context save required,
+	bxeq	r5			@  jump to the WFI code in SRAM
+
 
 	/* Otherwise fall through to the save context code */
 save_context_wfi:
-	mov	r8, r0			@ Store SDRAM address in r8
-	mrc	p15, 0, r5, c1, c0, 1	@ Read Auxiliary Control Register
-	mov	r4, #0x1		@ Number of parameters for restore call
-	stmia	r8!, {r4-r5}		@ Push parameters for restore call
-	mrc	p15, 1, r5, c9, c0, 2	@ Read L2 AUX ctrl register
-	stmia	r8!, {r4-r5}		@ Push parameters for restore call
-
-        /* Check what that target sleep state is from r1 */
-	cmp	r1, #0x2		@ Only L2 lost, no need to save context
-	beq	clean_caches
-
-l1_logic_lost:
-	mov	r4, sp			@ Store sp
-	mrs	r5, spsr		@ Store spsr
-	mov	r6, lr			@ Store lr
-	stmia	r8!, {r4-r6}
-
-	mrc	p15, 0, r4, c1, c0, 2	@ Coprocessor access control register
-	mrc	p15, 0, r5, c2, c0, 0	@ TTBR0
-	mrc	p15, 0, r6, c2, c0, 1	@ TTBR1
-	mrc	p15, 0, r7, c2, c0, 2	@ TTBCR
-	stmia	r8!, {r4-r7}
-
-	mrc	p15, 0, r4, c3, c0, 0	@ Domain access Control Register
-	mrc	p15, 0, r5, c10, c2, 0	@ PRRR
-	mrc	p15, 0, r6, c10, c2, 1	@ NMRR
-	stmia	r8!,{r4-r6}
-
-	mrc	p15, 0, r4, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r5, c13, c0, 2	@ User r/w thread and process ID
-	mrc	p15, 0, r6, c12, c0, 0	@ Secure or NS vector base address
-	mrs	r7, cpsr		@ Store current cpsr
-	stmia	r8!, {r4-r7}
-
-	mrc	p15, 0, r4, c1, c0, 0	@ save control register
-	stmia	r8!, {r4}
-
-clean_caches:
 	/*
 	 * jump out to kernel flush routine
 	 *  - reuse that code is better
@@ -284,7 +213,32 @@
  THUMB(	nop		)
 	.arm
 
-omap3_do_wfi:
+	b	omap3_do_wfi
+
+/*
+ * Local variables
+ */
+omap3_do_wfi_sram_addr:
+	.word omap3_do_wfi_sram
+kernel_flush:
+	.word v7_flush_dcache_all
+
+/* ===================================
+ * == WFI instruction => Enter idle ==
+ * ===================================
+ */
+
+/*
+ * Do WFI instruction
+ * Includes the resume path for non-OFF modes
+ *
+ * This code gets copied to internal SRAM and is accessible
+ * from both SDRAM and SRAM:
+ * - executed from SRAM for non-off modes (omap3_do_wfi_sram),
+ * - executed from SDRAM for OFF mode (omap3_do_wfi).
+ */
+	.align	3
+ENTRY(omap3_do_wfi)
 	ldr	r4, sdrc_power		@ read the SDRC_POWER register
 	ldr	r5, [r4]		@ read the contents of SDRC_POWER
 	orr	r5, r5, #0x40		@ enable self refresh on idle req
@@ -316,8 +270,86 @@
 	nop
 	nop
 	nop
-	bl wait_sdrc_ok
 
+/*
+ * This function implements the erratum ID i581 WA:
+ *  SDRC state restore before accessing the SDRAM
+ *
+ * Only used at return from non-OFF mode. For OFF
+ * mode the ROM code configures the SDRC and
+ * the DPLL before calling the restore code directly
+ * from DDR.
+ */
+
+/* Make sure SDRC accesses are ok */
+wait_sdrc_ok:
+
+/* DPLL3 must be locked before accessing the SDRC. Maybe the HW ensures this */
+	ldr	r4, cm_idlest_ckgen
+wait_dpll3_lock:
+	ldr	r5, [r4]
+	tst	r5, #1
+	beq	wait_dpll3_lock
+
+	ldr	r4, cm_idlest1_core
+wait_sdrc_ready:
+	ldr	r5, [r4]
+	tst	r5, #0x2
+	bne	wait_sdrc_ready
+	/* allow DLL powerdown upon hw idle req */
+	ldr	r4, sdrc_power
+	ldr	r5, [r4]
+	bic	r5, r5, #0x40
+	str	r5, [r4]
+
+/*
+ * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
+ * base instead.
+ * Be careful not to clobber r7 when maintaing this code.
+ */
+
+is_dll_in_lock_mode:
+	/* Is dll in lock mode? */
+	ldr	r4, sdrc_dlla_ctrl
+	ldr	r5, [r4]
+	tst	r5, #0x4
+	bne	exit_nonoff_modes	@ Return if locked
+	/* wait till dll locks */
+	adr	r7, kick_counter
+wait_dll_lock_timed:
+	ldr	r4, wait_dll_lock_counter
+	add	r4, r4, #1
+	str	r4, [r7, #wait_dll_lock_counter - kick_counter]
+	ldr	r4, sdrc_dlla_status
+	/* Wait 20uS for lock */
+	mov	r6, #8
+wait_dll_lock:
+	subs	r6, r6, #0x1
+	beq	kick_dll
+	ldr	r5, [r4]
+	and	r5, r5, #0x4
+	cmp	r5, #0x4
+	bne	wait_dll_lock
+	b	exit_nonoff_modes	@ Return when locked
+
+	/* disable/reenable DLL if not locked */
+kick_dll:
+	ldr	r4, sdrc_dlla_ctrl
+	ldr	r5, [r4]
+	mov	r6, r5
+	bic	r6, #(1<<3)		@ disable dll
+	str	r6, [r4]
+	dsb
+	orr	r6, r6, #(1<<3)		@ enable dll
+	str	r6, [r4]
+	dsb
+	ldr	r4, kick_counter
+	add	r4, r4, #1
+	str	r4, [r7]		@ kick_counter
+	b	wait_dll_lock_timed
+
+exit_nonoff_modes:
+	/* Re-enable C-bit if needed */
 	mrc	p15, 0, r0, c1, c0, 0
 	tst	r0, #(1 << 2)		@ Check C bit enabled?
 	orreq	r0, r0, #(1 << 2)	@ Enable the C bit if cleared
@@ -329,7 +361,32 @@
  * == Exit point from non-OFF modes ==
  * ===================================
  */
-	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
+	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
+
+/*
+ * Local variables
+ */
+sdrc_power:
+	.word	SDRC_POWER_V
+cm_idlest1_core:
+	.word	CM_IDLEST1_CORE_V
+cm_idlest_ckgen:
+	.word	CM_IDLEST_CKGEN_V
+sdrc_dlla_status:
+	.word	SDRC_DLLA_STATUS_V
+sdrc_dlla_ctrl:
+	.word	SDRC_DLLA_CTRL_V
+	/*
+	 * When exporting to userspace while the counters are in SRAM,
+	 * these 2 words need to be at the end to facilitate retrival!
+	 */
+kick_counter:
+	.word	0
+wait_dll_lock_counter:
+	.word	0
+
+ENTRY(omap3_do_wfi_sz)
+	.word	. - omap3_do_wfi
 
 
 /*
@@ -346,13 +403,17 @@
  *  restore_es3: applies to 34xx >= ES3.0
  *  restore_3630: applies to 36xx
  *  restore: common code for 3xxx
+ *
+ * Note: when back from CORE and MPU OFF mode we are running
+ *  from SDRAM, without MMU, without the caches and prediction.
+ *  Also the SRAM content has been cleared.
  */
-restore_es3:
+ENTRY(omap3_restore_es3)
 	ldr	r5, pm_prepwstst_core_p
 	ldr	r4, [r5]
 	and	r4, r4, #0x3
 	cmp	r4, #0x0	@ Check if previous power state of CORE is OFF
-	bne	restore
+	bne	omap3_restore	@ Fall through to OMAP3 common code
 	adr	r0, es3_sdrc_fix
 	ldr	r1, sram_base
 	ldr	r2, es3_sdrc_fix_sz
@@ -364,35 +425,32 @@
 	bne	copy_to_sram
 	ldr	r1, sram_base
 	blx	r1
-	b	restore
+	b	omap3_restore	@ Fall through to OMAP3 common code
+ENDPROC(omap3_restore_es3)
 
-restore_3630:
+ENTRY(omap3_restore_3630)
 	ldr	r1, pm_prepwstst_core_p
 	ldr	r2, [r1]
 	and	r2, r2, #0x3
 	cmp	r2, #0x0	@ Check if previous power state of CORE is OFF
-	bne	restore
+	bne	omap3_restore	@ Fall through to OMAP3 common code
 	/* Disable RTA before giving control */
 	ldr	r1, control_mem_rta
 	mov	r2, #OMAP36XX_RTA_DISABLE
 	str	r2, [r1]
+ENDPROC(omap3_restore_3630)
 
 	/* Fall through to common code for the remaining logic */
 
-restore:
+ENTRY(omap3_restore)
 	/*
-	 * Check what was the reason for mpu reset and store the reason in r9:
-	 *  0 - No context lost
-	 *  1 - Only L1 and logic lost
-	 *  2 - Only L2 lost - In this case, we wont be here
-	 *  3 - Both L1 and L2 lost
+	 * Read the pwstctrl register to check the reason for mpu reset.
+	 * This tells us what was lost.
 	 */
 	ldr	r1, pm_pwstctrl_mpu
 	ldr	r2, [r1]
 	and	r2, r2, #0x3
 	cmp	r2, #0x0	@ Check if target power state was OFF or RET
-	moveq	r9, #0x3	@ MPU OFF => L1 and L2 lost
-	movne	r9, #0x1	@ Only L1 and L2 lost => avoid L2 invalidation
 	bne	logic_l1_restore
 
 	ldr	r0, l2dis_3630
@@ -471,115 +529,39 @@
 	orr	r1, r1, #2		@ re-enable L2 cache
 	mcr	p15, 0, r1, c1, c0, 1
 skipl2reen:
-	mov	r1, #0
-	/*
-	 * Invalidate all instruction caches to PoU
-	 * and flush branch target cache
-	 */
-	mcr	p15, 0, r1, c7, c5, 0
 
-	ldr	r4, scratchpad_base
-	ldr	r3, [r4,#0xBC]
-	adds	r3, r3, #16
+	/* Now branch to the common CPU resume function */
+	b	cpu_resume
+ENDPROC(omap3_restore)
 
-	ldmia	r3!, {r4-r6}
-	mov	sp, r4			@ Restore sp
-	msr	spsr_cxsf, r5		@ Restore spsr
-	mov	lr, r6			@ Restore lr
-
-	ldmia	r3!, {r4-r7}
-	mcr	p15, 0, r4, c1, c0, 2	@ Coprocessor access Control Register
-	mcr	p15, 0, r5, c2, c0, 0	@ TTBR0
-	mcr	p15, 0, r6, c2, c0, 1	@ TTBR1
-	mcr	p15, 0, r7, c2, c0, 2	@ TTBCR
-
-	ldmia	r3!,{r4-r6}
-	mcr	p15, 0, r4, c3, c0, 0	@ Domain access Control Register
-	mcr	p15, 0, r5, c10, c2, 0	@ PRRR
-	mcr	p15, 0, r6, c10, c2, 1	@ NMRR
-
-
-	ldmia	r3!,{r4-r7}
-	mcr	p15, 0, r4, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r5, c13, c0, 2	@ User r/w thread and process ID
-	mrc	p15, 0, r6, c12, c0, 0	@ Secure or NS vector base address
-	msr	cpsr, r7		@ store cpsr
-
-	/* Enabling MMU here */
-	mrc	p15, 0, r7, c2, c0, 2 	@ Read TTBRControl
-	/* Extract N (0:2) bits and decide whether to use TTBR0 or TTBR1 */
-	and	r7, #0x7
-	cmp	r7, #0x0
-	beq	usettbr0
-ttbr_error:
-	/*
-	 * More work needs to be done to support N[0:2] value other than 0
-	 * So looping here so that the error can be detected
-	 */
-	b	ttbr_error
-usettbr0:
-	mrc	p15, 0, r2, c2, c0, 0
-	ldr	r5, ttbrbit_mask
-	and	r2, r5
-	mov	r4, pc
-	ldr	r5, table_index_mask
-	and	r4, r5			@ r4 = 31 to 20 bits of pc
-	/* Extract the value to be written to table entry */
-	ldr	r1, table_entry
-	/* r1 has the value to be written to table entry*/
-	add	r1, r1, r4
-	/* Getting the address of table entry to modify */
-	lsr	r4, #18
-	/* r2 has the location which needs to be modified */
-	add	r2, r4
-	/* Storing previous entry of location being modified */
-	ldr	r5, scratchpad_base
-	ldr	r4, [r2]
-	str	r4, [r5, #0xC0]
-	/* Modify the table entry */
-	str	r1, [r2]
-	/*
-	 * Storing address of entry being modified
-	 * - will be restored after enabling MMU
-	 */
-	ldr	r5, scratchpad_base
-	str	r2, [r5, #0xC4]
-
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 4	@ Flush prefetch buffer
-	mcr	p15, 0, r0, c7, c5, 6	@ Invalidate branch predictor array
-	mcr	p15, 0, r0, c8, c5, 0	@ Invalidate instruction TLB
-	mcr	p15, 0, r0, c8, c6, 0	@ Invalidate data TLB
-	/*
-	 * Restore control register. This enables the MMU.
-	 * The caches and prediction are not enabled here, they
-	 * will be enabled after restoring the MMU table entry.
-	 */
-	ldmia	r3!, {r4}
-	/* Store previous value of control register in scratchpad */
-	str	r4, [r5, #0xC8]
-	ldr	r2, cache_pred_disable_mask
-	and	r4, r2
-	mcr	p15, 0, r4, c1, c0, 0
-	dsb
-	isb
-	ldr     r0, =restoremmu_on
-	bx      r0
+	.ltorg
 
 /*
- * ==============================
- * == Exit point from OFF mode ==
- * ==============================
+ * Local variables
  */
-restoremmu_on:
-	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
-
+pm_prepwstst_core_p:
+	.word	PM_PREPWSTST_CORE_P
+pm_pwstctrl_mpu:
+	.word	PM_PWSTCTRL_MPU_P
+scratchpad_base:
+	.word	SCRATCHPAD_BASE_P
+sram_base:
+	.word	SRAM_BASE_P + 0x8000
+control_stat:
+	.word	CONTROL_STAT
+control_mem_rta:
+	.word	CONTROL_MEM_RTA_CTRL
+l2dis_3630:
+	.word	0
 
 /*
  * Internal functions
  */
 
-/* This function implements the erratum ID i443 WA, applies to 34xx >= ES3.0 */
+/*
+ * This function implements the erratum ID i443 WA, applies to 34xx >= ES3.0
+ * Copied to and run from SRAM in order to reconfigure the SDRC parameters.
+ */
 	.text
 	.align	3
 ENTRY(es3_sdrc_fix)
@@ -609,6 +591,9 @@
 	str	r5, [r4]		@ kick off refreshes
 	bx	lr
 
+/*
+ * Local variables
+ */
 	.align
 sdrc_syscfg:
 	.word	SDRC_SYSCONFIG_P
@@ -627,128 +612,3 @@
 ENDPROC(es3_sdrc_fix)
 ENTRY(es3_sdrc_fix_sz)
 	.word	. - es3_sdrc_fix
-
-/*
- * This function implements the erratum ID i581 WA:
- *  SDRC state restore before accessing the SDRAM
- *
- * Only used at return from non-OFF mode. For OFF
- * mode the ROM code configures the SDRC and
- * the DPLL before calling the restore code directly
- * from DDR.
- */
-
-/* Make sure SDRC accesses are ok */
-wait_sdrc_ok:
-
-/* DPLL3 must be locked before accessing the SDRC. Maybe the HW ensures this */
-	ldr	r4, cm_idlest_ckgen
-wait_dpll3_lock:
-	ldr	r5, [r4]
-	tst	r5, #1
-	beq	wait_dpll3_lock
-
-	ldr	r4, cm_idlest1_core
-wait_sdrc_ready:
-	ldr	r5, [r4]
-	tst	r5, #0x2
-	bne	wait_sdrc_ready
-	/* allow DLL powerdown upon hw idle req */
-	ldr	r4, sdrc_power
-	ldr	r5, [r4]
-	bic	r5, r5, #0x40
-	str	r5, [r4]
-
-/*
- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
- * base instead.
- * Be careful not to clobber r7 when maintaing this code.
- */
-
-is_dll_in_lock_mode:
-	/* Is dll in lock mode? */
-	ldr	r4, sdrc_dlla_ctrl
-	ldr	r5, [r4]
-	tst	r5, #0x4
-	bxne	lr			@ Return if locked
-	/* wait till dll locks */
-	adr	r7, kick_counter
-wait_dll_lock_timed:
-	ldr	r4, wait_dll_lock_counter
-	add	r4, r4, #1
-	str	r4, [r7, #wait_dll_lock_counter - kick_counter]
-	ldr	r4, sdrc_dlla_status
-	/* Wait 20uS for lock */
-	mov	r6, #8
-wait_dll_lock:
-	subs	r6, r6, #0x1
-	beq	kick_dll
-	ldr	r5, [r4]
-	and	r5, r5, #0x4
-	cmp	r5, #0x4
-	bne	wait_dll_lock
-	bx	lr			@ Return when locked
-
-	/* disable/reenable DLL if not locked */
-kick_dll:
-	ldr	r4, sdrc_dlla_ctrl
-	ldr	r5, [r4]
-	mov	r6, r5
-	bic	r6, #(1<<3)		@ disable dll
-	str	r6, [r4]
-	dsb
-	orr	r6, r6, #(1<<3)		@ enable dll
-	str	r6, [r4]
-	dsb
-	ldr	r4, kick_counter
-	add	r4, r4, #1
-	str	r4, [r7]		@ kick_counter
-	b	wait_dll_lock_timed
-
-	.align
-cm_idlest1_core:
-	.word	CM_IDLEST1_CORE_V
-cm_idlest_ckgen:
-	.word	CM_IDLEST_CKGEN_V
-sdrc_dlla_status:
-	.word	SDRC_DLLA_STATUS_V
-sdrc_dlla_ctrl:
-	.word	SDRC_DLLA_CTRL_V
-pm_prepwstst_core_p:
-	.word	PM_PREPWSTST_CORE_P
-pm_pwstctrl_mpu:
-	.word	PM_PWSTCTRL_MPU_P
-scratchpad_base:
-	.word	SCRATCHPAD_BASE_P
-sram_base:
-	.word	SRAM_BASE_P + 0x8000
-sdrc_power:
-	.word	SDRC_POWER_V
-ttbrbit_mask:
-	.word	0xFFFFC000
-table_index_mask:
-	.word	0xFFF00000
-table_entry:
-	.word	0x00000C02
-cache_pred_disable_mask:
-	.word	0xFFFFE7FB
-control_stat:
-	.word	CONTROL_STAT
-control_mem_rta:
-	.word	CONTROL_MEM_RTA_CTRL
-kernel_flush:
-	.word	v7_flush_dcache_all
-l2dis_3630:
-	.word	0
-	/*
-	 * When exporting to userspace while the counters are in SRAM,
-	 * these 2 words need to be at the end to facilitate retrival!
-	 */
-kick_counter:
-	.word	0
-wait_dll_lock_counter:
-	.word	0
-ENDPROC(omap34xx_cpu_suspend)
-
-ENTRY(omap34xx_cpu_suspend_sz)
-	.word	. - omap34xx_cpu_suspend
diff --git a/arch/arm/mach-pnx4008/include/mach/entry-macro.S b/arch/arm/mach-pnx4008/include/mach/entry-macro.S
index 8003037..db7eeeb 100644
--- a/arch/arm/mach-pnx4008/include/mach/entry-macro.S
+++ b/arch/arm/mach-pnx4008/include/mach/entry-macro.S
@@ -120,8 +120,3 @@
 1003:
 		.endm
 
-
-		.macro	irq_prio_table
-		.endm
-
-
diff --git a/arch/arm/mach-pxa/include/mach/pm.h b/arch/arm/mach-pxa/include/mach/pm.h
index f15afe0..51558bc 100644
--- a/arch/arm/mach-pxa/include/mach/pm.h
+++ b/arch/arm/mach-pxa/include/mach/pm.h
@@ -22,8 +22,8 @@
 extern struct pxa_cpu_pm_fns *pxa_cpu_pm_fns;
 
 /* sleep.S */
-extern void pxa25x_cpu_suspend(unsigned int, long);
-extern void pxa27x_cpu_suspend(unsigned int, long);
+extern int pxa25x_finish_suspend(unsigned long);
+extern int pxa27x_finish_suspend(unsigned long);
 
 extern int pxa_pm_enter(suspend_state_t state);
 extern int pxa_pm_prepare(void);
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 87ae312..b27544b 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -347,9 +347,9 @@
 		if ((gpio_desc[i].config & MFP_LPM_KEEP_OUTPUT) &&
 		    (GPDR(i) & GPIO_bit(i))) {
 			if (GPLR(i) & GPIO_bit(i))
-				PGSR(i) |= GPIO_bit(i);
+				PGSR(gpio_to_bank(i)) |= GPIO_bit(i);
 			else
-				PGSR(i) &= ~GPIO_bit(i);
+				PGSR(gpio_to_bank(i)) &= ~GPIO_bit(i);
 		}
 	}
 
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 65f24f0..5a5329b 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -33,6 +33,7 @@
 #include <linux/i2c-gpio.h>
 
 #include <asm/mach-types.h>
+#include <asm/suspend.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c
index 51e1583..37178a8 100644
--- a/arch/arm/mach-pxa/pm.c
+++ b/arch/arm/mach-pxa/pm.c
@@ -42,7 +42,6 @@
 
 	/* *** go zzz *** */
 	pxa_cpu_pm_fns->enter(state);
-	cpu_init();
 
 	if (state != PM_SUSPEND_STANDBY && pxa_cpu_pm_fns->restore) {
 		/* after sleeping, validate the checksum */
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index fed363c..9c434d2 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -25,6 +25,7 @@
 #include <linux/irq.h>
 
 #include <asm/mach/map.h>
+#include <asm/suspend.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/gpio.h>
@@ -244,7 +245,7 @@
 
 	switch (state) {
 	case PM_SUSPEND_MEM:
-		pxa25x_cpu_suspend(PWRMODE_SLEEP, PLAT_PHYS_OFFSET - PAGE_OFFSET);
+		cpu_suspend(PWRMODE_SLEEP, pxa25x_finish_suspend);
 		break;
 	}
 }
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 2fecbec..9d2400b 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -24,6 +24,7 @@
 #include <asm/mach/map.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
+#include <asm/suspend.h>
 #include <mach/irqs.h>
 #include <mach/gpio.h>
 #include <mach/pxa27x.h>
@@ -284,6 +285,11 @@
 void pxa27x_cpu_pm_enter(suspend_state_t state)
 {
 	extern void pxa_cpu_standby(void);
+#ifndef CONFIG_IWMMXT
+	u64 acc0;
+
+	asm volatile("mra %Q0, %R0, acc0" : "=r" (acc0));
+#endif
 
 	/* ensure voltage-change sequencer not initiated, which hangs */
 	PCFR &= ~PCFR_FVC;
@@ -299,7 +305,10 @@
 		pxa_cpu_standby();
 		break;
 	case PM_SUSPEND_MEM:
-		pxa27x_cpu_suspend(pwrmode, PLAT_PHYS_OFFSET - PAGE_OFFSET);
+		cpu_suspend(pwrmode, pxa27x_finish_suspend);
+#ifndef CONFIG_IWMMXT
+		asm volatile("mar acc0, %Q0, %R0" : "=r" (acc0));
+#endif
 		break;
 	}
 }
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 8521d7d..ef1c56a 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -24,6 +24,7 @@
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/mach/map.h>
+#include <asm/suspend.h>
 #include <mach/hardware.h>
 #include <mach/gpio.h>
 #include <mach/pxa3xx-regs.h>
@@ -141,8 +142,13 @@
 {
 	volatile unsigned long *p = (volatile void *)0xc0000000;
 	unsigned long saved_data = *p;
+#ifndef CONFIG_IWMMXT
+	u64 acc0;
 
-	extern void pxa3xx_cpu_suspend(long);
+	asm volatile("mra %Q0, %R0, acc0" : "=r" (acc0));
+#endif
+
+	extern int pxa3xx_finish_suspend(unsigned long);
 
 	/* resuming from D2 requires the HSIO2/BOOT/TPM clocks enabled */
 	CKENA |= (1 << CKEN_BOOT) | (1 << CKEN_TPM);
@@ -162,11 +168,15 @@
 	/* overwrite with the resume address */
 	*p = virt_to_phys(cpu_resume);
 
-	pxa3xx_cpu_suspend(PLAT_PHYS_OFFSET - PAGE_OFFSET);
+	cpu_suspend(0, pxa3xx_finish_suspend);
 
 	*p = saved_data;
 
 	AD3ER = 0;
+
+#ifndef CONFIG_IWMMXT
+	asm volatile("mar acc0, %Q0, %R0" : "=r" (acc0));
+#endif
 }
 
 static void pxa3xx_cpu_pm_enter(suspend_state_t state)
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index d130f77..2f37d43 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -573,10 +573,10 @@
 	.xres		= 480,
 	.yres		= 272,
 	.bpp		= 16,
-	.hsync_len	= 4,
+	.hsync_len	= 41,
 	.left_margin	= 2,
 	.right_margin	= 1,
-	.vsync_len	= 1,
+	.vsync_len	= 10,
 	.upper_margin	= 3,
 	.lower_margin	= 1,
 	.sync		= 0,
@@ -596,29 +596,31 @@
 {
 	int ret;
 
-	pxa_set_fb_info(NULL, &raumfeld_sharp_lcd_info);
-
-	/* Earlier devices had the backlight regulator controlled
-	 * via PWM, later versions use another controller for that */
-	if ((system_rev & 0xff) < 2) {
-		mfp_cfg_t raumfeld_pwm_pin_config = GPIO17_PWM0_OUT;
-		pxa3xx_mfp_config(&raumfeld_pwm_pin_config, 1);
-		platform_device_register(&raumfeld_pwm_backlight_device);
-	} else
-		platform_device_register(&raumfeld_lt3593_device);
-
 	ret = gpio_request(GPIO_TFT_VA_EN, "display VA enable");
 	if (ret < 0)
 		pr_warning("Unable to request GPIO_TFT_VA_EN\n");
 	else
 		gpio_direction_output(GPIO_TFT_VA_EN, 1);
 
+	msleep(100);
+
 	ret = gpio_request(GPIO_DISPLAY_ENABLE, "display enable");
 	if (ret < 0)
 		pr_warning("Unable to request GPIO_DISPLAY_ENABLE\n");
 	else
 		gpio_direction_output(GPIO_DISPLAY_ENABLE, 1);
 
+	/* Hardware revision 2 has the backlight regulator controlled
+	 * by an LT3593, earlier and later devices use PWM for that. */
+	if ((system_rev & 0xff) == 2) {
+		platform_device_register(&raumfeld_lt3593_device);
+	} else {
+		mfp_cfg_t raumfeld_pwm_pin_config = GPIO17_PWM0_OUT;
+		pxa3xx_mfp_config(&raumfeld_pwm_pin_config, 1);
+		platform_device_register(&raumfeld_pwm_backlight_device);
+	}
+
+	pxa_set_fb_info(NULL, &raumfeld_sharp_lcd_info);
 	platform_device_register(&pxa3xx_device_gcu);
 }
 
@@ -657,10 +659,10 @@
 
 #define SPI_AK4104	\
 {			\
-	.modalias	= "ak4104",	\
-	.max_speed_hz	= 10000,	\
-	.bus_num	= 0,		\
-	.chip_select	= 0,		\
+	.modalias	= "ak4104-codec",	\
+	.max_speed_hz	= 10000,		\
+	.bus_num	= 0,			\
+	.chip_select	= 0,			\
 	.controller_data = (void *) GPIO_SPDIF_CS,	\
 }
 
diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S
index 6f53688..1e544be 100644
--- a/arch/arm/mach-pxa/sleep.S
+++ b/arch/arm/mach-pxa/sleep.S
@@ -24,20 +24,9 @@
 
 #ifdef CONFIG_PXA3xx
 /*
- * pxa3xx_cpu_suspend() - forces CPU into sleep state (S2D3C4)
- *
- * r0 = v:p offset
+ * pxa3xx_finish_suspend() - forces CPU into sleep state (S2D3C4)
  */
-ENTRY(pxa3xx_cpu_suspend)
-
-#ifndef CONFIG_IWMMXT
-	mra	r2, r3, acc0
-#endif
-	stmfd	sp!, {r2 - r12, lr}	@ save registers on stack
-	mov	r1, r0
-	ldr	r3, =pxa_cpu_resume	@ resume function
-	bl	cpu_suspend
-
+ENTRY(pxa3xx_finish_suspend)
 	mov	r0, #0x06		@ S2D3C4 mode
 	mcr	p14, 0, r0, c7, c0, 0	@ enter sleep
 
@@ -46,28 +35,18 @@
 
 #ifdef CONFIG_PXA27x
 /*
- * pxa27x_cpu_suspend()
+ * pxa27x_finish_suspend()
  *
  * Forces CPU into sleep state.
  *
  * r0 = value for PWRMODE M field for desired sleep state
- * r1 = v:p offset
  */
-ENTRY(pxa27x_cpu_suspend)
-
-#ifndef CONFIG_IWMMXT
-	mra	r2, r3, acc0
-#endif
-	stmfd	sp!, {r2 - r12, lr}		@ save registers on stack
-	mov	r4, r0				@ save sleep mode
-	ldr	r3, =pxa_cpu_resume		@ resume function
-	bl	cpu_suspend
-
+ENTRY(pxa27x_finish_suspend)
 	@ Put the processor to sleep
 	@ (also workaround for sighting 28071)
 
 	@ prepare value for sleep mode
-	mov	r1, r4				@ sleep mode
+	mov	r1, r0				@ sleep mode
 
 	@ prepare pointer to physical address 0 (virtual mapping in generic.c)
 	mov	r2, #UNCACHED_PHYS_0
@@ -99,21 +78,16 @@
 
 #ifdef CONFIG_PXA25x
 /*
- * pxa25x_cpu_suspend()
+ * pxa25x_finish_suspend()
  *
  * Forces CPU into sleep state.
  *
  * r0 = value for PWRMODE M field for desired sleep state
- * r1 = v:p offset
  */
 
-ENTRY(pxa25x_cpu_suspend)
-	stmfd	sp!, {r2 - r12, lr}		@ save registers on stack
-	mov	r4, r0				@ save sleep mode
-	ldr	r3, =pxa_cpu_resume		@ resume function
-	bl	cpu_suspend
+ENTRY(pxa25x_finish_suspend)
 	@ prepare value for sleep mode
-	mov	r1, r4				@ sleep mode
+	mov	r1, r0				@ sleep mode
 
 	@ prepare pointer to physical address 0 (virtual mapping in generic.c)
 	mov	r2, #UNCACHED_PHYS_0
@@ -195,16 +169,3 @@
 	mcr	p14, 0, r1, c7, c0, 0		@ PWRMODE
 
 20:	b	20b				@ loop waiting for sleep
-
-/*
- * pxa_cpu_resume()
- *
- * entry point from bootloader into kernel during resume
- */
-	.align 5
-pxa_cpu_resume:
-	ldmfd	sp!, {r2, r3}
-#ifndef CONFIG_IWMMXT
-	mar	acc0, r2, r3
-#endif
-	ldmfd	sp!, {r4 - r12, pc}		@ return to caller
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index 00363c7..9b99cc1 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -31,6 +31,7 @@
 #include <linux/can/platform/mcp251x.h>
 
 #include <asm/mach-types.h>
+#include <asm/suspend.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
@@ -676,7 +677,7 @@
 static void zeus_power_off(void)
 {
 	local_irq_disable();
-	pxa27x_cpu_suspend(PWRMODE_DEEPSLEEP, PLAT_PHYS_OFFSET - PAGE_OFFSET);
+	cpu_suspend(PWRMODE_DEEPSLEEP, pxa27x_finish_suspend);
 }
 #else
 #define zeus_power_off   NULL
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index b9a9805..dba6d0c 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -50,6 +50,7 @@
 	bool "Support RealView(R) Platform Baseboard for ARM1176JZF-S"
 	select CPU_V6
 	select ARM_GIC
+	select HAVE_TCM
 	help
 	  Include support for the ARM(R) RealView(R) Platform Baseboard for
 	  ARM1176JZF-S.
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 963bf0d..4ae943b 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -68,14 +68,6 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 	scu_enable(scu_base_addr());
 
diff --git a/arch/arm/mach-s3c2412/pm.c b/arch/arm/mach-s3c2412/pm.c
index 752b13a..f4077ef 100644
--- a/arch/arm/mach-s3c2412/pm.c
+++ b/arch/arm/mach-s3c2412/pm.c
@@ -37,12 +37,10 @@
 
 extern void s3c2412_sleep_enter(void);
 
-static void s3c2412_cpu_suspend(void)
+static int s3c2412_cpu_suspend(unsigned long arg)
 {
 	unsigned long tmp;
 
-	flush_cache_all();
-
 	/* set our standby method to sleep */
 
 	tmp = __raw_readl(S3C2412_PWRCFG);
@@ -50,6 +48,8 @@
 	__raw_writel(tmp, S3C2412_PWRCFG);
 
 	s3c2412_sleep_enter();
+
+	panic("sleep resumed to originator?");
 }
 
 static void s3c2412_pm_prepare(void)
diff --git a/arch/arm/mach-s3c2416/pm.c b/arch/arm/mach-s3c2416/pm.c
index 41db2b2..9ec54f1 100644
--- a/arch/arm/mach-s3c2416/pm.c
+++ b/arch/arm/mach-s3c2416/pm.c
@@ -24,10 +24,8 @@
 
 extern void s3c2412_sleep_enter(void);
 
-static void s3c2416_cpu_suspend(void)
+static int s3c2416_cpu_suspend(unsigned long arg)
 {
-	flush_cache_all();
-
 	/* enable wakeup sources regardless of battery state */
 	__raw_writel(S3C2443_PWRCFG_SLEEP, S3C2443_PWRCFG);
 
@@ -35,6 +33,8 @@
 	__raw_writel(0x2BED, S3C2443_PWRMODE);
 
 	s3c2412_sleep_enter();
+
+	panic("sleep resumed to originator?");
 }
 
 static void s3c2416_pm_prepare(void)
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index b197171..204bfaf 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -113,7 +113,7 @@
 	return chan;
 }
 
-int s3c2410_dma_config(unsigned int channel, int xferunit)
+int s3c2410_dma_config(enum dma_ch channel, int xferunit)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
@@ -297,7 +297,7 @@
 	return 0;
 }
 
-int s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op)
+int s3c2410_dma_ctrl(enum dma_ch channel, enum s3c2410_chan_op op)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
@@ -331,7 +331,7 @@
  *
  */
 
-int s3c2410_dma_enqueue(unsigned int channel, void *id,
+int s3c2410_dma_enqueue(enum dma_ch channel, void *id,
 			dma_addr_t data, int size)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
@@ -415,7 +415,7 @@
 EXPORT_SYMBOL(s3c2410_dma_enqueue);
 
 
-int s3c2410_dma_devconfig(unsigned int channel,
+int s3c2410_dma_devconfig(enum dma_ch channel,
 			  enum s3c2410_dmasrc source,
 			  unsigned long devaddr)
 {
@@ -463,7 +463,7 @@
 EXPORT_SYMBOL(s3c2410_dma_devconfig);
 
 
-int s3c2410_dma_getposition(unsigned int channel,
+int s3c2410_dma_getposition(enum dma_ch channel,
 			    dma_addr_t *src, dma_addr_t *dst)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
@@ -487,7 +487,7 @@
  * get control of an dma channel
 */
 
-int s3c2410_dma_request(unsigned int channel,
+int s3c2410_dma_request(enum dma_ch channel,
 			struct s3c2410_dma_client *client,
 			void *dev)
 {
@@ -533,7 +533,7 @@
  * allowed to go through.
 */
 
-int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *client)
+int s3c2410_dma_free(enum dma_ch channel, struct s3c2410_dma_client *client)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	unsigned long flags;
diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c
index bc1c470..8bad643 100644
--- a/arch/arm/mach-s3c64xx/pm.c
+++ b/arch/arm/mach-s3c64xx/pm.c
@@ -112,7 +112,7 @@
  * this.
  */
 
-static void s3c64xx_cpu_suspend(void)
+static int s3c64xx_cpu_suspend(unsigned long arg)
 {
 	unsigned long tmp;
 
diff --git a/arch/arm/mach-s3c64xx/sleep.S b/arch/arm/mach-s3c64xx/sleep.S
index 1f87732..34313f9 100644
--- a/arch/arm/mach-s3c64xx/sleep.S
+++ b/arch/arm/mach-s3c64xx/sleep.S
@@ -25,29 +25,6 @@
 
 	.text
 
-	/* s3c_cpu_save
-	 *
-	 * Save enough processor state to allow the restart of the pm.c
-	 * code after resume.
-	 *
-	 * entry:
-	 *	r1 = v:p offset
-	*/
-
-ENTRY(s3c_cpu_save)
-	stmfd	sp!, { r4 - r12, lr }
-	ldr	r3, =resume_with_mmu
-	bl	cpu_suspend
-
-	@@ call final suspend code
-	ldr	r0, =pm_cpu_sleep
-	ldr	pc, [r0]
-	
-	@@ return to the caller, after the MMU is turned on.
-	@@ restore the last bits of the stack and return.
-resume_with_mmu:
-	ldmfd	sp!, { r4 - r12, pc }	@ return, from sp from s3c_cpu_save
-
 	/* Sleep magic, the word before the resume entry point so that the
 	 * bootloader can check for a resumeable image. */
 
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index 24febae..309e388 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -88,7 +88,7 @@
 	SAVE_ITEM(S3C2410_TCNTO(0)),
 };
 
-void s5pv210_cpu_suspend(void)
+void s5pv210_cpu_suspend(unsigned long arg)
 {
 	unsigned long tmp;
 
diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S
index a3d6494..e3452cc 100644
--- a/arch/arm/mach-s5pv210/sleep.S
+++ b/arch/arm/mach-s5pv210/sleep.S
@@ -32,27 +32,6 @@
 
 	.text
 
-	/* s3c_cpu_save
-	 *
-	 * entry:
-	 *	r1 = v:p offset
-	*/
-
-ENTRY(s3c_cpu_save)
-
-	stmfd	sp!, { r3 - r12, lr }
-	ldr	r3, =resume_with_mmu
-	bl	cpu_suspend
-
-	ldr	r0, =pm_cpu_sleep
-	ldr	r0, [ r0 ]
-	mov	pc, r0
-
-resume_with_mmu:
-	ldmfd	sp!, { r3 - r12, pc }
-
-	.ltorg
-
 	/* sleep magic, to allow the bootloader to check for an valid
 	 * image to resume to. Must be the first word before the
 	 * s3c_cpu_resume entry.
diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c
index c4661aa..bf85b8b 100644
--- a/arch/arm/mach-sa1100/pm.c
+++ b/arch/arm/mach-sa1100/pm.c
@@ -29,10 +29,11 @@
 
 #include <mach/hardware.h>
 #include <asm/memory.h>
+#include <asm/suspend.h>
 #include <asm/system.h>
 #include <asm/mach/time.h>
 
-extern void sa1100_cpu_suspend(long);
+extern int sa1100_finish_suspend(unsigned long);
 
 #define SAVE(x)		sleep_save[SLEEP_SAVE_##x] = x
 #define RESTORE(x)	x = sleep_save[SLEEP_SAVE_##x]
@@ -75,9 +76,7 @@
 	PSPR = virt_to_phys(cpu_resume);
 
 	/* go zzz */
-	sa1100_cpu_suspend(PLAT_PHYS_OFFSET - PAGE_OFFSET);
-
-	cpu_init();
+	cpu_suspend(0, sa1100_finish_suspend);
 
 	/*
 	 * Ensure not to come back here if it wasn't intended
diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S
index 04f2a61..e822331 100644
--- a/arch/arm/mach-sa1100/sleep.S
+++ b/arch/arm/mach-sa1100/sleep.S
@@ -22,18 +22,13 @@
 
 		.text
 /*
- * sa1100_cpu_suspend()
+ * sa1100_finish_suspend()
  *
  * Causes sa11x0 to enter sleep state
  *
  */
 
-ENTRY(sa1100_cpu_suspend)
-	stmfd	sp!, {r4 - r12, lr}		@ save registers on stack
-	mov	r1, r0
-	ldr	r3, =sa1100_cpu_resume		@ return function
-	bl	cpu_suspend
-
+ENTRY(sa1100_finish_suspend)
 	@ disable clock switching
 	mcr	p15, 0, r1, c15, c2, 2
 
@@ -139,13 +134,3 @@
 	str	r13, [r12]
 
 20:	b	20b			@ loop waiting for sleep
-
-/*
- * cpu_sa1100_resume()
- *
- * entry point from bootloader into kernel during resume
- */
-	.align 5
-sa1100_cpu_resume:
-	mcr	p15, 0, r1, c15, c1, 2		@ enable clock switching
-	ldmfd	sp!, {r4 - r12, pc}		@ return to caller
diff --git a/arch/arm/mach-shark/include/mach/entry-macro.S b/arch/arm/mach-shark/include/mach/entry-macro.S
index e2853c0..0bb6cc6 100644
--- a/arch/arm/mach-shark/include/mach/entry-macro.S
+++ b/arch/arm/mach-shark/include/mach/entry-macro.S
@@ -11,17 +11,17 @@
 		.endm
 
 		.macro  get_irqnr_preamble, base, tmp
+		mov	\base, #0xe0000000
 		.endm
 
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
 
 		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-		mov	r4, #0xe0000000
 
 		mov	\irqstat, #0x0C
-		strb	\irqstat, [r4, #0x20]		@outb(0x0C, 0x20) /* Poll command */
-		ldrb	\irqnr, [r4, #0x20]		@irq = inb(0x20) & 7
+		strb	\irqstat, [\base, #0x20]	@outb(0x0C, 0x20) /* Poll command */
+		ldrb	\irqnr, [\base, #0x20]		@irq = inb(0x20) & 7
 		and	\irqstat, \irqnr, #0x80
 		teq	\irqstat, #0
 		beq	43f
@@ -29,8 +29,8 @@
 		teq	\irqnr, #2
 		bne	44f
 43:		mov	\irqstat, #0x0C
-		strb	\irqstat, [r4, #0xa0]		@outb(0x0C, 0xA0) /* Poll command */
-		ldrb	\irqnr, [r4, #0xa0]		@irq = (inb(0xA0) & 7) + 8
+		strb	\irqstat, [\base, #0xa0]	@outb(0x0C, 0xA0) /* Poll command */
+		ldrb	\irqnr, [\base, #0xa0]		@irq = (inb(0xA0) & 7) + 8
 		and	\irqstat, \irqnr, #0x80
 		teq	\irqstat, #0
 		beq	44f
diff --git a/arch/arm/mach-shmobile/include/mach/sdhi-sh7372.h b/arch/arm/mach-shmobile/include/mach/sdhi-sh7372.h
new file mode 100644
index 0000000..4a81b01
--- /dev/null
+++ b/arch/arm/mach-shmobile/include/mach/sdhi-sh7372.h
@@ -0,0 +1,21 @@
+#ifndef SDHI_SH7372_H
+#define SDHI_SH7372_H
+
+#define SDGENCNTA       0xfe40009c
+
+/* The countdown of SDGENCNTA is controlled by
+ * ZB3D2CLK which runs at 149.5MHz.
+ * That is 149.5ticks/us. Approximate this as 150ticks/us.
+ */
+static void udelay(int us)
+{
+	__raw_writel(us * 150, SDGENCNTA);
+	while(__raw_readl(SDGENCNTA)) ;
+}
+
+static void msleep(int ms)
+{
+	udelay(ms * 1000);
+}
+
+#endif
diff --git a/arch/arm/mach-shmobile/include/mach/sdhi.h b/arch/arm/mach-shmobile/include/mach/sdhi.h
new file mode 100644
index 0000000..0ec9e69
--- /dev/null
+++ b/arch/arm/mach-shmobile/include/mach/sdhi.h
@@ -0,0 +1,16 @@
+#ifndef SDHI_H
+#define SDHI_H
+
+/**************************************************
+ *
+ *		CPU specific settings
+ *
+ **************************************************/
+
+#ifdef CONFIG_ARCH_SH7372
+#include "mach/sdhi-sh7372.h"
+#else
+#error "unsupported CPU."
+#endif
+
+#endif /* SDHI_H */
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index f3888fe..66f9806 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -64,10 +64,5 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
-
 	shmobile_smp_prepare_cpus();
 }
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index b8ae3c9..1a594dc 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -129,14 +129,6 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 	scu_enable(scu_base);
 }
diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
index 0c527fe..a33df5f 100644
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -172,14 +172,6 @@
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 	scu_enable(scu_base_addr());
 	wakeup_secondary();
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 765a71f..bfd32f5 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -229,10 +229,6 @@
 
 static void ct_ca9x4_smp_enable(unsigned int max_cpus)
 {
-	int i;
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
-
 	scu_enable(MMIO_P2V(A9_MPCORE_SCU));
 }
 #endif
diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S
index 4f18f9e..54473cd4 100644
--- a/arch/arm/mm/abort-ev4.S
+++ b/arch/arm/mm/abort-ev4.S
@@ -3,14 +3,11 @@
 /*
  * Function: v4_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -21,10 +18,8 @@
 ENTRY(v4_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	ldr	r3, [r2]			@ read aborted ARM instruction
+	ldr	r3, [r4]			@ read aborted ARM instruction
 	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
 	tst	r3, #1 << 20			@ L = 1 -> write?
 	orreq	r1, r1, #1 << 11		@ yes.
-	mov	pc, lr
-
-
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S
index b628254..9da704e 100644
--- a/arch/arm/mm/abort-ev4t.S
+++ b/arch/arm/mm/abort-ev4t.S
@@ -4,14 +4,11 @@
 /*
  * Function: v4t_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -22,9 +19,9 @@
 ENTRY(v4t_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	do_thumb_abort
-	ldreq	r3, [r2]			@ read aborted ARM instruction
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
 	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
 	tst	r3, #1 << 20			@ check write
 	orreq	r1, r1, #1 << 11
-	mov	pc, lr
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S
index 02251b5..a0908d4 100644
--- a/arch/arm/mm/abort-ev5t.S
+++ b/arch/arm/mm/abort-ev5t.S
@@ -4,14 +4,11 @@
 /*
  * Function: v5t_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -22,10 +19,10 @@
 ENTRY(v5t_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	do_thumb_abort
-	ldreq	r3, [r2]			@ read aborted ARM instruction
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
 	bic	r1, r1, #1 << 11		@ clear bits 11 of FSR
-	do_ldrd_abort
+	do_ldrd_abort tmp=ip, insn=r3
 	tst	r3, #1 << 20			@ check write
 	orreq	r1, r1, #1 << 11
-	mov	pc, lr
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S
index bce68d6..4006b7a 100644
--- a/arch/arm/mm/abort-ev5tj.S
+++ b/arch/arm/mm/abort-ev5tj.S
@@ -4,14 +4,11 @@
 /*
  * Function: v5tj_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -23,13 +20,11 @@
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
 	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
-	tst	r3, #PSR_J_BIT			@ Java?
-	movne	pc, lr
-	do_thumb_abort
-	ldreq	r3, [r2]			@ read aborted ARM instruction
-	do_ldrd_abort
+	tst	r5, #PSR_J_BIT			@ Java?
+	bne	do_DataAbort
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
+	do_ldrd_abort tmp=ip, insn=r3
 	tst	r3, #1 << 20			@ L = 0 -> write
 	orreq	r1, r1, #1 << 11		@ yes.
-	mov	pc, lr
-
-
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 1478aa5..ff1f7cc 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -4,14 +4,11 @@
 /*
  * Function: v6_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -33,16 +30,14 @@
  * The test below covers all the write situations, including Java bytecodes
  */
 	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR
-	tst	r3, #PSR_J_BIT			@ Java?
-	movne	pc, lr
-	do_thumb_abort
-	ldreq	r3, [r2]			@ read aborted ARM instruction
+	tst	r5, #PSR_J_BIT			@ Java?
+	bne	do_DataAbort
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	reveq	r3, r3
 #endif
-	do_ldrd_abort
+	do_ldrd_abort tmp=ip, insn=r3
 	tst	r3, #1 << 20			@ L = 0 -> write
 	orreq	r1, r1, #1 << 11		@ yes.
-	mov	pc, lr
-
-
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index ec88b15..7033752 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -3,14 +3,11 @@
 /*
  * Function: v7_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  */
@@ -37,18 +34,18 @@
 	ldr	r3, =0x40d			@ On permission fault
 	and	r3, r1, r3
 	cmp	r3, #0x0d
-	movne	pc, lr
+	bne	do_DataAbort
 
 	mcr	p15, 0, r0, c7, c8, 0   	@ Retranslate FAR
 	isb
-	mrc	p15, 0, r2, c7, c4, 0   	@ Read the PAR
-	and	r3, r2, #0x7b   		@ On translation fault
+	mrc	p15, 0, ip, c7, c4, 0   	@ Read the PAR
+	and	r3, ip, #0x7b   		@ On translation fault
 	cmp	r3, #0x0b
-	movne	pc, lr
+	bne	do_DataAbort
 	bic	r1, r1, #0xf			@ Fix up FSR FS[5:0]
-	and	r2, r2, #0x7e
-	orr	r1, r1, r2, LSR #1
+	and	ip, ip, #0x7e
+	orr	r1, r1, ip, LSR #1
 #endif
 
-	mov	pc, lr
+	b	do_DataAbort
 ENDPROC(v7_early_abort)
diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S
index 9fb7b0e..f398258 100644
--- a/arch/arm/mm/abort-lv4t.S
+++ b/arch/arm/mm/abort-lv4t.S
@@ -3,14 +3,11 @@
 /*
  * Function: v4t_late_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR, bit 11 = write
- *	   : r2-r8 = corrupted
- *	   : r9 = preserved
- *	   : sp = pointer to registers
+ * Returns : r4-r5, r10-r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -18,7 +15,7 @@
  * picture.  Unfortunately, this does happen.  We live with it.
  */
 ENTRY(v4t_late_abort)
-	tst	r3, #PSR_T_BIT			@ check for thumb mode
+	tst	r5, #PSR_T_BIT			@ check for thumb mode
 #ifdef CONFIG_CPU_CP15_MMU
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
@@ -28,7 +25,7 @@
 	mov	r1, #0
 #endif
 	bne	.data_thumb_abort
-	ldr	r8, [r2]			@ read arm instruction
+	ldr	r8, [r4]			@ read arm instruction
 	tst	r8, #1 << 20			@ L = 1 -> write?
 	orreq	r1, r1, #1 << 11		@ yes.
 	and	r7, r8, #15 << 24
@@ -47,86 +44,84 @@
 /* 9 */	b	.data_arm_ldmstm		@ ldm*b	rn, <rlist>
 /* a */	b	.data_unknown
 /* b */	b	.data_unknown
-/* c */	mov	pc, lr				@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
-/* d */	mov	pc, lr				@ ldc	rd, [rn, #m]
+/* c */	b	do_DataAbort			@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
+/* d */	b	do_DataAbort			@ ldc	rd, [rn, #m]
 /* e */	b	.data_unknown
 /* f */
 .data_unknown:	@ Part of jumptable
-	mov	r0, r2
+	mov	r0, r4
 	mov	r1, r8
-	mov	r2, sp
-	bl	baddataabort
-	b	ret_from_exception
+	b	baddataabort
 
 .data_arm_ldmstm:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 	mov	r7, #0x11
 	orr	r7, r7, #0x1100
 	and	r6, r8, r7
-	and	r2, r8, r7, lsl #1
-	add	r6, r6, r2, lsr #1
-	and	r2, r8, r7, lsl #2
-	add	r6, r6, r2, lsr #2
-	and	r2, r8, r7, lsl #3
-	add	r6, r6, r2, lsr #3
+	and	r9, r8, r7, lsl #1
+	add	r6, r6, r9, lsr #1
+	and	r9, r8, r7, lsl #2
+	add	r6, r6, r9, lsr #2
+	and	r9, r8, r7, lsl #3
+	add	r6, r6, r9, lsr #3
 	add	r6, r6, r6, lsr #8
 	add	r6, r6, r6, lsr #4
 	and	r6, r6, #15			@ r6 = no. of registers to transfer.
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
 	subne	r7, r7, r6, lsl #2		@ Undo increment
 	addeq	r7, r7, r6, lsl #2		@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_lateldrhpre:
 	tst	r8, #1 << 21			@ Check writeback bit
-	moveq	pc, lr				@ No writeback -> no fixup
+	beq	do_DataAbort			@ No writeback -> no fixup
 .data_arm_lateldrhpost:
-	and	r5, r8, #0x00f			@ get Rm / low nibble of immediate value
+	and	r9, r8, #0x00f			@ get Rm / low nibble of immediate value
 	tst	r8, #1 << 22			@ if (immediate offset)
 	andne	r6, r8, #0xf00			@ { immediate high nibble
-	orrne	r6, r5, r6, lsr #4		@   combine nibbles } else
-	ldreq	r6, [sp, r5, lsl #2]		@ { load Rm value }
+	orrne	r6, r9, r6, lsr #4		@   combine nibbles } else
+	ldreq	r6, [r2, r9, lsl #2]		@ { load Rm value }
 .data_arm_apply_r6_and_rn:
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
 	subne	r7, r7, r6			@ Undo incrmenet
 	addeq	r7, r7, r6			@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_lateldrpreconst:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 .data_arm_lateldrpostconst:
-	movs	r2, r8, lsl #20			@ Get offset
-	moveq	pc, lr				@ zero -> no fixup
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	movs	r6, r8, lsl #20			@ Get offset
+	beq	do_DataAbort			@ zero -> no fixup
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
-	subne	r7, r7, r2, lsr #20		@ Undo increment
-	addeq	r7, r7, r2, lsr #20		@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	subne	r7, r7, r6, lsr #20		@ Undo increment
+	addeq	r7, r7, r6, lsr #20		@ Undo decrement
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_lateldrprereg:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 .data_arm_lateldrpostreg:
 	and	r7, r8, #15			@ Extract 'm' from instruction
-	ldr	r6, [sp, r7, lsl #2]		@ Get register 'Rm'
-	mov	r5, r8, lsr #7			@ get shift count
-	ands	r5, r5, #31
+	ldr	r6, [r2, r7, lsl #2]		@ Get register 'Rm'
+	mov	r9, r8, lsr #7			@ get shift count
+	ands	r9, r9, #31
 	and	r7, r8, #0x70			@ get shift type
 	orreq	r7, r7, #8			@ shift count = 0
 	add	pc, pc, r7
 	nop
 
-	mov	r6, r6, lsl r5			@ 0: LSL #!0
+	mov	r6, r6, lsl r9			@ 0: LSL #!0
 	b	.data_arm_apply_r6_and_rn
 	b	.data_arm_apply_r6_and_rn	@ 1: LSL #0
 	nop
@@ -134,7 +129,7 @@
 	nop
 	b	.data_unknown			@ 3: MUL?
 	nop
-	mov	r6, r6, lsr r5			@ 4: LSR #!0
+	mov	r6, r6, lsr r9			@ 4: LSR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, lsr #32			@ 5: LSR #32
 	b	.data_arm_apply_r6_and_rn
@@ -142,7 +137,7 @@
 	nop
 	b	.data_unknown			@ 7: MUL?
 	nop
-	mov	r6, r6, asr r5			@ 8: ASR #!0
+	mov	r6, r6, asr r9			@ 8: ASR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, asr #32			@ 9: ASR #32
 	b	.data_arm_apply_r6_and_rn
@@ -150,7 +145,7 @@
 	nop
 	b	.data_unknown			@ B: MUL?
 	nop
-	mov	r6, r6, ror r5			@ C: ROR #!0
+	mov	r6, r6, ror r9			@ C: ROR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, rrx			@ D: RRX
 	b	.data_arm_apply_r6_and_rn
@@ -159,7 +154,7 @@
 	b	.data_unknown			@ F: MUL?
 
 .data_thumb_abort:
-	ldrh	r8, [r2]			@ read instruction
+	ldrh	r8, [r4]			@ read instruction
 	tst	r8, #1 << 11			@ L = 1 -> write?
 	orreq	r1, r1, #1 << 8			@ yes
 	and	r7, r8, #15 << 12
@@ -172,10 +167,10 @@
 /* 3 */	b	.data_unknown
 /* 4 */	b	.data_unknown
 /* 5 */	b	.data_thumb_reg
-/* 6 */	mov	pc, lr
-/* 7 */	mov	pc, lr
-/* 8 */	mov	pc, lr
-/* 9 */	mov	pc, lr
+/* 6 */	b	do_DataAbort
+/* 7 */	b	do_DataAbort
+/* 8 */	b	do_DataAbort
+/* 9 */	b	do_DataAbort
 /* A */	b	.data_unknown
 /* B */	b	.data_thumb_pushpop
 /* C */	b	.data_thumb_ldmstm
@@ -185,41 +180,41 @@
 
 .data_thumb_reg:
 	tst	r8, #1 << 9
-	moveq	pc, lr
+	beq	do_DataAbort
 	tst	r8, #1 << 10			@ If 'S' (signed) bit is set
 	movne	r1, #0				@ it must be a load instr
-	mov	pc, lr
+	b	do_DataAbort
 
 .data_thumb_pushpop:
 	tst	r8, #1 << 10
 	beq	.data_unknown
 	and	r6, r8, #0x55			@ hweight8(r8) + R bit
-	and	r2, r8, #0xaa
-	add	r6, r6, r2, lsr #1
-	and	r2, r6, #0xcc
+	and	r9, r8, #0xaa
+	add	r6, r6, r9, lsr #1
+	and	r9, r6, #0xcc
 	and	r6, r6, #0x33
-	add	r6, r6, r2, lsr #2
+	add	r6, r6, r9, lsr #2
 	movs	r7, r8, lsr #9			@ C = r8 bit 8 (R bit)
 	adc	r6, r6, r6, lsr #4		@ high + low nibble + R bit
 	and	r6, r6, #15			@ number of regs to transfer
-	ldr	r7, [sp, #13 << 2]
+	ldr	r7, [r2, #13 << 2]
 	tst	r8, #1 << 11
 	addeq	r7, r7, r6, lsl #2		@ increment SP if PUSH
 	subne	r7, r7, r6, lsl #2		@ decrement SP if POP
-	str	r7, [sp, #13 << 2]
-	mov	pc, lr
+	str	r7, [r2, #13 << 2]
+	b	do_DataAbort
 
 .data_thumb_ldmstm:
 	and	r6, r8, #0x55			@ hweight8(r8)
-	and	r2, r8, #0xaa
-	add	r6, r6, r2, lsr #1
-	and	r2, r6, #0xcc
+	and	r9, r8, #0xaa
+	add	r6, r6, r9, lsr #1
+	and	r9, r6, #0xcc
 	and	r6, r6, #0x33
-	add	r6, r6, r2, lsr #2
+	add	r6, r6, r9, lsr #2
 	add	r6, r6, r6, lsr #4
-	and	r5, r8, #7 << 8
-	ldr	r7, [sp, r5, lsr #6]
+	and	r9, r8, #7 << 8
+	ldr	r7, [r2, r9, lsr #6]
 	and	r6, r6, #15			@ number of regs to transfer
 	sub	r7, r7, r6, lsl #2		@ always decrement
-	str	r7, [sp, r5, lsr #6]
-	mov	pc, lr
+	str	r7, [r2, r9, lsr #6]
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S
index d7cb1bf..52162d5 100644
--- a/arch/arm/mm/abort-macro.S
+++ b/arch/arm/mm/abort-macro.S
@@ -9,34 +9,32 @@
  *
  */
 
-	.macro	do_thumb_abort
-	tst	r3, #PSR_T_BIT
+	.macro	do_thumb_abort, fsr, pc, psr, tmp
+	tst	\psr, #PSR_T_BIT
 	beq	not_thumb
-	ldrh	r3, [r2]			@ Read aborted Thumb instruction
-	and	r3, r3, # 0xfe00		@ Mask opcode field
-	cmp	r3, # 0x5600			@ Is it ldrsb?
-	orreq	r3, r3, #1 << 11		@ Set L-bit if yes
-	tst	r3, #1 << 11			@ L = 0 -> write
-	orreq	r1, r1, #1 << 11		@ yes.
-	mov	pc, lr
+	ldrh	\tmp, [\pc]			@ Read aborted Thumb instruction
+	and	\tmp, \tmp, # 0xfe00		@ Mask opcode field
+	cmp	\tmp, # 0x5600			@ Is it ldrsb?
+	orreq	\tmp, \tmp, #1 << 11		@ Set L-bit if yes
+	tst	\tmp, #1 << 11			@ L = 0 -> write
+	orreq	\psr, \psr, #1 << 11		@ yes.
+	b	do_DataAbort
 not_thumb:
 	.endm
 
 /*
- * We check for the following insturction encoding for LDRD.
+ * We check for the following instruction encoding for LDRD.
  *
- * [27:25] == 0
+ * [27:25] == 000
  *   [7:4] == 1101
  *    [20] == 0
  */
- 	.macro	do_ldrd_abort
- 	tst	r3, #0x0e000000			@ [27:25] == 0
+	.macro	do_ldrd_abort, tmp, insn
+	tst	\insn, #0x0e100000		@ [27:25,20] == 0
 	bne	not_ldrd
-	and	r2, r3, #0x000000f0		@ [7:4] == 1101
-	cmp	r2, #0x000000d0
-	bne	not_ldrd
-	tst	r3, #1 << 20			@ [20] == 0
-	moveq	pc, lr
+	and	\tmp, \insn, #0x000000f0	@ [7:4] == 1101
+	cmp	\tmp, #0x000000d0
+	beq	do_DataAbort
 not_ldrd:
 	.endm
 
diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S
index 625e580..119cb47 100644
--- a/arch/arm/mm/abort-nommu.S
+++ b/arch/arm/mm/abort-nommu.S
@@ -3,11 +3,11 @@
 /*
  * Function: nommu_early_abort
  *
- * Params  : r2 = address of aborted instruction
- *         : r3 = saved SPSR
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
- * Returns : r0 = 0 (abort address)
- *	   : r1 = 0 (FSR)
+ * Returns : r4 - r11, r13 preserved
  *
  * Note: There is no FSR/FAR on !CPU_CP15_MMU cores.
  *       Just fill zero into the registers.
@@ -16,5 +16,5 @@
 ENTRY(nommu_early_abort)
 	mov	r0, #0				@ clear r0, r1 (no FSR/FAR)
 	mov	r1, #0
-	mov	pc, lr
+	b	do_DataAbort
 ENDPROC(nommu_early_abort)
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 724ba3b..be7c638 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -727,6 +727,9 @@
 	int isize = 4;
 	int thumb2_32b = 0;
 
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	instrptr = instruction_pointer(regs);
 
 	fs = get_fs();
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index bdba6c6..63cca00 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -41,7 +41,6 @@
 	kfrom = kmap_atomic(from, KM_USER0);
 	kto = kmap_atomic(to, KM_USER1);
 	copy_page(kto, kfrom);
-	__cpuc_flush_dcache_area(kto, PAGE_SIZE);
 	kunmap_atomic(kto, KM_USER1);
 	kunmap_atomic(kfrom, KM_USER0);
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index bc0e1d8..55657c2 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -94,7 +94,7 @@
 
 		pud = pud_offset(pgd, addr);
 		if (PTRS_PER_PUD != 1)
-			printk(", *pud=%08lx", pud_val(*pud));
+			printk(", *pud=%08llx", (long long)pud_val(*pud));
 
 		if (pud_none(*pud))
 			break;
@@ -285,6 +285,10 @@
 	tsk = current;
 	mm  = tsk->mm;
 
+	/* Enable interrupts if they were enabled in the parent context. */
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	/*
 	 * If we're in an interrupt or have no user
 	 * context, we must not take the fault..
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 90a38c6..2fee782 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -434,6 +434,17 @@
 	return pages;
 }
 
+/*
+ * Poison init memory with an undefined instruction (ARM) or a branch to an
+ * undefined instruction (Thumb).
+ */
+static inline void poison_init_mem(void *s, size_t count)
+{
+	u32 *p = (u32 *)s;
+	while ((count = count - 4))
+		*p++ = 0xe7fddef0;
+}
+
 static inline void
 free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -651,8 +662,8 @@
 			"    pkmap   : 0x%08lx - 0x%08lx   (%4ld MB)\n"
 #endif
 			"    modules : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-			"      .init : 0x%p" " - 0x%p" "   (%4d kB)\n"
 			"      .text : 0x%p" " - 0x%p" "   (%4d kB)\n"
+			"      .init : 0x%p" " - 0x%p" "   (%4d kB)\n"
 			"      .data : 0x%p" " - 0x%p" "   (%4d kB)\n"
 			"       .bss : 0x%p" " - 0x%p" "   (%4d kB)\n",
 
@@ -674,8 +685,8 @@
 #endif
 			MLM(MODULES_VADDR, MODULES_END),
 
-			MLK_ROUNDUP(__init_begin, __init_end),
 			MLK_ROUNDUP(_text, _etext),
+			MLK_ROUNDUP(__init_begin, __init_end),
 			MLK_ROUNDUP(_sdata, _edata),
 			MLK_ROUNDUP(__bss_start, __bss_stop));
 
@@ -716,11 +727,13 @@
 #ifdef CONFIG_HAVE_TCM
 	extern char __tcm_start, __tcm_end;
 
+	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
 	totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)),
 				    __phys_to_pfn(__pa(&__tcm_end)),
 				    "TCM link");
 #endif
 
+	poison_init_mem(__init_begin, __init_end - __init_begin);
 	if (!machine_is_integrator() && !machine_is_cintegrator())
 		totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
 					    __phys_to_pfn(__pa(__init_end)),
@@ -733,10 +746,12 @@
 
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (!keep_initrd)
+	if (!keep_initrd) {
+		poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
 		totalram_pages += free_area(__phys_to_pfn(__pa(start)),
 					    __phys_to_pfn(__pa(end)),
 					    "initrd");
+	}
 }
 
 static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S
index 87970eb..8bbff02 100644
--- a/arch/arm/mm/pabort-legacy.S
+++ b/arch/arm/mm/pabort-legacy.S
@@ -4,16 +4,18 @@
 /*
  * Function: legacy_pabort
  *
- * Params  : r0 = address of aborted instruction
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
  *
- * Returns : r0 = address of abort
- *	   : r1 = Simulated IFSR with section translation fault status
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current prefetch abort.
  */
 
 	.align	5
 ENTRY(legacy_pabort)
+	mov	r0, r4
 	mov	r1, #5
-	mov	pc, lr
+	b	do_PrefetchAbort
 ENDPROC(legacy_pabort)
diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S
index 06e3d1e..9627646 100644
--- a/arch/arm/mm/pabort-v6.S
+++ b/arch/arm/mm/pabort-v6.S
@@ -4,16 +4,18 @@
 /*
  * Function: v6_pabort
  *
- * Params  : r0 = address of aborted instruction
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
  *
- * Returns : r0 = address of abort
- *	   : r1 = IFSR
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current prefetch abort.
  */
 
 	.align	5
 ENTRY(v6_pabort)
+	mov	r0, r4
 	mrc	p15, 0, r1, c5, c0, 1		@ get IFSR
-	mov	pc, lr
+	b	do_PrefetchAbort
 ENDPROC(v6_pabort)
diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S
index a8b3b30..875761f 100644
--- a/arch/arm/mm/pabort-v7.S
+++ b/arch/arm/mm/pabort-v7.S
@@ -2,12 +2,13 @@
 #include <asm/assembler.h>
 
 /*
- * Function: v6_pabort
+ * Function: v7_pabort
  *
- * Params  : r0 = address of aborted instruction
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
  *
- * Returns : r0 = address of abort
- *	   : r1 = IFSR
+ * Returns : r4 - r11, r13 preserved
  *
  * Purpose : obtain information about current prefetch abort.
  */
@@ -16,5 +17,5 @@
 ENTRY(v7_pabort)
 	mrc	p15, 0, r0, c6, c0, 2		@ get IFAR
 	mrc	p15, 0, r1, c5, c0, 1		@ get IFSR
-	mov	pc, lr
+	b	do_PrefetchAbort
 ENDPROC(v7_pabort)
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index ebc0ca7..e5b974c 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -29,19 +29,19 @@
 /*
  * Function: arm6_7_data_abort ()
  *
- * Params  : r2 = address of aborted instruction
- *	   : sp = pointer to registers
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
  *
  * Purpose : obtain information about current aborted instruction
  *
- * Returns : r0 = address of abort
- *	   : r1 = FSR
+ * Returns : r4-r5, r10-r11, r13 preserved
  */
 
 ENTRY(cpu_arm7_data_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	ldr	r8, [r2]			@ read arm instruction
+	ldr	r8, [r4]			@ read arm instruction
 	tst	r8, #1 << 20			@ L = 0 -> write?
 	orreq	r1, r1, #1 << 11		@ yes.
 	and	r7, r8, #15 << 24
@@ -49,7 +49,7 @@
 	nop
 
 /* 0 */	b	.data_unknown
-/* 1 */	mov	pc, lr				@ swp
+/* 1 */	b	do_DataAbort			@ swp
 /* 2 */	b	.data_unknown
 /* 3 */	b	.data_unknown
 /* 4 */	b	.data_arm_lateldrpostconst	@ ldr	rd, [rn], #m
@@ -60,87 +60,85 @@
 /* 9 */	b	.data_arm_ldmstm		@ ldm*b	rn, <rlist>
 /* a */	b	.data_unknown
 /* b */	b	.data_unknown
-/* c */	mov	pc, lr				@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
-/* d */	mov	pc, lr				@ ldc	rd, [rn, #m]
+/* c */	b	do_DataAbort			@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
+/* d */	b	do_DataAbort			@ ldc	rd, [rn, #m]
 /* e */	b	.data_unknown
 /* f */
 .data_unknown:	@ Part of jumptable
-	mov	r0, r2
+	mov	r0, r4
 	mov	r1, r8
-	mov	r2, sp
-	bl	baddataabort
-	b	ret_from_exception
+	b	baddataabort
 
 ENTRY(cpu_arm6_data_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	ldr	r8, [r2]			@ read arm instruction
+	ldr	r8, [r4]			@ read arm instruction
 	tst	r8, #1 << 20			@ L = 0 -> write?
 	orreq	r1, r1, #1 << 11		@ yes.
 	and	r7, r8, #14 << 24
 	teq	r7, #8 << 24			@ was it ldm/stm
-	movne	pc, lr
+	bne	do_DataAbort
 
 .data_arm_ldmstm:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 	mov	r7, #0x11
 	orr	r7, r7, #0x1100
 	and	r6, r8, r7
-	and	r2, r8, r7, lsl #1
-	add	r6, r6, r2, lsr #1
-	and	r2, r8, r7, lsl #2
-	add	r6, r6, r2, lsr #2
-	and	r2, r8, r7, lsl #3
-	add	r6, r6, r2, lsr #3
+	and	r9, r8, r7, lsl #1
+	add	r6, r6, r9, lsr #1
+	and	r9, r8, r7, lsl #2
+	add	r6, r6, r9, lsr #2
+	and	r9, r8, r7, lsl #3
+	add	r6, r6, r9, lsr #3
 	add	r6, r6, r6, lsr #8
 	add	r6, r6, r6, lsr #4
 	and	r6, r6, #15			@ r6 = no. of registers to transfer.
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
 	subne	r7, r7, r6, lsl #2		@ Undo increment
 	addeq	r7, r7, r6, lsl #2		@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_apply_r6_and_rn:
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
 	subne	r7, r7, r6			@ Undo incrmenet
 	addeq	r7, r7, r6			@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_lateldrpreconst:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 .data_arm_lateldrpostconst:
-	movs	r2, r8, lsl #20			@ Get offset
-	moveq	pc, lr				@ zero -> no fixup
-	and	r5, r8, #15 << 16		@ Extract 'n' from instruction
-	ldr	r7, [sp, r5, lsr #14]		@ Get register 'Rn'
+	movs	r6, r8, lsl #20			@ Get offset
+	beq	do_DataAbort			@ zero -> no fixup
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
 	tst	r8, #1 << 23			@ Check U bit
-	subne	r7, r7, r2, lsr #20		@ Undo increment
-	addeq	r7, r7, r2, lsr #20		@ Undo decrement
-	str	r7, [sp, r5, lsr #14]		@ Put register 'Rn'
-	mov	pc, lr
+	subne	r7, r7, r6, lsr #20		@ Undo increment
+	addeq	r7, r7, r6, lsr #20		@ Undo decrement
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	b	do_DataAbort
 
 .data_arm_lateldrprereg:
 	tst	r8, #1 << 21			@ check writeback bit
-	moveq	pc, lr				@ no writeback -> no fixup
+	beq	do_DataAbort			@ no writeback -> no fixup
 .data_arm_lateldrpostreg:
 	and	r7, r8, #15			@ Extract 'm' from instruction
-	ldr	r6, [sp, r7, lsl #2]		@ Get register 'Rm'
-	mov	r5, r8, lsr #7			@ get shift count
-	ands	r5, r5, #31
+	ldr	r6, [r2, r7, lsl #2]		@ Get register 'Rm'
+	mov	r9, r8, lsr #7			@ get shift count
+	ands	r9, r9, #31
 	and	r7, r8, #0x70			@ get shift type
 	orreq	r7, r7, #8			@ shift count = 0
 	add	pc, pc, r7
 	nop
 
-	mov	r6, r6, lsl r5			@ 0: LSL #!0
+	mov	r6, r6, lsl r9			@ 0: LSL #!0
 	b	.data_arm_apply_r6_and_rn
 	b	.data_arm_apply_r6_and_rn	@ 1: LSL #0
 	nop
@@ -148,7 +146,7 @@
 	nop
 	b	.data_unknown			@ 3: MUL?
 	nop
-	mov	r6, r6, lsr r5			@ 4: LSR #!0
+	mov	r6, r6, lsr r9			@ 4: LSR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, lsr #32			@ 5: LSR #32
 	b	.data_arm_apply_r6_and_rn
@@ -156,7 +154,7 @@
 	nop
 	b	.data_unknown			@ 7: MUL?
 	nop
-	mov	r6, r6, asr r5			@ 8: ASR #!0
+	mov	r6, r6, asr r9			@ 8: ASR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, asr #32			@ 9: ASR #32
 	b	.data_arm_apply_r6_and_rn
@@ -164,7 +162,7 @@
 	nop
 	b	.data_unknown			@ B: MUL?
 	nop
-	mov	r6, r6, ror r5			@ C: ROR #!0
+	mov	r6, r6, ror r9			@ C: ROR #!0
 	b	.data_arm_apply_r6_and_rn
 	mov	r6, r6, rrx			@ D: RRX
 	b	.data_arm_apply_r6_and_rn
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index e7158785..07219c2 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -34,7 +34,7 @@
  */
 #define DCACHELINESIZE	32
 
-	__INIT
+	.section .text
 
 /*
  * cpu_sa1100_proc_init()
@@ -45,8 +45,6 @@
 	mcr	p15, 0, r0, c9, c0, 5		@ Allow read-buffer operations from userland
 	mov	pc, lr
 
-	.section .text
-
 /*
  * cpu_sa1100_proc_fin()
  *
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
index 7a2e56c..d3ddcf9 100644
--- a/arch/arm/mm/tlb-fa.S
+++ b/arch/arm/mm/tlb-fa.S
@@ -46,7 +46,6 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
 	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
 	mov	pc, lr
 
@@ -60,9 +59,8 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
 	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
-	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush (isb)
 	mov	pc, lr
 
 	__INITDATA
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index a685944..eca07f5 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -54,7 +54,6 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
 	mcr	p15, 0, ip, c7, c10, 4		@ data synchronization barrier
 	mov	pc, lr
 
@@ -83,9 +82,8 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 	mcr	p15, 0, r2, c7, c10, 4		@ data synchronization barrier
-	mcr	p15, 0, r2, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, r2, c7, c5, 4		@ prefetch flush (isb)
 	mov	pc, lr
 
 	__INIT
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index ebd4290..845f461 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -48,9 +48,6 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	ip, #0
-	ALT_SMP(mcr	p15, 0, ip, c7, c1, 6)	@ flush BTAC/BTB Inner Shareable
-	ALT_UP(mcr	p15, 0, ip, c7, c5, 6)	@ flush BTAC/BTB
 	dsb
 	mov	pc, lr
 ENDPROC(v7wbi_flush_user_tlb_range)
@@ -75,9 +72,6 @@
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	mov	r2, #0
-	ALT_SMP(mcr	p15, 0, r2, c7, c1, 6)	@ flush BTAC/BTB Inner Shareable
-	ALT_UP(mcr	p15, 0, r2, c7, c5, 6)	@ flush BTAC/BTB
 	dsb
 	isb
 	mov	pc, lr
diff --git a/arch/arm/plat-mxc/include/mach/entry-macro.S b/arch/arm/plat-mxc/include/mach/entry-macro.S
index 2e49e71..066d464 100644
--- a/arch/arm/plat-mxc/include/mach/entry-macro.S
+++ b/arch/arm/plat-mxc/include/mach/entry-macro.S
@@ -78,7 +78,3 @@
 	movs \irqnr, \irqnr
 #endif
 	.endm
-
-	@ irq priority table (not used)
-	.macro	irq_prio_table
-	.endm
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index 6af3d0b..363c91e 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -394,20 +394,15 @@
 }
 #endif /* CONFIG_PM */
 
-static int __init omap34xx_sram_init(void)
-{
-	_omap3_sram_configure_core_dpll =
-		omap_sram_push(omap3_sram_configure_core_dpll,
-			       omap3_sram_configure_core_dpll_sz);
-	omap_push_sram_idle();
-	return 0;
-}
-#else
+#endif /* CONFIG_ARCH_OMAP3 */
+
 static inline int omap34xx_sram_init(void)
 {
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+	omap3_sram_restore_context();
+#endif
 	return 0;
 }
-#endif
 
 int __init omap_sram_init(void)
 {
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 5b4fffa..41ab97e 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -432,7 +432,7 @@
 	ct->regs.mask = ochip->mask_offset + GPIO_EDGE_MASK_OFF;
 	ct->regs.ack = GPIO_EDGE_CAUSE_OFF;
 	ct->type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_clr_bit;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
 	ct->chip.irq_set_type = gpio_irq_set_type;
diff --git a/arch/arm/plat-pxa/gpio.c b/arch/arm/plat-pxa/gpio.c
index 48ebb94..a11dc36 100644
--- a/arch/arm/plat-pxa/gpio.c
+++ b/arch/arm/plat-pxa/gpio.c
@@ -50,7 +50,7 @@
 	return container_of(c, struct pxa_gpio_chip, chip)->regbase;
 }
 
-static inline struct pxa_gpio_chip *gpio_to_chip(unsigned gpio)
+static inline struct pxa_gpio_chip *gpio_to_pxachip(unsigned gpio)
 {
 	return &pxa_gpio_chips[gpio_to_bank(gpio)];
 }
@@ -161,7 +161,7 @@
 	int gpio = irq_to_gpio(d->irq);
 	unsigned long gpdr, mask = GPIO_bit(gpio);
 
-	c = gpio_to_chip(gpio);
+	c = gpio_to_pxachip(gpio);
 
 	if (type == IRQ_TYPE_PROBE) {
 		/* Don't mess with enabled GPIOs using preconfigured edges or
@@ -230,7 +230,7 @@
 static void pxa_ack_muxed_gpio(struct irq_data *d)
 {
 	int gpio = irq_to_gpio(d->irq);
-	struct pxa_gpio_chip *c = gpio_to_chip(gpio);
+	struct pxa_gpio_chip *c = gpio_to_pxachip(gpio);
 
 	__raw_writel(GPIO_bit(gpio), c->regbase + GEDR_OFFSET);
 }
@@ -238,7 +238,7 @@
 static void pxa_mask_muxed_gpio(struct irq_data *d)
 {
 	int gpio = irq_to_gpio(d->irq);
-	struct pxa_gpio_chip *c = gpio_to_chip(gpio);
+	struct pxa_gpio_chip *c = gpio_to_pxachip(gpio);
 	uint32_t grer, gfer;
 
 	c->irq_mask &= ~GPIO_bit(gpio);
@@ -252,7 +252,7 @@
 static void pxa_unmask_muxed_gpio(struct irq_data *d)
 {
 	int gpio = irq_to_gpio(d->irq);
-	struct pxa_gpio_chip *c = gpio_to_chip(gpio);
+	struct pxa_gpio_chip *c = gpio_to_pxachip(gpio);
 
 	c->irq_mask |= GPIO_bit(gpio);
 	update_edge_detect(c);
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index a79a8ccd..539bd0e 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -712,7 +712,7 @@
  * get control of an dma channel
 */
 
-int s3c2410_dma_request(unsigned int channel,
+int s3c2410_dma_request(enum dma_ch channel,
 			struct s3c2410_dma_client *client,
 			void *dev)
 {
@@ -783,7 +783,7 @@
  * allowed to go through.
 */
 
-int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *client)
+int s3c2410_dma_free(enum dma_ch channel, struct s3c2410_dma_client *client)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	unsigned long flags;
@@ -974,7 +974,7 @@
 }
 
 int
-s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op)
+s3c2410_dma_ctrl(enum dma_ch channel, enum s3c2410_chan_op op)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
@@ -1021,7 +1021,7 @@
  * xfersize:     size of unit in bytes (1,2,4)
 */
 
-int s3c2410_dma_config(unsigned int channel,
+int s3c2410_dma_config(enum dma_ch channel,
 		       int xferunit)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
@@ -1100,7 +1100,7 @@
  * devaddr:   physical address of the source
 */
 
-int s3c2410_dma_devconfig(unsigned int channel,
+int s3c2410_dma_devconfig(enum dma_ch channel,
 			  enum s3c2410_dmasrc source,
 			  unsigned long devaddr)
 {
@@ -1173,7 +1173,7 @@
  * returns the current transfer points for the dma source and destination
 */
 
-int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *dst)
+int s3c2410_dma_getposition(enum dma_ch channel, dma_addr_t *src, dma_addr_t *dst)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
diff --git a/arch/arm/plat-s3c24xx/sleep.S b/arch/arm/plat-s3c24xx/sleep.S
index fd7032f..c5661256 100644
--- a/arch/arm/plat-s3c24xx/sleep.S
+++ b/arch/arm/plat-s3c24xx/sleep.S
@@ -41,31 +41,6 @@
 
 	.text
 
-	/* s3c_cpu_save
-	 *
-	 * entry:
-	 *	r1 = v:p offset
-	*/
-
-ENTRY(s3c_cpu_save)
-	stmfd	sp!, { r4 - r12, lr }
-	ldr	r3, =resume_with_mmu
-	bl	cpu_suspend
-
-	@@ jump to final code to send system to sleep
-	ldr	r0, =pm_cpu_sleep
-	@@ldr	pc, [ r0 ]
-	ldr	r0, [ r0 ]
-	mov	pc, r0
-	
-	@@ return to the caller, after having the MMU
-	@@ turned on, this restores the last bits from the
-	@@ stack
-resume_with_mmu:
-	ldmfd	sp!, { r4 - r12, pc }
-
-	.ltorg
-
 	/* sleep magic, to allow the bootloader to check for an valid
 	 * image to resume to. Must be the first word before the
 	 * s3c_cpu_resume entry.
diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c
index 135abda..327ab9f 100644
--- a/arch/arm/plat-s5p/irq-gpioint.c
+++ b/arch/arm/plat-s5p/irq-gpioint.c
@@ -152,7 +152,7 @@
 	if (!gc)
 		return -ENOMEM;
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->chip.irq_set_type = s5p_gpioint_set_type,
diff --git a/arch/arm/plat-samsung/dma.c b/arch/arm/plat-samsung/dma.c
index cb459dd..6143aa1 100644
--- a/arch/arm/plat-samsung/dma.c
+++ b/arch/arm/plat-samsung/dma.c
@@ -41,7 +41,7 @@
  * irq?
 */
 
-int s3c2410_dma_set_opfn(unsigned int channel, s3c2410_dma_opfn_t rtn)
+int s3c2410_dma_set_opfn(enum dma_ch channel, s3c2410_dma_opfn_t rtn)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
@@ -56,7 +56,7 @@
 }
 EXPORT_SYMBOL(s3c2410_dma_set_opfn);
 
-int s3c2410_dma_set_buffdone_fn(unsigned int channel, s3c2410_dma_cbfn_t rtn)
+int s3c2410_dma_set_buffdone_fn(enum dma_ch channel, s3c2410_dma_cbfn_t rtn)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
@@ -71,7 +71,7 @@
 }
 EXPORT_SYMBOL(s3c2410_dma_set_buffdone_fn);
 
-int s3c2410_dma_setflags(unsigned int channel, unsigned int flags)
+int s3c2410_dma_setflags(enum dma_ch channel, unsigned int flags)
 {
 	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
diff --git a/arch/arm/plat-samsung/include/plat/dma.h b/arch/arm/plat-samsung/include/plat/dma.h
index 2e8f8c6..8c273b7 100644
--- a/arch/arm/plat-samsung/include/plat/dma.h
+++ b/arch/arm/plat-samsung/include/plat/dma.h
@@ -42,6 +42,7 @@
 };
 
 struct s3c2410_dma_chan;
+enum dma_ch;
 
 /* s3c2410_dma_cbfn_t
  *
@@ -62,7 +63,7 @@
  * request a dma channel exclusivley
 */
 
-extern int s3c2410_dma_request(unsigned int channel,
+extern int s3c2410_dma_request(enum dma_ch channel,
 			       struct s3c2410_dma_client *, void *dev);
 
 
@@ -71,14 +72,14 @@
  * change the state of the dma channel
 */
 
-extern int s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op);
+extern int s3c2410_dma_ctrl(enum dma_ch channel, enum s3c2410_chan_op op);
 
 /* s3c2410_dma_setflags
  *
  * set the channel's flags to a given state
 */
 
-extern int s3c2410_dma_setflags(unsigned int channel,
+extern int s3c2410_dma_setflags(enum dma_ch channel,
 				unsigned int flags);
 
 /* s3c2410_dma_free
@@ -86,7 +87,7 @@
  * free the dma channel (will also abort any outstanding operations)
 */
 
-extern int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *);
+extern int s3c2410_dma_free(enum dma_ch channel, struct s3c2410_dma_client *);
 
 /* s3c2410_dma_enqueue
  *
@@ -95,7 +96,7 @@
  * drained before the buffer is given to the DMA system.
 */
 
-extern int s3c2410_dma_enqueue(unsigned int channel, void *id,
+extern int s3c2410_dma_enqueue(enum dma_ch channel, void *id,
 			       dma_addr_t data, int size);
 
 /* s3c2410_dma_config
@@ -103,14 +104,14 @@
  * configure the dma channel
 */
 
-extern int s3c2410_dma_config(unsigned int channel, int xferunit);
+extern int s3c2410_dma_config(enum dma_ch channel, int xferunit);
 
 /* s3c2410_dma_devconfig
  *
  * configure the device we're talking to
 */
 
-extern int s3c2410_dma_devconfig(unsigned int channel,
+extern int s3c2410_dma_devconfig(enum dma_ch channel,
 		enum s3c2410_dmasrc source, unsigned long devaddr);
 
 /* s3c2410_dma_getposition
@@ -118,10 +119,10 @@
  * get the position that the dma transfer is currently at
 */
 
-extern int s3c2410_dma_getposition(unsigned int channel,
+extern int s3c2410_dma_getposition(enum dma_ch channel,
 				   dma_addr_t *src, dma_addr_t *dest);
 
-extern int s3c2410_dma_set_opfn(unsigned int, s3c2410_dma_opfn_t rtn);
-extern int s3c2410_dma_set_buffdone_fn(unsigned int, s3c2410_dma_cbfn_t rtn);
+extern int s3c2410_dma_set_opfn(enum dma_ch, s3c2410_dma_opfn_t rtn);
+extern int s3c2410_dma_set_buffdone_fn(enum dma_ch, s3c2410_dma_cbfn_t rtn);
 
 
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 7fb6f6b..f6749916 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -42,7 +42,7 @@
 /* per-cpu sleep functions */
 
 extern void (*pm_cpu_prep)(void);
-extern void (*pm_cpu_sleep)(void);
+extern int (*pm_cpu_sleep)(unsigned long);
 
 /* Flags for PM Control */
 
@@ -52,10 +52,9 @@
 
 /* from sleep.S */
 
-extern int  s3c_cpu_save(unsigned long *saveblk, long);
 extern void s3c_cpu_resume(void);
 
-extern void s3c2410_cpu_suspend(void);
+extern int s3c2410_cpu_suspend(unsigned long);
 
 /* sleep save info */
 
diff --git a/arch/arm/plat-samsung/irq-uart.c b/arch/arm/plat-samsung/irq-uart.c
index 32582c0..657405c 100644
--- a/arch/arm/plat-samsung/irq-uart.c
+++ b/arch/arm/plat-samsung/irq-uart.c
@@ -54,8 +54,15 @@
 
 	gc = irq_alloc_generic_chip("s3c-uart", 1, uirq->base_irq, reg_base,
 				    handle_level_irq);
+
+	if (!gc) {
+		pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n",
+		       __func__, uirq->base_irq);
+		return;
+	}
+
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->regs.ack = S3C64XX_UINTP;
diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c
index a607546..f714d06 100644
--- a/arch/arm/plat-samsung/irq-vic-timer.c
+++ b/arch/arm/plat-samsung/irq-vic-timer.c
@@ -54,6 +54,13 @@
 
 	s3c_tgc = irq_alloc_generic_chip("s3c-timer", 1, timer_irq,
 					 S3C64XX_TINT_CSTAT, handle_level_irq);
+
+	if (!s3c_tgc) {
+		pr_err("%s: irq_alloc_generic_chip for IRQ %d failed\n",
+		       __func__, timer_irq);
+		return;
+	}
+
 	ct = s3c_tgc->chip_types;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c
index 5c0a440..5fa1742 100644
--- a/arch/arm/plat-samsung/pm.c
+++ b/arch/arm/plat-samsung/pm.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
+#include <asm/suspend.h>
 #include <mach/hardware.h>
 #include <mach/map.h>
 
@@ -231,7 +232,7 @@
 
 
 void (*pm_cpu_prep)(void);
-void (*pm_cpu_sleep)(void);
+int (*pm_cpu_sleep)(unsigned long);
 
 #define any_allowed(mask, allow) (((mask) & (allow)) != (allow))
 
@@ -294,15 +295,11 @@
 
 	s3c_pm_arch_stop_clocks();
 
-	/* s3c_cpu_save will also act as our return point from when
+	/* this will also act as our return point from when
 	 * we resume as it saves its own register state and restores it
 	 * during the resume.  */
 
-	s3c_cpu_save(0, PLAT_PHYS_OFFSET - PAGE_OFFSET);
-
-	/* restore the cpu state using the kernel's cpu init code. */
-
-	cpu_init();
+	cpu_suspend(0, pm_cpu_sleep);
 
 	/* restore the system state */
 
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 9897dcf..2d30c7f 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -77,27 +77,27 @@
 	bne	look_for_VFP_exceptions	@ VFP is already enabled
 
 	DBGSTR1 "enable %x", r10
-	ldr	r3, last_VFP_context_address
+	ldr	r3, vfp_current_hw_state_address
 	orr	r1, r1, #FPEXC_EN	@ user FPEXC has the enable bit set
-	ldr	r4, [r3, r11, lsl #2]	@ last_VFP_context pointer
+	ldr	r4, [r3, r11, lsl #2]	@ vfp_current_hw_state pointer
 	bic	r5, r1, #FPEXC_EX	@ make sure exceptions are disabled
-	cmp	r4, r10
-	beq	check_for_exception	@ we are returning to the same
-					@ process, so the registers are
-					@ still there.  In this case, we do
-					@ not want to drop a pending exception.
+	cmp	r4, r10			@ this thread owns the hw context?
+#ifndef CONFIG_SMP
+	@ For UP, checking that this thread owns the hw context is
+	@ sufficient to determine that the hardware state is valid.
+	beq	vfp_hw_state_valid
+
+	@ On UP, we lazily save the VFP context.  As a different
+	@ thread wants ownership of the VFP hardware, save the old
+	@ state if there was a previous (valid) owner.
 
 	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
 					@ exceptions, so we can get at the
 					@ rest of it
 
-#ifndef CONFIG_SMP
-	@ Save out the current registers to the old thread state
-	@ No need for SMP since this is not done lazily
-
 	DBGSTR1	"save old state %p", r4
-	cmp	r4, #0
-	beq	no_old_VFP_process
+	cmp	r4, #0			@ if the vfp_current_hw_state is NULL
+	beq	vfp_reload_hw		@ then the hw state needs reloading
 	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
 #ifndef CONFIG_CPU_FEROCEON
@@ -110,13 +110,35 @@
 1:
 #endif
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
-					@ and point r4 at the word at the
-					@ start of the register dump
+vfp_reload_hw:
+
+#else
+	@ For SMP, if this thread does not own the hw context, then we
+	@ need to reload it.  No need to save the old state as on SMP,
+	@ we always save the state when we switch away from a thread.
+	bne	vfp_reload_hw
+
+	@ This thread has ownership of the current hardware context.
+	@ However, it may have been migrated to another CPU, in which
+	@ case the saved state is newer than the hardware context.
+	@ Check this by looking at the CPU number which the state was
+	@ last loaded onto.
+	ldr	ip, [r10, #VFP_CPU]
+	teq	ip, r11
+	beq	vfp_hw_state_valid
+
+vfp_reload_hw:
+	@ We're loading this threads state into the VFP hardware. Update
+	@ the CPU number which contains the most up to date VFP context.
+	str	r11, [r10, #VFP_CPU]
+
+	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
+					@ exceptions, so we can get at the
+					@ rest of it
 #endif
 
-no_old_VFP_process:
 	DBGSTR1	"load state %p", r10
-	str	r10, [r3, r11, lsl #2]	@ update the last_VFP_context pointer
+	str	r10, [r3, r11, lsl #2]	@ update the vfp_current_hw_state pointer
 					@ Load the saved state back into the VFP
 	VFPFLDMIA r10, r5		@ reload the working registers while
 					@ FPEXC is in a safe state
@@ -132,7 +154,8 @@
 #endif
 	VFPFMXR	FPSCR, r5		@ restore status
 
-check_for_exception:
+@ The context stored in the VFP hardware is up to date with this thread
+vfp_hw_state_valid:
 	tst	r1, #FPEXC_EX
 	bne	process_exception	@ might as well handle the pending
 					@ exception before retrying branch
@@ -207,8 +230,8 @@
 ENDPROC(vfp_save_state)
 
 	.align
-last_VFP_context_address:
-	.word	last_VFP_context
+vfp_current_hw_state_address:
+	.word	vfp_current_hw_state
 
 	.macro	tbl_branch, base, tmp, shift
 #ifdef CONFIG_THUMB2_KERNEL
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 650d90b..79bcb43 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -33,7 +33,6 @@
 void vfp_null_entry(void);
 
 void (*vfp_vector)(void) = vfp_null_entry;
-union vfp_state *last_VFP_context[NR_CPUS];
 
 /*
  * Dual-use variable.
@@ -43,6 +42,46 @@
 unsigned int VFP_arch;
 
 /*
+ * The pointer to the vfpstate structure of the thread which currently
+ * owns the context held in the VFP hardware, or NULL if the hardware
+ * context is invalid.
+ *
+ * For UP, this is sufficient to tell which thread owns the VFP context.
+ * However, for SMP, we also need to check the CPU number stored in the
+ * saved state too to catch migrations.
+ */
+union vfp_state *vfp_current_hw_state[NR_CPUS];
+
+/*
+ * Is 'thread's most up to date state stored in this CPUs hardware?
+ * Must be called from non-preemptible context.
+ */
+static bool vfp_state_in_hw(unsigned int cpu, struct thread_info *thread)
+{
+#ifdef CONFIG_SMP
+	if (thread->vfpstate.hard.cpu != cpu)
+		return false;
+#endif
+	return vfp_current_hw_state[cpu] == &thread->vfpstate;
+}
+
+/*
+ * Force a reload of the VFP context from the thread structure.  We do
+ * this by ensuring that access to the VFP hardware is disabled, and
+ * clear last_VFP_context.  Must be called from non-preemptible context.
+ */
+static void vfp_force_reload(unsigned int cpu, struct thread_info *thread)
+{
+	if (vfp_state_in_hw(cpu, thread)) {
+		fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
+		vfp_current_hw_state[cpu] = NULL;
+	}
+#ifdef CONFIG_SMP
+	thread->vfpstate.hard.cpu = NR_CPUS;
+#endif
+}
+
+/*
  * Per-thread VFP initialization.
  */
 static void vfp_thread_flush(struct thread_info *thread)
@@ -50,21 +89,27 @@
 	union vfp_state *vfp = &thread->vfpstate;
 	unsigned int cpu;
 
+	/*
+	 * Disable VFP to ensure we initialize it first.  We must ensure
+	 * that the modification of vfp_current_hw_state[] and hardware
+	 * disable are done for the same CPU and without preemption.
+	 *
+	 * Do this first to ensure that preemption won't overwrite our
+	 * state saving should access to the VFP be enabled at this point.
+	 */
+	cpu = get_cpu();
+	if (vfp_current_hw_state[cpu] == vfp)
+		vfp_current_hw_state[cpu] = NULL;
+	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
+	put_cpu();
+
 	memset(vfp, 0, sizeof(union vfp_state));
 
 	vfp->hard.fpexc = FPEXC_EN;
 	vfp->hard.fpscr = FPSCR_ROUND_NEAREST;
-
-	/*
-	 * Disable VFP to ensure we initialize it first.  We must ensure
-	 * that the modification of last_VFP_context[] and hardware disable
-	 * are done for the same CPU and without preemption.
-	 */
-	cpu = get_cpu();
-	if (last_VFP_context[cpu] == vfp)
-		last_VFP_context[cpu] = NULL;
-	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
-	put_cpu();
+#ifdef CONFIG_SMP
+	vfp->hard.cpu = NR_CPUS;
+#endif
 }
 
 static void vfp_thread_exit(struct thread_info *thread)
@@ -73,8 +118,8 @@
 	union vfp_state *vfp = &thread->vfpstate;
 	unsigned int cpu = get_cpu();
 
-	if (last_VFP_context[cpu] == vfp)
-		last_VFP_context[cpu] = NULL;
+	if (vfp_current_hw_state[cpu] == vfp)
+		vfp_current_hw_state[cpu] = NULL;
 	put_cpu();
 }
 
@@ -84,6 +129,9 @@
 
 	vfp_sync_hwstate(parent);
 	thread->vfpstate = parent->vfpstate;
+#ifdef CONFIG_SMP
+	thread->vfpstate.hard.cpu = NR_CPUS;
+#endif
 }
 
 /*
@@ -129,17 +177,8 @@
 		 * case the thread migrates to a different CPU. The
 		 * restoring is done lazily.
 		 */
-		if ((fpexc & FPEXC_EN) && last_VFP_context[cpu]) {
-			vfp_save_state(last_VFP_context[cpu], fpexc);
-			last_VFP_context[cpu]->hard.cpu = cpu;
-		}
-		/*
-		 * Thread migration, just force the reloading of the
-		 * state on the new CPU in case the VFP registers
-		 * contain stale data.
-		 */
-		if (thread->vfpstate.hard.cpu != cpu)
-			last_VFP_context[cpu] = NULL;
+		if ((fpexc & FPEXC_EN) && vfp_current_hw_state[cpu])
+			vfp_save_state(vfp_current_hw_state[cpu], fpexc);
 #endif
 
 		/*
@@ -415,7 +454,7 @@
 	}
 
 	/* clear any information we had about last context state */
-	memset(last_VFP_context, 0, sizeof(last_VFP_context));
+	memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state));
 
 	return 0;
 }
@@ -443,15 +482,15 @@
 static inline void vfp_pm_init(void) { }
 #endif /* CONFIG_PM */
 
+/*
+ * Ensure that the VFP state stored in 'thread->vfpstate' is up to date
+ * with the hardware state.
+ */
 void vfp_sync_hwstate(struct thread_info *thread)
 {
 	unsigned int cpu = get_cpu();
 
-	/*
-	 * If the thread we're interested in is the current owner of the
-	 * hardware VFP state, then we need to save its state.
-	 */
-	if (last_VFP_context[cpu] == &thread->vfpstate) {
+	if (vfp_state_in_hw(cpu, thread)) {
 		u32 fpexc = fmrx(FPEXC);
 
 		/*
@@ -465,36 +504,13 @@
 	put_cpu();
 }
 
+/* Ensure that the thread reloads the hardware VFP state on the next use. */
 void vfp_flush_hwstate(struct thread_info *thread)
 {
 	unsigned int cpu = get_cpu();
 
-	/*
-	 * If the thread we're interested in is the current owner of the
-	 * hardware VFP state, then we need to save its state.
-	 */
-	if (last_VFP_context[cpu] == &thread->vfpstate) {
-		u32 fpexc = fmrx(FPEXC);
+	vfp_force_reload(cpu, thread);
 
-		fmxr(FPEXC, fpexc & ~FPEXC_EN);
-
-		/*
-		 * Set the context to NULL to force a reload the next time
-		 * the thread uses the VFP.
-		 */
-		last_VFP_context[cpu] = NULL;
-	}
-
-#ifdef CONFIG_SMP
-	/*
-	 * For SMP we still have to take care of the case where the thread
-	 * migrates to another CPU and then back to the original CPU on which
-	 * the last VFP user is still the same thread. Mark the thread VFP
-	 * state as belonging to a non-existent CPU so that the saved one will
-	 * be reloaded in the above case.
-	 */
-	thread->vfpstate.hard.cpu = NR_CPUS;
-#endif
 	put_cpu();
 }
 
@@ -513,8 +529,7 @@
 	void *hcpu)
 {
 	if (action == CPU_DYING || action == CPU_DYING_FROZEN) {
-		unsigned int cpu = (long)hcpu;
-		last_VFP_context[cpu] = NULL;
+		vfp_force_reload((long)hcpu, current_thread_info());
 	} else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
 		vfp_enable(NULL);
 	return NOTIFY_OK;
diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c
index c018696..5c74eb7 100644
--- a/arch/mips/kernel/i8259.c
+++ b/arch/mips/kernel/i8259.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 
 #include <asm/i8259.h>
@@ -215,14 +215,13 @@
 	}
 }
 
-static int i8259A_resume(struct sys_device *dev)
+static void i8259A_resume(void)
 {
 	if (i8259A_auto_eoi >= 0)
 		init_8259A(i8259A_auto_eoi);
-	return 0;
 }
 
-static int i8259A_shutdown(struct sys_device *dev)
+static void i8259A_shutdown(void)
 {
 	/* Put the i8259A into a quiescent state that
 	 * the kernel initialization code can get it
@@ -232,26 +231,17 @@
 		outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 		outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
 	}
-	return 0;
 }
 
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
+static struct syscore_ops i8259_syscore_ops = {
 	.resume = i8259A_resume,
 	.shutdown = i8259A_shutdown,
 };
 
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
 static int __init i8259A_init_sysfs(void)
 {
-	int error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
+	register_syscore_ops(&i8259_syscore_ops);
+	return 0;
 }
 
 device_initcall(i8259A_init_sysfs);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 33867ec..9d6a8ef 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -12,6 +12,8 @@
 #include <linux/of.h>
 #include <linux/memblock.h>
 #include <linux/vmalloc.h>
+#include <linux/memory.h>
+
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/pSeries_reconfig.h>
@@ -20,24 +22,25 @@
 static unsigned long get_memblock_size(void)
 {
 	struct device_node *np;
-	unsigned int memblock_size = 0;
+	unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
+	struct resource r;
 
 	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (np) {
-		const unsigned long *size;
+		const __be64 *size;
 
 		size = of_get_property(np, "ibm,lmb-size", NULL);
-		memblock_size = size ? *size : 0;
-
+		if (size)
+			memblock_size = be64_to_cpup(size);
 		of_node_put(np);
-	} else {
+	} else  if (machine_is(pseries)) {
+		/* This fallback really only applies to pseries */
 		unsigned int memzero_size = 0;
-		const unsigned int *regs;
 
 		np = of_find_node_by_path("/memory@0");
 		if (np) {
-			regs = of_get_property(np, "reg", NULL);
-			memzero_size = regs ? regs[3] : 0;
+			if (!of_address_to_resource(np, 0, &r))
+				memzero_size = resource_size(&r);
 			of_node_put(np);
 		}
 
@@ -50,16 +53,21 @@
 			sprintf(buf, "/memory@%x", memzero_size);
 			np = of_find_node_by_path(buf);
 			if (np) {
-				regs = of_get_property(np, "reg", NULL);
-				memblock_size = regs ? regs[3] : 0;
+				if (!of_address_to_resource(np, 0, &r))
+					memblock_size = resource_size(&r);
 				of_node_put(np);
 			}
 		}
 	}
-
 	return memblock_size;
 }
 
+/* WARNING: This is going to override the generic definition whenever
+ * pseries is built-in regardless of what platform is active at boot
+ * time. This is fine for now as this is the only "option" and it
+ * should work everywhere. If not, we'll have to turn this into a
+ * ppc_md. callback
+ */
 unsigned long memory_block_size_bytes(void)
 {
 	return get_memblock_size();
diff --git a/arch/sparc/include/asm/irqflags_32.h b/arch/sparc/include/asm/irqflags_32.h
index d4d0711..1484890 100644
--- a/arch/sparc/include/asm/irqflags_32.h
+++ b/arch/sparc/include/asm/irqflags_32.h
@@ -18,7 +18,7 @@
 extern unsigned long arch_local_irq_save(void);
 extern void arch_local_irq_enable(void);
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
 	unsigned long flags;
 
@@ -26,17 +26,17 @@
 	return flags;
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	arch_local_irq_save();
 }
 
-static inline bool arch_irqs_disabled_flags(unsigned long flags)
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
 	return (flags & PSR_PIL) != 0;
 }
 
-static inline bool arch_irqs_disabled(void)
+static inline notrace bool arch_irqs_disabled(void)
 {
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
diff --git a/arch/sparc/include/asm/irqflags_64.h b/arch/sparc/include/asm/irqflags_64.h
index aab969c..23cd27f 100644
--- a/arch/sparc/include/asm/irqflags_64.h
+++ b/arch/sparc/include/asm/irqflags_64.h
@@ -14,7 +14,7 @@
 
 #ifndef __ASSEMBLY__
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
 	unsigned long flags;
 
@@ -26,7 +26,7 @@
 	return flags;
 }
 
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
 	__asm__ __volatile__(
 		"wrpr	%0, %%pil"
@@ -36,7 +36,7 @@
 	);
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	__asm__ __volatile__(
 		"wrpr	%0, %%pil"
@@ -46,7 +46,7 @@
 	);
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
 	__asm__ __volatile__(
 		"wrpr	0, %%pil"
@@ -56,17 +56,17 @@
 	);
 }
 
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline notrace int arch_irqs_disabled_flags(unsigned long flags)
 {
 	return (flags > 0);
 }
 
-static inline int arch_irqs_disabled(void)
+static inline notrace int arch_irqs_disabled(void)
 {
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags, tmp;
 
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 9fe08a1..f445e98 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -293,7 +293,7 @@
 	WRITE_PAUSE
 	wr	%l4, PSR_ET, %psr
 	WRITE_PAUSE
-	sll	%o3, 28, %o2		! shift for simpler checks below
+	srl	%o3, 28, %o2		! shift for simpler checks below
 maybe_smp4m_msg_check_single:
 	andcc	%o2, 0x1, %g0
 	beq,a	maybe_smp4m_msg_check_mask
diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
index c0e0129..e485a68 100644
--- a/arch/sparc/mm/leon_mm.c
+++ b/arch/sparc/mm/leon_mm.c
@@ -226,7 +226,7 @@
  * Leon2 and Leon3 differ in their way of telling cache information
  *
  */
-int leon_flush_needed(void)
+int __init leon_flush_needed(void)
 {
 	int flush_needed = -1;
 	unsigned int ssize, sets;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index da34972..37357a5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1170,7 +1170,7 @@
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on NUMA && PCI
+	depends on X86_64 && NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection.  You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4f0d46f..14eed21 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -419,6 +419,14 @@
 			DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
 		},
 	},
+	{	/* Handle problems with rebooting on the Latitude E6320. */
+		.callback = set_pci_reboot,
+		.ident = "Dell Latitude E6320",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+		},
+	},
 	{ }
 };
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d865c4ae..bbaaa00 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -28,6 +28,7 @@
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nmi.h>
 #include <linux/gfp.h>
@@ -895,8 +896,6 @@
 }
 
 #ifdef CONFIG_X86_UV
-#define MIN_MEMORY_BLOCK_SIZE   (1 << SECTION_SIZE_BITS)
-
 unsigned long memory_block_size_bytes(void)
 {
 	if (is_uv_system()) {
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index abda378..181bc2f 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -139,13 +139,23 @@
 {
 	struct platform_device *ghes_dev;
 	struct ghes_arr *ghes_arr = data;
-	int rc;
+	int rc, i;
 
 	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
 		return 0;
 
 	if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
 		return 0;
+	for (i = 0; i < ghes_arr->count; i++) {
+		struct acpi_hest_header *hdr;
+		ghes_dev = ghes_arr->ghes_devs[i];
+		hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+		if (hdr->source_id == hest_hdr->source_id) {
+			pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+				   hdr->source_id);
+			return -EIO;
+		}
+	}
 	ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
 	if (!ghes_dev)
 		return -ENOMEM;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 52ca964..372f9b7 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1333,23 +1333,6 @@
 EXPORT_SYMBOL(acpi_resources_are_enforced);
 
 /*
- * Create and initialize a spinlock.
- */
-acpi_status
-acpi_os_create_lock(acpi_spinlock *out_handle)
-{
-	spinlock_t *lock;
-
-	lock = ACPI_ALLOCATE(sizeof(spinlock_t));
-	if (!lock)
-		return AE_NO_MEMORY;
-	spin_lock_init(lock);
-	*out_handle = lock;
-
-	return AE_OK;
-}
-
-/*
  * Deallocate the memory for a spinlock.
  */
 void acpi_os_delete_lock(acpi_spinlock handle)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9f9b235..45d7c8f 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -30,7 +30,6 @@
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME	"memory"
-#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
 
 static int sections_per_block;
 
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index 999803c..5da67f1 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -90,9 +90,10 @@
 #define G4x_GMCH_SIZE_MASK	(0xf << 8)
 #define G4x_GMCH_SIZE_1M	(0x1 << 8)
 #define G4x_GMCH_SIZE_2M	(0x3 << 8)
-#define G4x_GMCH_SIZE_VT_1M	(0x9 << 8)
-#define G4x_GMCH_SIZE_VT_1_5M	(0xa << 8)
-#define G4x_GMCH_SIZE_VT_2M	(0xc << 8)
+#define G4x_GMCH_SIZE_VT_EN	(0x8 << 8)
+#define G4x_GMCH_SIZE_VT_1M	(G4x_GMCH_SIZE_1M | G4x_GMCH_SIZE_VT_EN)
+#define G4x_GMCH_SIZE_VT_1_5M	((0x2 << 8) | G4x_GMCH_SIZE_VT_EN)
+#define G4x_GMCH_SIZE_VT_2M	(G4x_GMCH_SIZE_2M | G4x_GMCH_SIZE_VT_EN)
 
 #define GFX_FLSH_CNTL		0x2170 /* 915+ */
 
diff --git a/drivers/gpio/wm831x-gpio.c b/drivers/gpio/wm831x-gpio.c
index 309644c..2bcfb0b 100644
--- a/drivers/gpio/wm831x-gpio.c
+++ b/drivers/gpio/wm831x-gpio.c
@@ -180,6 +180,7 @@
 			break;
 		case WM831X_GPIO_PULL_UP:
 			pull = "pullup";
+			break;
 		default:
 			pull = "INVALID PULL";
 			break;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e178702..296fbd6 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1943,7 +1943,7 @@
 	if (!dev_priv->mm.gtt) {
 		DRM_ERROR("Failed to initialize GTT\n");
 		ret = -ENODEV;
-		goto out_iomapfree;
+		goto out_rmmap;
 	}
 
 	agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
@@ -1987,7 +1987,7 @@
 	if (dev_priv->wq == NULL) {
 		DRM_ERROR("Failed to create our workqueue.\n");
 		ret = -ENOMEM;
-		goto out_iomapfree;
+		goto out_mtrrfree;
 	}
 
 	/* enable GEM by default */
@@ -2074,13 +2074,21 @@
 	return 0;
 
 out_gem_unload:
+	if (dev_priv->mm.inactive_shrinker.shrink)
+		unregister_shrinker(&dev_priv->mm.inactive_shrinker);
+
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
 
 	intel_teardown_gmbus(dev);
 	intel_teardown_mchbar(dev);
 	destroy_workqueue(dev_priv->wq);
-out_iomapfree:
+out_mtrrfree:
+	if (dev_priv->mm.gtt_mtrr >= 0) {
+		mtrr_del(dev_priv->mm.gtt_mtrr, dev->agp->base,
+			 dev->agp->agp_info.aper_size * 1024 * 1024);
+		dev_priv->mm.gtt_mtrr = -1;
+	}
 	io_mapping_free(dev_priv->mm.gtt_mapping);
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 013d304..eb91e2d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -52,7 +52,7 @@
 unsigned int i915_semaphores = 0;
 module_param_named(semaphores, i915_semaphores, int, 0600);
 
-unsigned int i915_enable_rc6 = 1;
+unsigned int i915_enable_rc6 = 0;
 module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
 
 unsigned int i915_enable_fbc = 0;
@@ -577,6 +577,7 @@
 	if (get_seconds() - dev_priv->last_gpu_reset < 5) {
 		DRM_ERROR("GPU hanging too fast, declaring wedged!\n");
 	} else switch (INTEL_INFO(dev)->gen) {
+	case 7:
 	case 6:
 		ret = gen6_do_reset(dev, flags);
 		/* If reset with a user forcewake, try to restore */
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 391b55f..e2aced6 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -50,7 +50,6 @@
 	bool has_audio;
 	int force_audio;
 	uint32_t color_range;
-	int dpms_mode;
 	uint8_t link_bw;
 	uint8_t lane_count;
 	uint8_t dpcd[4];
@@ -138,8 +137,8 @@
 {
 	int max_lane_count = 4;
 
-	if (intel_dp->dpcd[0] >= 0x11) {
-		max_lane_count = intel_dp->dpcd[2] & 0x1f;
+	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) {
+		max_lane_count = intel_dp->dpcd[DP_MAX_LANE_COUNT] & 0x1f;
 		switch (max_lane_count) {
 		case 1: case 2: case 4:
 			break;
@@ -153,7 +152,7 @@
 static int
 intel_dp_max_link_bw(struct intel_dp *intel_dp)
 {
-	int max_link_bw = intel_dp->dpcd[1];
+	int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
 
 	switch (max_link_bw) {
 	case DP_LINK_BW_1_62:
@@ -774,7 +773,8 @@
 	/*
 	 * Check for DPCD version > 1.1 and enhanced framing support
 	 */
-	if (intel_dp->dpcd[0] >= 0x11 && (intel_dp->dpcd[2] & DP_ENHANCED_FRAME_CAP)) {
+	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
+	    (intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_ENHANCED_FRAME_CAP)) {
 		intel_dp->link_configuration[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
 		intel_dp->DP |= DP_ENHANCED_FRAMING;
 	}
@@ -942,11 +942,44 @@
 	udelay(200);
 }
 
+/* If the sink supports it, try to set the power state appropriately */
+static void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
+{
+	int ret, i;
+
+	/* Should have a valid DPCD by this point */
+	if (intel_dp->dpcd[DP_DPCD_REV] < 0x11)
+		return;
+
+	if (mode != DRM_MODE_DPMS_ON) {
+		ret = intel_dp_aux_native_write_1(intel_dp, DP_SET_POWER,
+						  DP_SET_POWER_D3);
+		if (ret != 1)
+			DRM_DEBUG_DRIVER("failed to write sink power state\n");
+	} else {
+		/*
+		 * When turning on, we need to retry for 1ms to give the sink
+		 * time to wake up.
+		 */
+		for (i = 0; i < 3; i++) {
+			ret = intel_dp_aux_native_write_1(intel_dp,
+							  DP_SET_POWER,
+							  DP_SET_POWER_D0);
+			if (ret == 1)
+				break;
+			msleep(1);
+		}
+	}
+}
+
 static void intel_dp_prepare(struct drm_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_device *dev = encoder->dev;
 
+	/* Wake up the sink first */
+	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+
 	if (is_edp(intel_dp)) {
 		ironlake_edp_backlight_off(dev);
 		ironlake_edp_panel_off(dev);
@@ -990,6 +1023,7 @@
 	if (mode != DRM_MODE_DPMS_ON) {
 		if (is_edp(intel_dp))
 			ironlake_edp_backlight_off(dev);
+		intel_dp_sink_dpms(intel_dp, mode);
 		intel_dp_link_down(intel_dp);
 		if (is_edp(intel_dp))
 			ironlake_edp_panel_off(dev);
@@ -998,6 +1032,7 @@
 	} else {
 		if (is_edp(intel_dp))
 			ironlake_edp_panel_vdd_on(intel_dp);
+		intel_dp_sink_dpms(intel_dp, mode);
 		if (!(dp_reg & DP_PORT_EN)) {
 			intel_dp_start_link_train(intel_dp);
 			if (is_edp(intel_dp)) {
@@ -1009,7 +1044,31 @@
 		if (is_edp(intel_dp))
 			ironlake_edp_backlight_on(dev);
 	}
-	intel_dp->dpms_mode = mode;
+}
+
+/*
+ * Native read with retry for link status and receiver capability reads for
+ * cases where the sink may still be asleep.
+ */
+static bool
+intel_dp_aux_native_read_retry(struct intel_dp *intel_dp, uint16_t address,
+			       uint8_t *recv, int recv_bytes)
+{
+	int ret, i;
+
+	/*
+	 * Sinks are *supposed* to come up within 1ms from an off state,
+	 * but we're also supposed to retry 3 times per the spec.
+	 */
+	for (i = 0; i < 3; i++) {
+		ret = intel_dp_aux_native_read(intel_dp, address, recv,
+					       recv_bytes);
+		if (ret == recv_bytes)
+			return true;
+		msleep(1);
+	}
+
+	return false;
 }
 
 /*
@@ -1019,14 +1078,10 @@
 static bool
 intel_dp_get_link_status(struct intel_dp *intel_dp)
 {
-	int ret;
-
-	ret = intel_dp_aux_native_read(intel_dp,
-				       DP_LANE0_1_STATUS,
-				       intel_dp->link_status, DP_LINK_STATUS_SIZE);
-	if (ret != DP_LINK_STATUS_SIZE)
-		return false;
-	return true;
+	return intel_dp_aux_native_read_retry(intel_dp,
+					      DP_LANE0_1_STATUS,
+					      intel_dp->link_status,
+					      DP_LINK_STATUS_SIZE);
 }
 
 static uint8_t
@@ -1515,6 +1570,8 @@
 static void
 intel_dp_check_link_status(struct intel_dp *intel_dp)
 {
+	int ret;
+
 	if (!intel_dp->base.base.crtc)
 		return;
 
@@ -1523,6 +1580,15 @@
 		return;
 	}
 
+	/* Try to read receiver status if the link appears to be up */
+	ret = intel_dp_aux_native_read(intel_dp,
+				       0x000, intel_dp->dpcd,
+				       sizeof (intel_dp->dpcd));
+	if (ret != sizeof(intel_dp->dpcd)) {
+		intel_dp_link_down(intel_dp);
+		return;
+	}
+
 	if (!intel_channel_eq_ok(intel_dp)) {
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_complete_link_train(intel_dp);
@@ -1533,6 +1599,7 @@
 ironlake_dp_detect(struct intel_dp *intel_dp)
 {
 	enum drm_connector_status status;
+	bool ret;
 
 	/* Can't disconnect eDP, but you can close the lid... */
 	if (is_edp(intel_dp)) {
@@ -1543,13 +1610,11 @@
 	}
 
 	status = connector_status_disconnected;
-	if (intel_dp_aux_native_read(intel_dp,
-				     0x000, intel_dp->dpcd,
-				     sizeof (intel_dp->dpcd))
-	    == sizeof(intel_dp->dpcd)) {
-		if (intel_dp->dpcd[0] != 0)
-			status = connector_status_connected;
-	}
+	ret = intel_dp_aux_native_read_retry(intel_dp,
+					     0x000, intel_dp->dpcd,
+					     sizeof (intel_dp->dpcd));
+	if (ret && intel_dp->dpcd[DP_DPCD_REV] != 0)
+		status = connector_status_connected;
 	DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx\n", intel_dp->dpcd[0],
 		      intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3]);
 	return status;
@@ -1586,7 +1651,7 @@
 	if (intel_dp_aux_native_read(intel_dp, 0x000, intel_dp->dpcd,
 				     sizeof (intel_dp->dpcd)) == sizeof (intel_dp->dpcd))
 	{
-		if (intel_dp->dpcd[0] != 0)
+		if (intel_dp->dpcd[DP_DPCD_REV] != 0)
 			status = connector_status_connected;
 	}
 
@@ -1790,8 +1855,7 @@
 {
 	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
 
-	if (intel_dp->dpms_mode == DRM_MODE_DPMS_ON)
-		intel_dp_check_link_status(intel_dp);
+	intel_dp_check_link_status(intel_dp);
 }
 
 /* Return which DP Port should be selected for Transcoder DP control */
@@ -1859,7 +1923,6 @@
 		return;
 
 	intel_dp->output_reg = output_reg;
-	intel_dp->dpms_mode = -1;
 
 	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
 	if (!intel_connector) {
@@ -1954,8 +2017,9 @@
 					       sizeof(intel_dp->dpcd));
 		ironlake_edp_panel_vdd_off(intel_dp);
 		if (ret == sizeof(intel_dp->dpcd)) {
-			if (intel_dp->dpcd[0] >= 0x11)
-				dev_priv->no_aux_handshake = intel_dp->dpcd[3] &
+			if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
+				dev_priv->no_aux_handshake =
+					intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
 					DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
 		} else {
 			/* if this fails, presume the device is a ghost */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c0e0ee6..39ac2b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -165,7 +165,7 @@
 int __must_check intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n);
 static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring)
 {
-	return intel_wait_ring_buffer(ring, ring->space - 8);
+	return intel_wait_ring_buffer(ring, ring->size - 8);
 }
 
 int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 660f964..15bd047 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2000,7 +2000,7 @@
 			gb_backend_map = 0x66442200;
 			break;
 		case CHIP_JUNIPER:
-			gb_backend_map = 0x00006420;
+			gb_backend_map = 0x00002200;
 			break;
 		default:
 			gb_backend_map =
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 57f3bc1..2eb2518 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -252,7 +252,7 @@
 
 }
 
-/* emits 36 */
+/* emits 39 */
 static void
 set_default_state(struct radeon_device *rdev)
 {
@@ -531,6 +531,11 @@
 		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
 		radeon_ring_write(rdev, 0);
 
+		/* setup LDS */
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(rdev, 0x10001000);
+
 		/* SQ config */
 		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
 		radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
@@ -773,7 +778,7 @@
 	/* calculate number of loops correctly */
 	ring_size = num_loops * dwords_per_loop;
 	/* set default  + shaders */
-	ring_size += 52; /* shaders + def state */
+	ring_size += 55; /* shaders + def state */
 	ring_size += 10; /* fence emit for VB IB */
 	ring_size += 5; /* done copy */
 	ring_size += 10; /* fence emit for done copy */
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 3fc5fa1..229a20f 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -331,7 +331,7 @@
 
 	seprom_cntl1 = RREG32(RADEON_SEPROM_CNTL1);
 	viph_control = RREG32(RADEON_VIPH_CONTROL);
-	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	bus_cntl = RREG32(RV370_BUS_CNTL);
 	d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
 	d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
 	vga_render_control = RREG32(AVIVO_VGA_RENDER_CONTROL);
@@ -350,7 +350,7 @@
 	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
 
 	/* enable the rom */
-	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+	WREG32(RV370_BUS_CNTL, (bus_cntl & ~RV370_BUS_BIOS_DIS_ROM));
 
 	/* Disable VGA mode */
 	WREG32(AVIVO_D1VGA_CONTROL,
@@ -367,7 +367,7 @@
 	/* restore regs */
 	WREG32(RADEON_SEPROM_CNTL1, seprom_cntl1);
 	WREG32(RADEON_VIPH_CONTROL, viph_control);
-	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	WREG32(RV370_BUS_CNTL, bus_cntl);
 	WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
 	WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
 	WREG32(AVIVO_VGA_RENDER_CONTROL, vga_render_control);
@@ -390,7 +390,10 @@
 
 	seprom_cntl1 = RREG32(RADEON_SEPROM_CNTL1);
 	viph_control = RREG32(RADEON_VIPH_CONTROL);
-	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	if (rdev->flags & RADEON_IS_PCIE)
+		bus_cntl = RREG32(RV370_BUS_CNTL);
+	else
+		bus_cntl = RREG32(RADEON_BUS_CNTL);
 	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
 	crtc2_gen_cntl = 0;
 	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
@@ -412,7 +415,10 @@
 	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
 
 	/* enable the rom */
-	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+	if (rdev->flags & RADEON_IS_PCIE)
+		WREG32(RV370_BUS_CNTL, (bus_cntl & ~RV370_BUS_BIOS_DIS_ROM));
+	else
+		WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
 
 	/* Turn off mem requests and CRTC for both controllers */
 	WREG32(RADEON_CRTC_GEN_CNTL,
@@ -439,7 +445,10 @@
 	/* restore regs */
 	WREG32(RADEON_SEPROM_CNTL1, seprom_cntl1);
 	WREG32(RADEON_VIPH_CONTROL, viph_control);
-	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	if (rdev->flags & RADEON_IS_PCIE)
+		WREG32(RV370_BUS_CNTL, bus_cntl);
+	else
+		WREG32(RADEON_BUS_CNTL, bus_cntl);
 	WREG32(RADEON_CRTC_GEN_CNTL, crtc_gen_cntl);
 	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
 		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index cbfca3a..9792d4f 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -52,6 +52,12 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 
+	/* bail if the connector does not have hpd pin, e.g.,
+	 * VGA, TV, etc.
+	 */
+	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE)
+		return;
+
 	radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 
 	/* powering up/down the eDP panel generates hpd events which
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index ec93a75..bc44a3d 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -300,6 +300,8 @@
 #       define RADEON_BUS_READ_BURST         (1 << 30)
 #define RADEON_BUS_CNTL1                    0x0034
 #       define RADEON_BUS_WAIT_ON_LOCK_EN    (1 << 4)
+#define RV370_BUS_CNTL                      0x004c
+#       define RV370_BUS_BIOS_DIS_ROM        (1 << 2)
 /* rv370/rv380, rv410, r423/r430/r480, r5xx */
 #define RADEON_MSI_REARM_EN		    0x0160
 #	define RV370_MSI_REARM_EN	     (1 << 0)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 6e3b11e..1f5850e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -426,7 +426,7 @@
 	return radeon_gart_table_vram_alloc(rdev);
 }
 
-int rs600_gart_enable(struct radeon_device *rdev)
+static int rs600_gart_enable(struct radeon_device *rdev)
 {
 	u32 tmp;
 	int r, i;
@@ -440,8 +440,8 @@
 		return r;
 	radeon_gart_restore(rdev);
 	/* Enable bus master */
-	tmp = RREG32(R_00004C_BUS_CNTL) & C_00004C_BUS_MASTER_DIS;
-	WREG32(R_00004C_BUS_CNTL, tmp);
+	tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS;
+	WREG32(RADEON_BUS_CNTL, tmp);
 	/* FIXME: setup default page */
 	WREG32_MC(R_000100_MC_PT0_CNTL,
 		  (S_000100_EFFECTIVE_L2_CACHE_SIZE(6) |
diff --git a/drivers/hwmon/adm1275.c b/drivers/hwmon/adm1275.c
index b9b7caf..8bc1bd6 100644
--- a/drivers/hwmon/adm1275.c
+++ b/drivers/hwmon/adm1275.c
@@ -53,23 +53,23 @@
 	info->direct[PSC_VOLTAGE_IN] = true;
 	info->direct[PSC_VOLTAGE_OUT] = true;
 	info->direct[PSC_CURRENT_OUT] = true;
-	info->m[PSC_CURRENT_OUT] = 800;
+	info->m[PSC_CURRENT_OUT] = 807;
 	info->b[PSC_CURRENT_OUT] = 20475;
 	info->R[PSC_CURRENT_OUT] = -1;
 	info->func[0] = PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT;
 
 	if (config & ADM1275_VRANGE) {
-		info->m[PSC_VOLTAGE_IN] = 19045;
+		info->m[PSC_VOLTAGE_IN] = 19199;
 		info->b[PSC_VOLTAGE_IN] = 0;
 		info->R[PSC_VOLTAGE_IN] = -2;
-		info->m[PSC_VOLTAGE_OUT] = 19045;
+		info->m[PSC_VOLTAGE_OUT] = 19199;
 		info->b[PSC_VOLTAGE_OUT] = 0;
 		info->R[PSC_VOLTAGE_OUT] = -2;
 	} else {
-		info->m[PSC_VOLTAGE_IN] = 6666;
+		info->m[PSC_VOLTAGE_IN] = 6720;
 		info->b[PSC_VOLTAGE_IN] = 0;
 		info->R[PSC_VOLTAGE_IN] = -1;
-		info->m[PSC_VOLTAGE_OUT] = 6666;
+		info->m[PSC_VOLTAGE_OUT] = 6720;
 		info->b[PSC_VOLTAGE_OUT] = 0;
 		info->R[PSC_VOLTAGE_OUT] = -1;
 	}
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index dcb78a7..00e9851 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -674,6 +674,7 @@
 	else
 		err = -EIO;
 
+	ACPI_FREE(ret);
 	return err;
 }
 
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index bb6405b..5f52477 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -1538,7 +1538,7 @@
 };
 
 static const struct attribute_group it87_group_label = {
-	.attrs = it87_attributes_vid,
+	.attrs = it87_attributes_label,
 };
 
 /* SuperIO detection - will change isa_address if a chip is found */
diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index 12a54aa..14335bb 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c
@@ -40,6 +40,8 @@
 	struct spi_transfer	xfer[2];
 	uint8_t *tx_buf;
 	uint8_t *rx_buf;
+	struct mutex		drvdata_lock;
+	/* protect msg, xfer and buffers from multiple access */
 };
 
 static int max1111_read(struct device *dev, int channel)
@@ -48,6 +50,9 @@
 	uint8_t v1, v2;
 	int err;
 
+	/* writing to drvdata struct is not thread safe, wait on mutex */
+	mutex_lock(&data->drvdata_lock);
+
 	data->tx_buf[0] = (channel << MAX1111_CTRL_SEL_SH) |
 		MAX1111_CTRL_PD0 | MAX1111_CTRL_PD1 |
 		MAX1111_CTRL_SGL | MAX1111_CTRL_UNI | MAX1111_CTRL_STR;
@@ -55,12 +60,15 @@
 	err = spi_sync(data->spi, &data->msg);
 	if (err < 0) {
 		dev_err(dev, "spi_sync failed with %d\n", err);
+		mutex_unlock(&data->drvdata_lock);
 		return err;
 	}
 
 	v1 = data->rx_buf[0];
 	v2 = data->rx_buf[1];
 
+	mutex_unlock(&data->drvdata_lock);
+
 	if ((v1 & 0xc0) || (v2 & 0x3f))
 		return -EINVAL;
 
@@ -176,6 +184,8 @@
 	if (err)
 		goto err_free_data;
 
+	mutex_init(&data->drvdata_lock);
+
 	data->spi = spi;
 	spi_set_drvdata(spi, data);
 
@@ -213,6 +223,7 @@
 
 	hwmon_device_unregister(data->hwmon_dev);
 	sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);
+	mutex_destroy(&data->drvdata_lock);
 	kfree(data->rx_buf);
 	kfree(data->tx_buf);
 	kfree(data);
diff --git a/drivers/hwmon/pmbus_core.c b/drivers/hwmon/pmbus_core.c
index 744672c..8e31a8e 100644
--- a/drivers/hwmon/pmbus_core.c
+++ b/drivers/hwmon/pmbus_core.c
@@ -362,8 +362,8 @@
  * Convert linear sensor values to milli- or micro-units
  * depending on sensor type.
  */
-static int pmbus_reg2data_linear(struct pmbus_data *data,
-				 struct pmbus_sensor *sensor)
+static long pmbus_reg2data_linear(struct pmbus_data *data,
+				  struct pmbus_sensor *sensor)
 {
 	s16 exponent;
 	s32 mantissa;
@@ -397,15 +397,15 @@
 	else
 		val >>= -exponent;
 
-	return (int)val;
+	return val;
 }
 
 /*
  * Convert direct sensor values to milli- or micro-units
  * depending on sensor type.
  */
-static int pmbus_reg2data_direct(struct pmbus_data *data,
-				 struct pmbus_sensor *sensor)
+static long pmbus_reg2data_direct(struct pmbus_data *data,
+				  struct pmbus_sensor *sensor)
 {
 	long val = (s16) sensor->data;
 	long m, b, R;
@@ -440,12 +440,12 @@
 		R++;
 	}
 
-	return (int)((val - b) / m);
+	return (val - b) / m;
 }
 
-static int pmbus_reg2data(struct pmbus_data *data, struct pmbus_sensor *sensor)
+static long pmbus_reg2data(struct pmbus_data *data, struct pmbus_sensor *sensor)
 {
-	int val;
+	long val;
 
 	if (data->info->direct[sensor->class])
 		val = pmbus_reg2data_direct(data, sensor);
@@ -619,7 +619,7 @@
 	if (!s1 && !s2)
 		*val = !!regval;
 	else {
-		int v1, v2;
+		long v1, v2;
 		struct pmbus_sensor *sensor1, *sensor2;
 
 		sensor1 = &data->sensors[s1];
@@ -661,7 +661,7 @@
 	if (sensor->data < 0)
 		return sensor->data;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", pmbus_reg2data(data, sensor));
+	return snprintf(buf, PAGE_SIZE, "%ld\n", pmbus_reg2data(data, sensor));
 }
 
 static ssize_t pmbus_set_sensor(struct device *dev,
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 9827804..5b6b451 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -1988,6 +1988,14 @@
 	if (dvbdev->users == -1 && fe->ops.ts_bus_ctrl) {
 		if ((ret = fe->ops.ts_bus_ctrl(fe, 1)) < 0)
 			goto err0;
+
+		/* If we took control of the bus, we need to force
+		   reinitialization.  This is because many ts_bus_ctrl()
+		   functions strobe the RESET pin on the demod, and if the
+		   frontend thread already exists then the dvb_init() routine
+		   won't get called (which is what usually does initial
+		   register configuration). */
+		fepriv->reinitialise = 1;
 	}
 
 	if ((ret = dvb_generic_open (inode, file)) < 0)
diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig
index e4c97fd..52798a1 100644
--- a/drivers/media/radio/Kconfig
+++ b/drivers/media/radio/Kconfig
@@ -168,7 +168,7 @@
 
 config RADIO_MIROPCM20
 	tristate "miroSOUND PCM20 radio"
-	depends on ISA && VIDEO_V4L2 && SND
+	depends on ISA && ISA_DMA_API && VIDEO_V4L2 && SND
 	select SND_ISA
 	select SND_MIRO
 	---help---
@@ -201,7 +201,7 @@
 
 config RADIO_SF16FMR2
 	tristate "SF16FMR2 Radio"
-	depends on ISA && VIDEO_V4L2
+	depends on ISA && VIDEO_V4L2 && SND
 	---help---
 	  Choose Y here if you have one of these FM radio cards.
 
diff --git a/drivers/media/radio/si4713-i2c.c b/drivers/media/radio/si4713-i2c.c
index deca2e0..c9f4a8e6 100644
--- a/drivers/media/radio/si4713-i2c.c
+++ b/drivers/media/radio/si4713-i2c.c
@@ -1033,7 +1033,7 @@
 		char ps_name[MAX_RDS_PS_NAME + 1];
 
 		len = control->size - 1;
-		if (len > MAX_RDS_PS_NAME) {
+		if (len < 0 || len > MAX_RDS_PS_NAME) {
 			rval = -ERANGE;
 			goto exit;
 		}
@@ -1057,7 +1057,7 @@
 		char radio_text[MAX_RDS_RADIO_TEXT + 1];
 
 		len = control->size - 1;
-		if (len > MAX_RDS_RADIO_TEXT) {
+		if (len < 0 || len > MAX_RDS_RADIO_TEXT) {
 			rval = -ERANGE;
 			goto exit;
 		}
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 06dfe09..ec972dc 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -558,9 +558,10 @@
 				 inout, data1);
 			break;
 		case MCE_CMD_S_TIMEOUT:
-			/* value is in units of 50us, so x*50/100 or x/2 ms */
+			/* value is in units of 50us, so x*50/1000 ms */
 			dev_info(dev, "%s receive timeout of %d ms\n",
-				 inout, ((data1 << 8) | data2) / 2);
+				 inout,
+				 ((data1 << 8) | data2) * MCE_TIME_UNIT / 1000);
 			break;
 		case MCE_CMD_G_TIMEOUT:
 			dev_info(dev, "Get receive timeout\n");
@@ -847,7 +848,7 @@
 	switch (ir->buf_in[index]) {
 	/* 2-byte return value commands */
 	case MCE_CMD_S_TIMEOUT:
-		ir->rc->timeout = US_TO_NS((hi << 8 | lo) / 2);
+		ir->rc->timeout = US_TO_NS((hi << 8 | lo) * MCE_TIME_UNIT);
 		break;
 
 	/* 1-byte return value commands */
@@ -1078,7 +1079,7 @@
 	rc->priv = ir;
 	rc->driver_type = RC_DRIVER_IR_RAW;
 	rc->allowed_protos = RC_TYPE_ALL;
-	rc->timeout = US_TO_NS(1000);
+	rc->timeout = MS_TO_NS(100);
 	if (!ir->flags.no_tx) {
 		rc->s_tx_mask = mceusb_set_tx_mask;
 		rc->s_tx_carrier = mceusb_set_tx_carrier;
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 565f24c..ce595f9 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -1110,7 +1110,7 @@
 	rdev->dev.parent = &pdev->dev;
 	rdev->driver_name = NVT_DRIVER_NAME;
 	rdev->map_name = RC_MAP_RC6_MCE;
-	rdev->timeout = US_TO_NS(1000);
+	rdev->timeout = MS_TO_NS(100);
 	/* rx resolution is hardwired to 50us atm, 1, 25, 100 also possible */
 	rdev->rx_resolution = US_TO_NS(CIR_SAMPLE_PERIOD);
 #if 0
diff --git a/drivers/media/video/cx23885/cx23885-core.c b/drivers/media/video/cx23885/cx23885-core.c
index 64d9b21..419777a 100644
--- a/drivers/media/video/cx23885/cx23885-core.c
+++ b/drivers/media/video/cx23885/cx23885-core.c
@@ -2060,12 +2060,8 @@
 		goto fail_irq;
 	}
 
-	if (!pci_enable_msi(pci_dev))
-		err = request_irq(pci_dev->irq, cx23885_irq,
-				  IRQF_DISABLED, dev->name, dev);
-	else
-		err = request_irq(pci_dev->irq, cx23885_irq,
-				  IRQF_SHARED | IRQF_DISABLED, dev->name, dev);
+	err = request_irq(pci_dev->irq, cx23885_irq,
+			  IRQF_SHARED | IRQF_DISABLED, dev->name, dev);
 	if (err < 0) {
 		printk(KERN_ERR "%s: can't get IRQ %d\n",
 		       dev->name, pci_dev->irq);
@@ -2114,7 +2110,6 @@
 
 	/* unregister stuff */
 	free_irq(pci_dev->irq, dev);
-	pci_disable_msi(pci_dev);
 
 	cx23885_dev_unregister(dev);
 	v4l2_device_unregister(v4l2_dev);
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index cfa9f7e..a03945a 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -714,10 +714,19 @@
  * returns 0.
  * This function is needed for boards that have a separate tuner for
  * radio (like devices with tea5767).
+ * NOTE: mt20xx uses V4L2_TUNER_DIGITAL_TV and calls set_tv_freq to
+ *       select a TV frequency. So, t_mode = T_ANALOG_TV could actually
+ *	 be used to represent a Digital TV too.
  */
 static inline int check_mode(struct tuner *t, enum v4l2_tuner_type mode)
 {
-	if ((1 << mode & t->mode_mask) == 0)
+	int t_mode;
+	if (mode == V4L2_TUNER_RADIO)
+		t_mode = T_RADIO;
+	else
+		t_mode = T_ANALOG_TV;
+
+	if ((t_mode & t->mode_mask) == 0)
 		return -EINVAL;
 
 	return 0;
@@ -984,7 +993,7 @@
 	case V4L2_TUNER_RADIO:
 		p = "radio";
 		break;
-	case V4L2_TUNER_DIGITAL_TV:
+	case V4L2_TUNER_DIGITAL_TV: /* Used by mt20xx */
 		p = "digital TV";
 		break;
 	case V4L2_TUNER_ANALOG_TV:
@@ -1135,9 +1144,8 @@
 		return 0;
 	if (vt->type == t->mode && analog_ops->get_afc)
 		vt->afc = analog_ops->get_afc(&t->fe);
-	if (vt->type == V4L2_TUNER_ANALOG_TV)
+	if (t->mode != V4L2_TUNER_RADIO) {
 		vt->capability |= V4L2_TUNER_CAP_NORM;
-	if (vt->type != V4L2_TUNER_RADIO) {
 		vt->rangelow = tv_range[0] * 16;
 		vt->rangehigh = tv_range[1] * 16;
 		return 0;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 2a7e43b..aa7d1d7 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -247,12 +247,12 @@
 		return 0;
 
 	/* Version is coded in the CSD_STRUCTURE byte in the EXT_CSD register */
+	card->ext_csd.raw_ext_csd_structure = ext_csd[EXT_CSD_STRUCTURE];
 	if (card->csd.structure == 3) {
-		int ext_csd_struct = ext_csd[EXT_CSD_STRUCTURE];
-		if (ext_csd_struct > 2) {
+		if (card->ext_csd.raw_ext_csd_structure > 2) {
 			printk(KERN_ERR "%s: unrecognised EXT_CSD structure "
 				"version %d\n", mmc_hostname(card->host),
-					ext_csd_struct);
+					card->ext_csd.raw_ext_csd_structure);
 			err = -EINVAL;
 			goto out;
 		}
@@ -266,6 +266,10 @@
 		goto out;
 	}
 
+	card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
+	card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
+	card->ext_csd.raw_sectors[2] = ext_csd[EXT_CSD_SEC_CNT + 2];
+	card->ext_csd.raw_sectors[3] = ext_csd[EXT_CSD_SEC_CNT + 3];
 	if (card->ext_csd.rev >= 2) {
 		card->ext_csd.sectors =
 			ext_csd[EXT_CSD_SEC_CNT + 0] << 0 |
@@ -277,7 +281,7 @@
 		if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512)
 			mmc_card_set_blockaddr(card);
 	}
-
+	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
 	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
 	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
 	     EXT_CSD_CARD_TYPE_26:
@@ -307,6 +311,11 @@
 			mmc_hostname(card->host));
 	}
 
+	card->ext_csd.raw_s_a_timeout = ext_csd[EXT_CSD_S_A_TIMEOUT];
+	card->ext_csd.raw_erase_timeout_mult =
+		ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT];
+	card->ext_csd.raw_hc_erase_grp_size =
+		ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
 	if (card->ext_csd.rev >= 3) {
 		u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
 		card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG];
@@ -334,6 +343,16 @@
 		card->ext_csd.boot_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 	}
 
+	card->ext_csd.raw_hc_erase_gap_size =
+		ext_csd[EXT_CSD_PARTITION_ATTRIBUTE];
+	card->ext_csd.raw_sec_trim_mult =
+		ext_csd[EXT_CSD_SEC_TRIM_MULT];
+	card->ext_csd.raw_sec_erase_mult =
+		ext_csd[EXT_CSD_SEC_ERASE_MULT];
+	card->ext_csd.raw_sec_feature_support =
+		ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT];
+	card->ext_csd.raw_trim_mult =
+		ext_csd[EXT_CSD_TRIM_MULT];
 	if (card->ext_csd.rev >= 4) {
 		/*
 		 * Enhanced area feature support -- check whether the eMMC
@@ -341,7 +360,7 @@
 		 * area offset and size to user by adding sysfs interface.
 		 */
 		if ((ext_csd[EXT_CSD_PARTITION_SUPPORT] & 0x2) &&
-				(ext_csd[EXT_CSD_PARTITION_ATTRIBUTE] & 0x1)) {
+		    (ext_csd[EXT_CSD_PARTITION_ATTRIBUTE] & 0x1)) {
 			u8 hc_erase_grp_sz =
 				ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
 			u8 hc_wp_grp_sz =
@@ -401,17 +420,17 @@
 }
 
 
-static int mmc_compare_ext_csds(struct mmc_card *card, u8 *ext_csd,
-			unsigned bus_width)
+static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width)
 {
 	u8 *bw_ext_csd;
 	int err;
 
-	err = mmc_get_ext_csd(card, &bw_ext_csd);
-	if (err)
-		return err;
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
 
-	if ((ext_csd == NULL || bw_ext_csd == NULL)) {
+	err = mmc_get_ext_csd(card, &bw_ext_csd);
+
+	if (err || bw_ext_csd == NULL) {
 		if (bus_width != MMC_BUS_WIDTH_1)
 			err = -EINVAL;
 		goto out;
@@ -421,35 +440,40 @@
 		goto out;
 
 	/* only compare read only fields */
-	err = (!(ext_csd[EXT_CSD_PARTITION_SUPPORT] ==
+	err = (!(card->ext_csd.raw_partition_support ==
 			bw_ext_csd[EXT_CSD_PARTITION_SUPPORT]) &&
-		(ext_csd[EXT_CSD_ERASED_MEM_CONT] ==
+		(card->ext_csd.raw_erased_mem_count ==
 			bw_ext_csd[EXT_CSD_ERASED_MEM_CONT]) &&
-		(ext_csd[EXT_CSD_REV] ==
+		(card->ext_csd.rev ==
 			bw_ext_csd[EXT_CSD_REV]) &&
-		(ext_csd[EXT_CSD_STRUCTURE] ==
+		(card->ext_csd.raw_ext_csd_structure ==
 			bw_ext_csd[EXT_CSD_STRUCTURE]) &&
-		(ext_csd[EXT_CSD_CARD_TYPE] ==
+		(card->ext_csd.raw_card_type ==
 			bw_ext_csd[EXT_CSD_CARD_TYPE]) &&
-		(ext_csd[EXT_CSD_S_A_TIMEOUT] ==
+		(card->ext_csd.raw_s_a_timeout ==
 			bw_ext_csd[EXT_CSD_S_A_TIMEOUT]) &&
-		(ext_csd[EXT_CSD_HC_WP_GRP_SIZE] ==
+		(card->ext_csd.raw_hc_erase_gap_size ==
 			bw_ext_csd[EXT_CSD_HC_WP_GRP_SIZE]) &&
-		(ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT] ==
+		(card->ext_csd.raw_erase_timeout_mult ==
 			bw_ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT]) &&
-		(ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] ==
+		(card->ext_csd.raw_hc_erase_grp_size ==
 			bw_ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE]) &&
-		(ext_csd[EXT_CSD_SEC_TRIM_MULT] ==
+		(card->ext_csd.raw_sec_trim_mult ==
 			bw_ext_csd[EXT_CSD_SEC_TRIM_MULT]) &&
-		(ext_csd[EXT_CSD_SEC_ERASE_MULT] ==
+		(card->ext_csd.raw_sec_erase_mult ==
 			bw_ext_csd[EXT_CSD_SEC_ERASE_MULT]) &&
-		(ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT] ==
+		(card->ext_csd.raw_sec_feature_support ==
 			bw_ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT]) &&
-		(ext_csd[EXT_CSD_TRIM_MULT] ==
+		(card->ext_csd.raw_trim_mult ==
 			bw_ext_csd[EXT_CSD_TRIM_MULT]) &&
-		memcmp(&ext_csd[EXT_CSD_SEC_CNT],
-		       &bw_ext_csd[EXT_CSD_SEC_CNT],
-		       4) != 0);
+		(card->ext_csd.raw_sectors[0] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 0]) &&
+		(card->ext_csd.raw_sectors[1] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 1]) &&
+		(card->ext_csd.raw_sectors[2] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 2]) &&
+		(card->ext_csd.raw_sectors[3] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 3]));
 	if (err)
 		err = -EINVAL;
 
@@ -770,7 +794,6 @@
 				 */
 				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
 					err = mmc_compare_ext_csds(card,
-						ext_csd,
 						bus_width);
 				else
 					err = mmc_bus_test(card, bus_width);
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fe14072..50f4f77 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1063,7 +1063,15 @@
 	}
 
 	mmc->ops = &mmci_ops;
-	mmc->f_min = (host->mclk + 511) / 512;
+	/*
+	 * The ARM and ST versions of the block have slightly different
+	 * clock divider equations which means that the minimum divider
+	 * differs too.
+	 */
+	if (variant->st_clkdiv)
+		mmc->f_min = DIV_ROUND_UP(host->mclk, 257);
+	else
+		mmc->f_min = DIV_ROUND_UP(host->mclk, 512);
 	/*
 	 * If the platform data supplies a maximum operating
 	 * frequency, this takes precedence. Else, we fall back
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index eafe44a..63c22b0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1428,9 +1428,9 @@
 	return features;
 }
 
-#define BOND_VLAN_FEATURES	(NETIF_F_ALL_TX_OFFLOADS | \
-				 NETIF_F_SOFT_FEATURES | \
-				 NETIF_F_LRO)
+#define BOND_VLAN_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | \
+				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+				 NETIF_F_HIGHDMA | NETIF_F_LRO)
 
 static void bond_compute_features(struct bonding *bond)
 {
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 2dfcc80..dfa55f9 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2289,6 +2289,23 @@
 	return 0;
 }
 
+/* Check if rx parser should be activated */
+void gfar_check_rx_parser_mode(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs;
+	u32 tempval;
+
+	regs = priv->gfargrp[0].regs;
+
+	tempval = gfar_read(&regs->rctrl);
+	/* If parse is no longer required, then disable parser */
+	if (tempval & RCTRL_REQ_PARSER)
+		tempval |= RCTRL_PRSDEP_INIT;
+	else
+		tempval &= ~RCTRL_PRSDEP_INIT;
+	gfar_write(&regs->rctrl, tempval);
+}
+
 
 /* Enables and disables VLAN insertion/extraction */
 static void gfar_vlan_rx_register(struct net_device *dev,
@@ -2325,12 +2342,9 @@
 		/* Disable VLAN tag extraction */
 		tempval = gfar_read(&regs->rctrl);
 		tempval &= ~RCTRL_VLEX;
-		/* If parse is no longer required, then disable parser */
-		if (tempval & RCTRL_REQ_PARSER)
-			tempval |= RCTRL_PRSDEP_INIT;
-		else
-			tempval &= ~RCTRL_PRSDEP_INIT;
 		gfar_write(&regs->rctrl, tempval);
+
+		gfar_check_rx_parser_mode(priv);
 	}
 
 	gfar_change_mtu(dev, dev->mtu);
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index ba36dc7..440e69d 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -274,7 +274,7 @@
 #define RCTRL_PROM		0x00000008
 #define RCTRL_EMEN		0x00000002
 #define RCTRL_REQ_PARSER	(RCTRL_VLEX | RCTRL_IPCSEN | \
-				 RCTRL_TUCSEN)
+				 RCTRL_TUCSEN | RCTRL_FILREN)
 #define RCTRL_CHECKSUMMING	(RCTRL_IPCSEN | RCTRL_TUCSEN | \
 				RCTRL_PRSDEP_INIT)
 #define RCTRL_EXTHASH		(RCTRL_GHTX)
@@ -1156,6 +1156,7 @@
 		unsigned long tx_mask, unsigned long rx_mask);
 void gfar_init_sysfs(struct net_device *dev);
 int gfar_set_features(struct net_device *dev, u32 features);
+extern void gfar_check_rx_parser_mode(struct gfar_private *priv);
 
 extern const struct ethtool_ops gfar_ethtool_ops;
 
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 8f8b65a..60f46bc 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -140,7 +140,7 @@
 module_param(mtu, int, 0);
 module_param(debug, int, 0);
 module_param(rx_copybreak, int, 0);
-module_param(dspcfg_workaround, int, 1);
+module_param(dspcfg_workaround, int, 0);
 module_param_array(options, int, NULL, 0);
 module_param_array(full_duplex, int, NULL, 0);
 MODULE_PARM_DESC(mtu, "DP8381x MTU (all boards)");
@@ -2028,8 +2028,8 @@
 		np->rx_ring[i].cmd_status = 0;
 		np->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
 		if (np->rx_skbuff[i]) {
-			pci_unmap_single(np->pci_dev,
-				np->rx_dma[i], buflen,
+			pci_unmap_single(np->pci_dev, np->rx_dma[i],
+				buflen + NATSEMI_PADDING,
 				PCI_DMA_FROMDEVICE);
 			dev_kfree_skb(np->rx_skbuff[i]);
 		}
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 718879b..bc9a4bb 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -348,8 +348,9 @@
 
 	/* Only look at sockets that are using this specific device. */
 	switch (event) {
+	case NETDEV_CHANGEADDR:
 	case NETDEV_CHANGEMTU:
-		/* A change in mtu is a bad thing, requiring
+		/* A change in mtu or address is a bad thing, requiring
 		 * LCP re-negotiation.
 		 */
 
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 200a363..0ffec46 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -677,9 +677,11 @@
 		if (status & RX_FIFO_FULL)
 			dev->stats.rx_fifo_errors++;
 
-		/* Mask off RX interrupt */
-		misr &= ~RX_INTS;
-		napi_schedule(&lp->napi);
+		if (likely(napi_schedule_prep(&lp->napi))) {
+			/* Mask off RX interrupt */
+			misr &= ~RX_INTS;
+			__napi_schedule(&lp->napi);
+		}
 	}
 
 	/* TX interrupt request */
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 8ec1a9a..2f110fb 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -182,11 +182,11 @@
 #ifdef SL_INCLUDE_CSLIP
 	cbuff = xchg(&sl->cbuff, cbuff);
 	slcomp = xchg(&sl->slcomp, slcomp);
+#endif
 #ifdef CONFIG_SLIP_MODE_SLIP6
 	sl->xdata    = 0;
 	sl->xbits    = 0;
 #endif
-#endif
 	spin_unlock_bh(&sl->lock);
 	err = 0;
 
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 4685127..9a21ca3 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -879,7 +879,6 @@
 	txptr = db->tx_remove_ptr;
 	while(db->tx_packet_cnt) {
 		tdes0 = le32_to_cpu(txptr->tdes0);
-		pr_debug("tdes0=%x\n", tdes0);
 		if (tdes0 & 0x80000000)
 			break;
 
@@ -889,7 +888,6 @@
 
 		/* Transmit statistic counter */
 		if ( tdes0 != 0x7fffffff ) {
-			pr_debug("tdes0=%x\n", tdes0);
 			dev->stats.collisions += (tdes0 >> 3) & 0xf;
 			dev->stats.tx_bytes += le32_to_cpu(txptr->tdes1) & 0x7ff;
 			if (tdes0 & TDES0_ERR_MASK) {
@@ -986,7 +984,6 @@
 			/* error summary bit check */
 			if (rdes0 & 0x8000) {
 				/* This is a error packet */
-				pr_debug("rdes0: %x\n", rdes0);
 				dev->stats.rx_errors++;
 				if (rdes0 & 1)
 					dev->stats.rx_fifo_errors++;
@@ -1638,7 +1635,6 @@
 		else 				/* DM9102/DM9102A */
 			phy_mode = phy_read(db->ioaddr,
 				    db->phy_addr, 17, db->chip_id) & 0xf000;
-		pr_debug("Phy_mode %x\n", phy_mode);
 		switch (phy_mode) {
 		case 0x1000: db->op_mode = DMFE_10MHF; break;
 		case 0x2000: db->op_mode = DMFE_10MFD; break;
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 387ca43..304fe78 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2421,10 +2421,8 @@
 
 	remove_net_device(hso_net->parent);
 
-	if (hso_net->net) {
+	if (hso_net->net)
 		unregister_netdev(hso_net->net);
-		free_netdev(hso_net->net);
-	}
 
 	/* start freeing */
 	for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
@@ -2436,6 +2434,9 @@
 	kfree(hso_net->mux_bulk_tx_buf);
 	hso_net->mux_bulk_tx_buf = NULL;
 
+	if (hso_net->net)
+		free_netdev(hso_net->net);
+
 	kfree(hso_dev);
 }
 
diff --git a/drivers/net/wireless/ath/ath5k/pci.c b/drivers/net/wireless/ath/ath5k/pci.c
index 296c316..f2c0c23 100644
--- a/drivers/net/wireless/ath/ath5k/pci.c
+++ b/drivers/net/wireless/ath/ath5k/pci.c
@@ -297,7 +297,9 @@
 #ifdef CONFIG_PM_SLEEP
 static int ath5k_pci_suspend(struct device *dev)
 {
-	struct ath5k_softc *sc = pci_get_drvdata(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ieee80211_hw *hw = pci_get_drvdata(pdev);
+	struct ath5k_softc *sc = hw->priv;
 
 	ath5k_led_off(sc);
 	return 0;
@@ -306,7 +308,8 @@
 static int ath5k_pci_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct ath5k_softc *sc = pci_get_drvdata(pdev);
+	struct ieee80211_hw *hw = pci_get_drvdata(pdev);
+	struct ath5k_softc *sc = hw->priv;
 
 	/*
 	 * Suspend/Resume resets the PCI configuration space, so we have to
diff --git a/drivers/net/wireless/ath/ath5k/sysfs.c b/drivers/net/wireless/ath/ath5k/sysfs.c
index 929c68c..a073cdc 100644
--- a/drivers/net/wireless/ath/ath5k/sysfs.c
+++ b/drivers/net/wireless/ath/ath5k/sysfs.c
@@ -10,7 +10,8 @@
 			struct device_attribute *attr,			\
 			char *buf)					\
 {									\
-	struct ath5k_softc *sc = dev_get_drvdata(dev);			\
+	struct ieee80211_hw *hw = dev_get_drvdata(dev);			\
+	struct ath5k_softc *sc = hw->priv;				\
 	return snprintf(buf, PAGE_SIZE, "%d\n", get); 			\
 }									\
 									\
@@ -18,7 +19,8 @@
 			struct device_attribute *attr,			\
 			const char *buf, size_t count)			\
 {									\
-	struct ath5k_softc *sc = dev_get_drvdata(dev);			\
+	struct ieee80211_hw *hw = dev_get_drvdata(dev);			\
+	struct ath5k_softc *sc = hw->priv;				\
 	int val;							\
 									\
 	val = (int)simple_strtoul(buf, NULL, 10);			\
@@ -33,7 +35,8 @@
 			struct device_attribute *attr,			\
 			char *buf)					\
 {									\
-	struct ath5k_softc *sc = dev_get_drvdata(dev);			\
+	struct ieee80211_hw *hw = dev_get_drvdata(dev);			\
+	struct ath5k_softc *sc = hw->priv;				\
 	return snprintf(buf, PAGE_SIZE, "%d\n", get); 			\
 }									\
 static DEVICE_ATTR(name, S_IRUGO, ath5k_attr_show_##name, NULL)
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 3779b89..33443bc 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -671,7 +671,8 @@
 	 * TODO - this could be improved to be dependent on the rate.
 	 *      The hardware can keep up at lower rates, but not higher rates
 	 */
-	if (fi->keyix != ATH9K_TXKEYIX_INVALID)
+	if ((fi->keyix != ATH9K_TXKEYIX_INVALID) &&
+	    !(sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA))
 		ndelim += ATH_AGGR_ENCRYPTDELIM;
 
 	/*
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 2fb53d0..333b69e 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -112,6 +112,8 @@
 	{ USB_DEVICE(0x04bb, 0x093f) },
 	/* NEC WL300NU-G */
 	{ USB_DEVICE(0x0409, 0x0249) },
+	/* NEC WL300NU-AG */
+	{ USB_DEVICE(0x0409, 0x02b4) },
 	/* AVM FRITZ!WLAN USB Stick N */
 	{ USB_DEVICE(0x057c, 0x8401) },
 	/* AVM FRITZ!WLAN USB Stick N 2.4 */
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
index 092e342..942f7a3 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
@@ -298,6 +298,7 @@
 	{RTL_USB_DEVICE(0x06f8, 0xe033, rtl92cu_hal_cfg)}, /*Hercules - Edimax*/
 	{RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/
 	{RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/
+	{RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/
 	{RTL_USB_DEVICE(0x0Df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/
 	{RTL_USB_DEVICE(0x0eb0, 0x9071, rtl92cu_hal_cfg)}, /*NO Brand - Etop*/
 	/* HP - Lite-On ,8188CUS Slim Combo */
diff --git a/drivers/pcmcia/pxa2xx_vpac270.c b/drivers/pcmcia/pxa2xx_vpac270.c
index 712baab..e956f65 100644
--- a/drivers/pcmcia/pxa2xx_vpac270.c
+++ b/drivers/pcmcia/pxa2xx_vpac270.c
@@ -76,10 +76,10 @@
 static void vpac270_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
 {
 	if (skt->nr == 0)
-		gpio_request_array(vpac270_pcmcia_gpios,
+		gpio_free_array(vpac270_pcmcia_gpios,
 					ARRAY_SIZE(vpac270_pcmcia_gpios));
 	else
-		gpio_request_array(vpac270_cf_gpios,
+		gpio_free_array(vpac270_cf_gpios,
 					ARRAY_SIZE(vpac270_cf_gpios));
 }
 
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index 2a20dab..d6620ad3 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -516,8 +516,17 @@
 
 static void ssb_pcicore_init_clientmode(struct ssb_pcicore *pc)
 {
+	ssb_pcicore_fix_sprom_core_index(pc);
+
 	/* Disable PCI interrupts. */
 	ssb_write32(pc->dev, SSB_INTVEC, 0);
+
+	/* Additional PCIe always once-executed workarounds */
+	if (pc->dev->id.coreid == SSB_DEV_PCIE) {
+		ssb_pcicore_serdes_workaround(pc);
+		/* TODO: ASPM */
+		/* TODO: Clock Request Update */
+	}
 }
 
 void ssb_pcicore_init(struct ssb_pcicore *pc)
@@ -529,8 +538,6 @@
 	if (!ssb_device_is_enabled(dev))
 		ssb_device_enable(dev, 0);
 
-	ssb_pcicore_fix_sprom_core_index(pc);
-
 #ifdef CONFIG_SSB_PCICORE_HOSTMODE
 	pc->hostmode = pcicore_is_in_hostmode(pc);
 	if (pc->hostmode)
@@ -538,13 +545,6 @@
 #endif /* CONFIG_SSB_PCICORE_HOSTMODE */
 	if (!pc->hostmode)
 		ssb_pcicore_init_clientmode(pc);
-
-	/* Additional PCIe always once-executed workarounds */
-	if (dev->id.coreid == SSB_DEV_PCIE) {
-		ssb_pcicore_serdes_workaround(pc);
-		/* TODO: ASPM */
-		/* TODO: Clock Request Update */
-	}
 }
 
 static u32 ssb_pcie_read(struct ssb_pcicore *pc, u32 address)
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 9536d38..21d816e 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -599,8 +599,7 @@
 
 config HP_WATCHDOG
 	tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
-	depends on X86
-	default m
+	depends on X86 && PCI
 	help
 	  A software monitoring watchdog and NMI sourcing driver. This driver
 	  will detect lockups and provide a stack trace. This is a driver that
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 79743d1..0c1d917 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1438,12 +1438,15 @@
 	struct dentry *temp;
 	char *path;
 	int len, pos;
+	unsigned seq;
 
 	if (dentry == NULL)
 		return ERR_PTR(-EINVAL);
 
 retry:
 	len = 0;
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
 	for (temp = dentry; !IS_ROOT(temp);) {
 		struct inode *inode = temp->d_inode;
 		if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
@@ -1455,10 +1458,12 @@
 			len += 1 + temp->d_name.len;
 		temp = temp->d_parent;
 		if (temp == NULL) {
+			rcu_read_unlock();
 			pr_err("build_path corrupt dentry %p\n", dentry);
 			return ERR_PTR(-EINVAL);
 		}
 	}
+	rcu_read_unlock();
 	if (len)
 		len--;  /* no leading '/' */
 
@@ -1467,9 +1472,12 @@
 		return ERR_PTR(-ENOMEM);
 	pos = len;
 	path[pos] = 0;	/* trailing null */
+	rcu_read_lock();
 	for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
-		struct inode *inode = temp->d_inode;
+		struct inode *inode;
 
+		spin_lock(&temp->d_lock);
+		inode = temp->d_inode;
 		if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
 			dout("build_path path+%d: %p SNAPDIR\n",
 			     pos, temp);
@@ -1478,21 +1486,26 @@
 			break;
 		} else {
 			pos -= temp->d_name.len;
-			if (pos < 0)
+			if (pos < 0) {
+				spin_unlock(&temp->d_lock);
 				break;
+			}
 			strncpy(path + pos, temp->d_name.name,
 				temp->d_name.len);
 		}
+		spin_unlock(&temp->d_lock);
 		if (pos)
 			path[--pos] = '/';
 		temp = temp->d_parent;
 		if (temp == NULL) {
+			rcu_read_unlock();
 			pr_err("build_path corrupt dentry\n");
 			kfree(path);
 			return ERR_PTR(-EINVAL);
 		}
 	}
-	if (pos != 0) {
+	rcu_read_unlock();
+	if (pos != 0 || read_seqretry(&rename_lock, seq)) {
 		pr_err("build_path did not end path lookup where "
 		       "expected, namelen is %d, pos is %d\n", len, pos);
 		/* presumably this is only possible if racing with a
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3e29899..bc4b12c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/namei.h>
 #include <net/ipv6.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -542,14 +543,12 @@
 static struct dentry *
 cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 {
-	int xid, rc;
-	struct inode *inode;
-	struct qstr name;
-	struct dentry *dparent = NULL, *dchild = NULL, *alias;
+	struct dentry *dentry;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-	unsigned int i, full_len, len;
-	char *full_path = NULL, *pstart;
+	char *full_path = NULL;
+	char *s, *p;
 	char sep;
+	int xid;
 
 	full_path = cifs_build_path_to_root(vol, cifs_sb,
 					    cifs_sb_master_tcon(cifs_sb));
@@ -560,73 +559,32 @@
 
 	xid = GetXid();
 	sep = CIFS_DIR_SEP(cifs_sb);
-	dparent = dget(sb->s_root);
-	full_len = strlen(full_path);
-	full_path[full_len] = sep;
-	pstart = full_path + 1;
+	dentry = dget(sb->s_root);
+	p = s = full_path;
 
-	for (i = 1, len = 0; i <= full_len; i++) {
-		if (full_path[i] != sep || !len) {
-			len++;
-			continue;
-		}
+	do {
+		struct inode *dir = dentry->d_inode;
+		struct dentry *child;
 
-		full_path[i] = 0;
-		cFYI(1, "get dentry for %s", pstart);
+		/* skip separators */
+		while (*s == sep)
+			s++;
+		if (!*s)
+			break;
+		p = s++;
+		/* next separator */
+		while (*s && *s != sep)
+			s++;
 
-		name.name = pstart;
-		name.len = len;
-		name.hash = full_name_hash(pstart, len);
-		dchild = d_lookup(dparent, &name);
-		if (dchild == NULL) {
-			cFYI(1, "not exists");
-			dchild = d_alloc(dparent, &name);
-			if (dchild == NULL) {
-				dput(dparent);
-				dparent = ERR_PTR(-ENOMEM);
-				goto out;
-			}
-		}
-
-		cFYI(1, "get inode");
-		if (dchild->d_inode == NULL) {
-			cFYI(1, "not exists");
-			inode = NULL;
-			if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
-				rc = cifs_get_inode_info_unix(&inode, full_path,
-							      sb, xid);
-			else
-				rc = cifs_get_inode_info(&inode, full_path,
-							 NULL, sb, xid, NULL);
-			if (rc) {
-				dput(dchild);
-				dput(dparent);
-				dparent = ERR_PTR(rc);
-				goto out;
-			}
-			alias = d_materialise_unique(dchild, inode);
-			if (alias != NULL) {
-				dput(dchild);
-				if (IS_ERR(alias)) {
-					dput(dparent);
-					dparent = ERR_PTR(-EINVAL); /* XXX */
-					goto out;
-				}
-				dchild = alias;
-			}
-		}
-		cFYI(1, "parent %p, child %p", dparent, dchild);
-
-		dput(dparent);
-		dparent = dchild;
-		len = 0;
-		pstart = full_path + i + 1;
-		full_path[i] = sep;
-	}
-out:
+		mutex_lock(&dir->i_mutex);
+		child = lookup_one_len(p, dentry, s - p);
+		mutex_unlock(&dir->i_mutex);
+		dput(dentry);
+		dentry = child;
+	} while (!IS_ERR(dentry));
 	_FreeXid(xid);
 	kfree(full_path);
-	return dparent;
+	return dentry;
 }
 
 static int cifs_set_super(struct super_block *sb, void *data)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 0900e16..036ca83 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -129,5 +129,5 @@
 extern const struct export_operations cifs_export_ops;
 #endif /* CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.73"
+#define CIFS_VERSION   "1.74"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index dbd669cc..ccc1afa 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3485,7 +3485,7 @@
 		goto out;
 	}
 
-	snprintf(username, MAX_USERNAME_SIZE, "krb50x%x", fsuid);
+	snprintf(username, sizeof(username), "krb50x%x", fsuid);
 	vol_info->username = username;
 	vol_info->local_nls = cifs_sb->local_nls;
 	vol_info->linux_uid = fsuid;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 81914df..fa8c21d9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -55,6 +55,7 @@
 	char dirsep;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
 	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+	unsigned seq;
 
 	if (direntry == NULL)
 		return NULL;  /* not much we can do if dentry is freed and
@@ -68,22 +69,29 @@
 		dfsplen = 0;
 cifs_bp_rename_retry:
 	namelen = dfsplen;
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
 		if (temp == NULL) {
 			cERROR(1, "corrupt dentry");
+			rcu_read_unlock();
 			return NULL;
 		}
 	}
+	rcu_read_unlock();
 
 	full_path = kmalloc(namelen+1, GFP_KERNEL);
 	if (full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
+	rcu_read_lock();
 	for (temp = direntry; !IS_ROOT(temp);) {
+		spin_lock(&temp->d_lock);
 		namelen -= 1 + temp->d_name.len;
 		if (namelen < 0) {
+			spin_unlock(&temp->d_lock);
 			break;
 		} else {
 			full_path[namelen] = dirsep;
@@ -91,14 +99,17 @@
 				temp->d_name.len);
 			cFYI(0, "name: %s", full_path + namelen);
 		}
+		spin_unlock(&temp->d_lock);
 		temp = temp->d_parent;
 		if (temp == NULL) {
 			cERROR(1, "corrupt dentry");
+			rcu_read_unlock();
 			kfree(full_path);
 			return NULL;
 		}
 	}
-	if (namelen != dfsplen) {
+	rcu_read_unlock();
+	if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
 		cERROR(1, "did not end path lookup where expected namelen is %d",
 			namelen);
 		/* presumably this is only possible if racing with a rename
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 3892ab8..d3e6196 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -428,8 +428,7 @@
 			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		flags |= NTLMSSP_NEGOTIATE_SIGN;
 		if (!ses->server->session_estab)
-			flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
-				NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+			flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
 	}
 
 	sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -465,10 +464,11 @@
 		NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
 		NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
 	if (ses->server->sec_mode &
-	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		flags |= NTLMSSP_NEGOTIATE_SIGN;
-	if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
-		flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+		if (!ses->server->session_estab)
+			flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+	}
 
 	tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
 	sec_blob->NegotiateFlags = cpu_to_le32(flags);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index e1419390..739fb59 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -37,7 +37,7 @@
 /* These macros may change in future, to provide better st_ino semantics. */
 #define OFFSET(x)	((x)->i_ino)
 
-static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset)
+static unsigned long cramino(const struct cramfs_inode *cino, unsigned int offset)
 {
 	if (!cino->offset)
 		return offset + 1;
@@ -61,7 +61,7 @@
 }
 
 static struct inode *get_cramfs_inode(struct super_block *sb,
-	struct cramfs_inode *cramfs_inode, unsigned int offset)
+	const struct cramfs_inode *cramfs_inode, unsigned int offset)
 {
 	struct inode *inode;
 	static struct timespec zerotime;
@@ -317,7 +317,7 @@
 	/* Set it all up.. */
 	sb->s_op = &cramfs_ops;
 	root = get_cramfs_inode(sb, &super.root, 0);
-	if (!root)
+	if (IS_ERR(root))
 		goto out;
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
@@ -423,6 +423,7 @@
 static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	unsigned int offset = 0;
+	struct inode *inode = NULL;
 	int sorted;
 
 	mutex_lock(&read_mutex);
@@ -449,8 +450,8 @@
 
 		for (;;) {
 			if (!namelen) {
-				mutex_unlock(&read_mutex);
-				return ERR_PTR(-EIO);
+				inode = ERR_PTR(-EIO);
+				goto out;
 			}
 			if (name[namelen-1])
 				break;
@@ -462,17 +463,18 @@
 		if (retval > 0)
 			continue;
 		if (!retval) {
-			struct cramfs_inode entry = *de;
-			mutex_unlock(&read_mutex);
-			d_add(dentry, get_cramfs_inode(dir->i_sb, &entry, dir_off));
-			return NULL;
+			inode = get_cramfs_inode(dir->i_sb, de, dir_off);
+			break;
 		}
 		/* else (retval < 0) */
 		if (sorted)
 			break;
 	}
+out:
 	mutex_unlock(&read_mutex);
-	d_add(dentry, NULL);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	d_add(dentry, inode);
 	return NULL;
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 37f72ee..6e4ea6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2213,14 +2213,15 @@
  * The hash value has to match the hash queue that the dentry is on..
  */
 /*
- * d_move - move a dentry
+ * __d_move - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way.  Caller hold
+ * rename_lock.
  */
-void d_move(struct dentry * dentry, struct dentry * target)
+static void __d_move(struct dentry * dentry, struct dentry * target)
 {
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2228,8 +2229,6 @@
 	BUG_ON(d_ancestor(dentry, target));
 	BUG_ON(d_ancestor(target, dentry));
 
-	write_seqlock(&rename_lock);
-
 	dentry_lock_for_move(dentry, target);
 
 	write_seqcount_begin(&dentry->d_seq);
@@ -2275,6 +2274,20 @@
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
+}
+
+/*
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+void d_move(struct dentry *dentry, struct dentry *target)
+{
+	write_seqlock(&rename_lock);
+	__d_move(dentry, target);
 	write_sequnlock(&rename_lock);
 }
 EXPORT_SYMBOL(d_move);
@@ -2302,7 +2315,7 @@
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
+ * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
@@ -2317,11 +2330,6 @@
 	if (alias->d_parent == dentry->d_parent)
 		goto out_unalias;
 
-	/* Check for loops */
-	ret = ERR_PTR(-ELOOP);
-	if (d_ancestor(alias, dentry))
-		goto out_err;
-
 	/* See lock_rename() */
 	ret = ERR_PTR(-EBUSY);
 	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
@@ -2331,7 +2339,7 @@
 		goto out_err;
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
-	d_move(alias, dentry);
+	__d_move(alias, dentry);
 	ret = alias;
 out_err:
 	spin_unlock(&inode->i_lock);
@@ -2416,15 +2424,24 @@
 		alias = __d_find_alias(inode, 0);
 		if (alias) {
 			actual = alias;
-			/* Is this an anonymous mountpoint that we could splice
-			 * into our tree? */
-			if (IS_ROOT(alias)) {
+			write_seqlock(&rename_lock);
+
+			if (d_ancestor(alias, dentry)) {
+				/* Check for loops */
+				actual = ERR_PTR(-ELOOP);
+			} else if (IS_ROOT(alias)) {
+				/* Is this an anonymous mountpoint that we
+				 * could splice into our tree? */
 				__d_materialise_dentry(dentry, alias);
+				write_sequnlock(&rename_lock);
 				__d_drop(alias);
 				goto found;
+			} else {
+				/* Nope, but we must(!) avoid directory
+				 * aliasing */
+				actual = __d_unalias(inode, dentry, alias);
 			}
-			/* Nope, but we must(!) avoid directory aliasing */
-			actual = __d_unalias(inode, dentry, alias);
+			write_sequnlock(&rename_lock);
 			if (IS_ERR(actual))
 				dput(alias);
 			goto out_nolock;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 06065bd..c57bedd 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -913,7 +913,7 @@
 	unsigned long ino = exofs_parent_ino(child);
 
 	if (!ino)
-		return NULL;
+		return ERR_PTR(-ESTALE);
 
 	return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
 }
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 802ac5e..f9fbbe9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1069,6 +1069,7 @@
 		return 0;
 
 	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 	head = bh = page_buffers(page);
 	do {
 		if (atomic_read(&bh->b_count))
@@ -1080,6 +1081,7 @@
 			goto not_possible;
 		bh = bh->b_this_page;
 	} while(bh != head);
+	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 
 	head = bh = page_buffers(page);
@@ -1112,6 +1114,7 @@
 	WARN_ON(buffer_dirty(bh));
 	WARN_ON(buffer_pinned(bh));
 cannot_release:
+	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 	return 0;
 }
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8ef70f4..2cca2931 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,10 +47,10 @@
 				bd_ail_gl_list);
 		bh = bd->bd_bh;
 		gfs2_remove_from_ail(bd);
-		spin_unlock(&sdp->sd_ail_lock);
-
 		bd->bd_bh = NULL;
 		bh->b_private = NULL;
+		spin_unlock(&sdp->sd_ail_lock);
+
 		bd->bd_blkno = bh->b_blocknr;
 		gfs2_log_lock(sdp);
 		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
@@ -221,8 +221,10 @@
 		}
 	}
 
-	if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
+	if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
+		gfs2_log_flush(gl->gl_sbd, NULL);
 		gl->gl_sbd->sd_rindex_uptodate = 0;
+	}
 	if (ip && S_ISREG(ip->i_inode.i_mode))
 		truncate_inode_pages(ip->i_inode.i_mapping, 0);
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0a064e9..81206e7 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -17,6 +17,7 @@
 #include <linux/buffer_head.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist_bl.h>
+#include <linux/completion.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -546,6 +547,7 @@
 	struct gfs2_glock *sd_trans_gl;
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
+	struct completion sd_locking_init;
 
 	/* Inode Stuff */
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 903115f..85c6292 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -903,6 +903,7 @@
 		if (gfs2_ail1_empty(sdp))
 			break;
 	}
+	gfs2_log_flush(sdp, NULL);
 }
 
 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8ac9ae1..2a77071 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -72,6 +72,7 @@
 
 	init_waitqueue_head(&sdp->sd_glock_wait);
 	atomic_set(&sdp->sd_glock_disposal, 0);
+	init_completion(&sdp->sd_locking_init);
 	spin_lock_init(&sdp->sd_statfs_spin);
 
 	spin_lock_init(&sdp->sd_rindex_spin);
@@ -1017,11 +1018,13 @@
 		fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
+		complete(&sdp->sd_locking_init);
 		return 0;
 	}
 	ret = lm->lm_mount(sdp, fsname);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+	complete(&sdp->sd_locking_init);
 	return ret;
 }
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ed540e7..fb0edf7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -757,13 +757,17 @@
 	struct timespec atime;
 	struct gfs2_dinode *di;
 	int ret = -EAGAIN;
+	int unlock_required = 0;
 
 	/* Skip timestamp update, if this is from a memalloc */
 	if (current->flags & PF_MEMALLOC)
 		goto do_flush;
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (ret)
-		goto do_flush;
+	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+		if (ret)
+			goto do_flush;
+		unlock_required = 1;
+	}
 	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
 	if (ret)
 		goto do_unlock;
@@ -780,7 +784,8 @@
 	}
 	gfs2_trans_end(sdp);
 do_unlock:
-	gfs2_glock_dq_uninit(&gh);
+	if (unlock_required)
+		gfs2_glock_dq_uninit(&gh);
 do_flush:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
@@ -1427,7 +1432,20 @@
 	return error;
 }
 
-/*
+/**
+ * gfs2_evict_inode - Remove an inode from cache
+ * @inode: The inode to evict
+ *
+ * There are three cases to consider:
+ * 1. i_nlink == 0, we are final opener (and must deallocate)
+ * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
+ * 3. i_nlink > 0
+ *
+ * If the fs is read only, then we have to treat all cases as per #3
+ * since we are unable to do any deallocation. The inode will be
+ * deallocated by the next read/write node to attempt an allocation
+ * in the same resource group
+ *
  * We have to (at the moment) hold the inodes main lock to cover
  * the gap between unlocking the shared lock on the iopen lock and
  * taking the exclusive lock. I'd rather do a shared -> exclusive
@@ -1470,6 +1488,8 @@
 	if (error)
 		goto out_truncate;
 
+	/* Case 1 starts here */
+
 	if (S_ISDIR(inode->i_mode) &&
 	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
 		error = gfs2_dir_exhash_dealloc(ip);
@@ -1493,13 +1513,16 @@
 	goto out_unlock;
 
 out_truncate:
+	/* Case 2 starts here */
 	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 	if (error)
 		goto out_unlock;
-	gfs2_final_release_pages(ip);
+	/* Needs to be done before glock release & also in a transaction */
+	truncate_inode_pages(&inode->i_data, 0);
 	gfs2_trans_end(sdp);
 
 out_unlock:
+	/* Error path for case 1 */
 	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
 		gfs2_glock_dq(&ip->i_iopen_gh);
 	gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1507,6 +1530,7 @@
 	if (error && error != GLR_TRYFAILED && error != -EROFS)
 		fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
 out:
+	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index e20eab3..443cabc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -338,6 +338,9 @@
 	rv = sscanf(buf, "%u", &first);
 	if (rv != 1 || first > 1)
 		return -EINVAL;
+	rv = wait_for_completion_killable(&sdp->sd_locking_init);
+	if (rv)
+		return rv;
 	spin_lock(&sdp->sd_jindex_spin);
 	rv = -EBUSY;
 	if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
@@ -414,7 +417,9 @@
 	rv = sscanf(buf, "%d", &jid);
 	if (rv != 1)
 		return -EINVAL;
-
+	rv = wait_for_completion_killable(&sdp->sd_locking_init);
+	if (rv)
+		return rv;
 	spin_lock(&sdp->sd_jindex_spin);
 	rv = -EINVAL;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 87ed48e..85c098a 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -139,7 +139,8 @@
 static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
 				   struct nameidata *nd)
 {
-	struct dentry *proc_dentry, *new, *parent;
+	struct dentry *proc_dentry, *parent;
+	struct qstr *name = &dentry->d_name;
 	struct inode *inode;
 	int err, deleted;
 
@@ -149,23 +150,9 @@
 	else if (deleted)
 		return ERR_PTR(-ENOENT);
 
-	err = -ENOMEM;
 	parent = HPPFS_I(ino)->proc_dentry;
 	mutex_lock(&parent->d_inode->i_mutex);
-	proc_dentry = d_lookup(parent, &dentry->d_name);
-	if (proc_dentry == NULL) {
-		proc_dentry = d_alloc(parent, &dentry->d_name);
-		if (proc_dentry == NULL) {
-			mutex_unlock(&parent->d_inode->i_mutex);
-			goto out;
-		}
-		new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
-						       proc_dentry, NULL);
-		if (new) {
-			dput(proc_dentry);
-			proc_dentry = new;
-		}
-	}
+	proc_dentry = lookup_one_len(name->name, parent, name->len);
 	mutex_unlock(&parent->d_inode->i_mutex);
 
 	if (IS_ERR(proc_dentry))
@@ -174,13 +161,11 @@
 	err = -ENOMEM;
 	inode = get_inode(ino->i_sb, proc_dentry);
 	if (!inode)
-		goto out_dput;
+		goto out;
 
  	d_add(dentry, inode);
 	return NULL;
 
- out_dput:
-	dput(proc_dentry);
  out:
 	return ERR_PTR(err);
 }
@@ -690,8 +675,10 @@
 	struct inode *proc_ino = dentry->d_inode;
 	struct inode *inode = new_inode(sb);
 
-	if (!inode)
+	if (!inode) {
+		dput(dentry);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	if (S_ISDIR(dentry->d_inode->i_mode)) {
 		inode->i_op = &hppfs_dir_iops;
@@ -704,7 +691,7 @@
 		inode->i_fop = &hppfs_file_fops;
 	}
 
-	HPPFS_I(inode)->proc_dentry = dget(dentry);
+	HPPFS_I(inode)->proc_dentry = dentry;
 
 	inode->i_uid = proc_ino->i_uid;
 	inode->i_gid = proc_ino->i_gid;
@@ -737,7 +724,7 @@
 	sb->s_fs_info = proc_mnt;
 
 	err = -ENOMEM;
-	root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root);
+	root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root));
 	if (!root_inode)
 		goto out_mntput;
 
diff --git a/fs/libfs.c b/fs/libfs.c
index c88eab5..275ca474 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -822,7 +822,7 @@
 		goto out;
 
 	attr->set_buf[size] = '\0';
-	val = simple_strtol(attr->set_buf, NULL, 0);
+	val = simple_strtoll(attr->set_buf, NULL, 0);
 	ret = attr->set(attr->data, val);
 	if (ret == 0)
 		ret = len; /* on success, claim we got the whole input */
diff --git a/fs/namei.c b/fs/namei.c
index 0223c41..14ab8d3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -433,6 +433,8 @@
 			goto err_parent;
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
+		if (dentry->d_parent != parent)
+			goto err_parent;
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 		if (!__d_rcu_to_refcount(dentry, nd->seq))
 			goto err_child;
@@ -940,7 +942,6 @@
 		 * Don't forget we might have a non-mountpoint managed dentry
 		 * that wants to block transit.
 		 */
-		*inode = path->dentry->d_inode;
 		if (unlikely(managed_dentry_might_block(path->dentry)))
 			return false;
 
@@ -953,6 +954,12 @@
 		path->mnt = mounted;
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+		/*
+		 * Update the inode too. We don't need to re-check the
+		 * dentry sequence number here after this d_inode read,
+		 * because a mount-point is always pinned.
+		 */
+		*inode = path->dentry->d_inode;
 	}
 	return true;
 }
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 0bafcc9..f9d03ab 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -398,7 +398,6 @@
 	 * this offset and save the original offset.
 	 */
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
-	data->mds_offset = offset;
 
 	/* Perform an asynchronous write */
 	status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6870bc6..e6e8f3b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -91,7 +91,7 @@
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 				((3+NFS4_FHSIZE) >> 2))
-#define nfs4_fattr_bitmap_maxsz 3
+#define nfs4_fattr_bitmap_maxsz 4
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e268e3b..72716805 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -864,6 +864,8 @@
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
+	/* pnfs_set_layoutcommit needs this */
+	data->mds_offset = data->args.offset;
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 29309e2..b57aab9 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -56,16 +56,12 @@
 
 	lock_ufs(dir->i_sb);
 	ino = ufs_inode_by_name(dir, &dentry->d_name);
-	if (ino) {
+	if (ino)
 		inode = ufs_iget(dir->i_sb, ino);
-		if (IS_ERR(inode)) {
-			unlock_ufs(dir->i_sb);
-			return ERR_CAST(inode);
-		}
-	}
 	unlock_ufs(dir->i_sb);
-	d_add(dentry, inode);
-	return NULL;
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	return d_splice_alias(inode, dentry);
 }
 
 /*
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a10ef5..6cd5b64 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,7 @@
 struct acpi_device_power {
 	int state;		/* Current state */
 	struct acpi_device_power_flags flags;
-	struct acpi_device_power_state states[4];	/* Power states (D0-D3) */
+	struct acpi_device_power_state states[ACPI_D_STATE_COUNT];	/* Power states (D0-D3Cold) */
 };
 
 /* Performance Management */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index a756bc8..4543b6f 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -98,8 +98,11 @@
 /*
  * Spinlock primitives
  */
+
+#ifndef acpi_os_create_lock
 acpi_status
 acpi_os_create_lock(acpi_spinlock *out_handle);
+#endif
 
 void acpi_os_delete_lock(acpi_spinlock handle);
 
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 5d2a5e9..2ce1be9 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -159,6 +159,24 @@
 	} while (0)
 #endif
 
+/*
+ * When lockdep is enabled, the spin_lock_init() macro stringifies it's
+ * argument and uses that as a name for the lock in debugging.
+ * By executing spin_lock_init() in a macro the key changes from "lock" for
+ * all locks to the name of the argument of acpi_os_create_lock(), which
+ * prevents lockdep from reporting false positives for ACPICA locks.
+ */
+#define acpi_os_create_lock(__handle)				\
+({								\
+	spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock));	\
+								\
+	if (lock) {						\
+		*(__handle) = lock;				\
+		spin_lock_init(*(__handle));			\
+	}							\
+	lock ? AE_OK : AE_NO_MEMORY;				\
+})
+
 #endif /* __KERNEL__ */
 
 #endif /* __ACLINUX_H__ */
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index e08f344..3d53efd 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -182,6 +182,7 @@
 	{0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6759, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x675F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6760, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6761, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6762, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
@@ -192,6 +193,7 @@
 	{0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8b45384..baa397e 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -676,7 +676,8 @@
 void irq_gc_mask_set_bit(struct irq_data *d);
 void irq_gc_mask_clr_bit(struct irq_data *d);
 void irq_gc_unmask_enable_reg(struct irq_data *d);
-void irq_gc_ack(struct irq_data *d);
+void irq_gc_ack_set_bit(struct irq_data *d);
+void irq_gc_ack_clr_bit(struct irq_data *d);
 void irq_gc_mask_disable_reg_and_ack(struct irq_data *d);
 void irq_gc_eoi(struct irq_data *d);
 int irq_gc_set_wake(struct irq_data *d, unsigned int on);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index e1e3b2b..935699b 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -20,6 +20,8 @@
 #include <linux/compiler.h>
 #include <linux/mutex.h>
 
+#define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)
+
 struct memory_block {
 	unsigned long start_section_nr;
 	unsigned long end_section_nr;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c6927a4..6ad4355 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -64,6 +64,19 @@
 	unsigned long long	enhanced_area_offset;	/* Units: Byte */
 	unsigned int		enhanced_area_size;	/* Units: KB */
 	unsigned int		boot_size;		/* in bytes */
+	u8			raw_partition_support;	/* 160 */
+	u8			raw_erased_mem_count;	/* 181 */
+	u8			raw_ext_csd_structure;	/* 194 */
+	u8			raw_card_type;		/* 196 */
+	u8			raw_s_a_timeout;		/* 217 */
+	u8			raw_hc_erase_gap_size;	/* 221 */
+	u8			raw_erase_timeout_mult;	/* 223 */
+	u8			raw_hc_erase_grp_size;	/* 224 */
+	u8			raw_sec_trim_mult;	/* 229 */
+	u8			raw_sec_erase_mult;	/* 230 */
+	u8			raw_sec_feature_support;/* 231 */
+	u8			raw_trim_mult;		/* 232 */
+	u8			raw_sectors[4];		/* 212 - 4 bytes */
 };
 
 struct sd_scr {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 54b8b4d..9e19477 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1097,12 +1097,6 @@
 #define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
 				 NETIF_F_FSO)
 
-#define NETIF_F_ALL_TX_OFFLOADS	(NETIF_F_ALL_CSUM | NETIF_F_SG | \
-				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
-				 NETIF_F_HIGHDMA | \
-				 NETIF_F_SCTP_CSUM | \
-				 NETIF_F_ALL_FCOE)
-
 	/*
 	 * If one device supports one of these features, then enable them
 	 * for all in netdev_increment_features.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a..14a6c7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -844,6 +844,7 @@
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
 enum powersavings_balance_level {
 	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
@@ -893,16 +894,21 @@
 	return 0;
 }
 
-struct sched_group {
-	struct sched_group *next;	/* Must be a circular list */
+struct sched_group_power {
 	atomic_t ref;
-
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power, cpu_power_orig;
+	unsigned int power, power_orig;
+};
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
+
 	unsigned int group_weight;
+	struct sched_group_power *sgp;
 
 	/*
 	 * The CPUs this group covers.
@@ -1254,6 +1260,9 @@
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
 	char rcu_read_unlock_special;
+#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
+	int rcu_boosted;
+#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
 	struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/include/linux/sdla.h b/include/linux/sdla.h
index 564acd3..9995c7f 100644
--- a/include/linux/sdla.h
+++ b/include/linux/sdla.h
@@ -112,11 +112,7 @@
    short Tb_max;
 };
 
-#ifndef __KERNEL__
-
-void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet);
-
-#else
+#ifdef __KERNEL__
 
 /* important Z80 window addresses */
 #define SDLA_CONTROL_WND		0xE000
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index dd6847e..6506458 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -63,6 +63,7 @@
 	SCTP_CMD_ECN_ECNE,	/* Do delayed ECNE processing. */
 	SCTP_CMD_ECN_CWR,	/* Do delayed CWR processing.  */
 	SCTP_CMD_TIMER_START,	/* Start a timer.  */
+	SCTP_CMD_TIMER_START_ONCE, /* Start a timer once */
 	SCTP_CMD_TIMER_RESTART,	/* Restart a timer. */
 	SCTP_CMD_TIMER_STOP,	/* Stop a timer. */
 	SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 99b027b..ca4693b 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -80,7 +80,7 @@
 
 void sctp_ulpevent_free(struct sctp_ulpevent *);
 int sctp_ulpevent_is_notification(const struct sctp_ulpevent *);
-void sctp_queue_purge_ulpevents(struct sk_buff_head *list);
+unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list);
 
 struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
 	const struct sctp_association *asoc,
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 31a9db7..3a2cab4 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -101,10 +101,10 @@
 }
 
 /**
- * irq_gc_ack - Ack pending interrupt
+ * irq_gc_ack_set_bit - Ack pending interrupt via setting bit
  * @d: irq_data
  */
-void irq_gc_ack(struct irq_data *d)
+void irq_gc_ack_set_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << (d->irq - gc->irq_base);
@@ -115,6 +115,20 @@
 }
 
 /**
+ * irq_gc_ack_clr_bit - Ack pending interrupt via clearing bit
+ * @d: irq_data
+ */
+void irq_gc_ack_clr_bit(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = ~(1 << (d->irq - gc->irq_base));
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack);
+	irq_gc_unlock(gc);
+}
+
+/**
  * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt
  * @d: irq_data
  */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7e59ffb..ba06207 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -84,9 +84,32 @@
 
 static struct rcu_state *rcu_state;
 
+/*
+ * The rcu_scheduler_active variable transitions from zero to one just
+ * before the first task is spawned.  So when this variable is zero, RCU
+ * can assume that there is but one task, allowing RCU to (for example)
+ * optimized synchronize_sched() to a simple barrier().  When this variable
+ * is one, RCU must actually do all the hard work required to detect real
+ * grace periods.  This variable is also used to suppress boot-time false
+ * positives from lockdep-RCU error checking.
+ */
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
+/*
+ * The rcu_scheduler_fully_active variable transitions from zero to one
+ * during the early_initcall() processing, which is after the scheduler
+ * is capable of creating new tasks.  So RCU processing (for example,
+ * creating tasks for RCU priority boosting) must be delayed until after
+ * rcu_scheduler_fully_active transitions from zero to one.  We also
+ * currently delay invocation of any RCU callbacks until after this point.
+ *
+ * It might later prove better for people registering RCU callbacks during
+ * early boot to take responsibility for these callbacks, but one step at
+ * a time.
+ */
+static int rcu_scheduler_fully_active __read_mostly;
+
 #ifdef CONFIG_RCU_BOOST
 
 /*
@@ -98,7 +121,6 @@
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
-static char rcu_kthreads_spawnable;
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
@@ -1467,6 +1489,8 @@
  */
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 {
+	if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
+		return;
 	if (likely(!rsp->boost)) {
 		rcu_do_batch(rsp, rdp);
 		return;
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 14dc7dd..8aafbb8 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,6 +68,7 @@
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
+static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
 /*
@@ -147,7 +148,7 @@
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
-	if (t->rcu_read_lock_nesting &&
+	if (t->rcu_read_lock_nesting > 0 &&
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
@@ -190,6 +191,14 @@
 				rnp->gp_tasks = &t->rcu_node_entry;
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	} else if (t->rcu_read_lock_nesting < 0 &&
+		   t->rcu_read_unlock_special) {
+
+		/*
+		 * Complete exit from RCU read-side critical section on
+		 * behalf of preempted instance of __rcu_read_unlock().
+		 */
+		rcu_read_unlock_special(t);
 	}
 
 	/*
@@ -284,7 +293,7 @@
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static noinline void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
 	int empty_exp;
@@ -309,7 +318,7 @@
 	}
 
 	/* Hardware IRQ handlers cannot block. */
-	if (in_irq()) {
+	if (in_irq() || in_serving_softirq()) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -342,6 +351,11 @@
 #ifdef CONFIG_RCU_BOOST
 		if (&t->rcu_node_entry == rnp->boost_tasks)
 			rnp->boost_tasks = np;
+		/* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+		if (t->rcu_boosted) {
+			special |= RCU_READ_UNLOCK_BOOSTED;
+			t->rcu_boosted = 0;
+		}
 #endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
@@ -358,7 +372,6 @@
 #ifdef CONFIG_RCU_BOOST
 		/* Unboost if we were boosted. */
 		if (special & RCU_READ_UNLOCK_BOOSTED) {
-			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
 			rt_mutex_unlock(t->rcu_boost_mutex);
 			t->rcu_boost_mutex = NULL;
 		}
@@ -387,13 +400,22 @@
 	struct task_struct *t = current;
 
 	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
-	--t->rcu_read_lock_nesting;
-	barrier();  /* decrement before load of ->rcu_read_unlock_special */
-	if (t->rcu_read_lock_nesting == 0 &&
-	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-		rcu_read_unlock_special(t);
+	if (t->rcu_read_lock_nesting != 1)
+		--t->rcu_read_lock_nesting;
+	else {
+		t->rcu_read_lock_nesting = INT_MIN;
+		barrier();  /* assign before ->rcu_read_unlock_special load */
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+			rcu_read_unlock_special(t);
+		barrier();  /* ->rcu_read_unlock_special load before assign */
+		t->rcu_read_lock_nesting = 0;
+	}
 #ifdef CONFIG_PROVE_LOCKING
-	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+	{
+		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+	}
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -589,7 +611,8 @@
 		rcu_preempt_qs(cpu);
 		return;
 	}
-	if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+	if (t->rcu_read_lock_nesting > 0 &&
+	    per_cpu(rcu_preempt_data, cpu).qs_pending)
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -695,9 +718,12 @@
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	for (;;) {
-		if (!sync_rcu_preempt_exp_done(rnp))
+		if (!sync_rcu_preempt_exp_done(rnp)) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			break;
+		}
 		if (rnp->parent == NULL) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			wake_up(&sync_rcu_preempt_exp_wq);
 			break;
 		}
@@ -707,7 +733,6 @@
 		raw_spin_lock(&rnp->lock); /* irqs already disabled */
 		rnp->expmask &= ~mask;
 	}
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -1174,7 +1199,7 @@
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&mtx, t);
 	t->rcu_boost_mutex = &mtx;
-	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	t->rcu_boosted = 1;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
@@ -1532,7 +1557,7 @@
 	struct sched_param sp;
 	struct task_struct *t;
 
-	if (!rcu_kthreads_spawnable ||
+	if (!rcu_scheduler_fully_active ||
 	    per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
 		return 0;
 	t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
@@ -1639,7 +1664,7 @@
 	struct sched_param sp;
 	struct task_struct *t;
 
-	if (!rcu_kthreads_spawnable ||
+	if (!rcu_scheduler_fully_active ||
 	    rnp->qsmaskinit == 0)
 		return 0;
 	if (rnp->node_kthread_task == NULL) {
@@ -1665,7 +1690,7 @@
 	int cpu;
 	struct rcu_node *rnp;
 
-	rcu_kthreads_spawnable = 1;
+	rcu_scheduler_fully_active = 1;
 	for_each_possible_cpu(cpu) {
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
 		if (cpu_online(cpu))
@@ -1687,7 +1712,7 @@
 	struct rcu_node *rnp = rdp->mynode;
 
 	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
-	if (rcu_kthreads_spawnable) {
+	if (rcu_scheduler_fully_active) {
 		(void)rcu_spawn_one_cpu_kthread(cpu);
 		if (rnp->node_kthread_task == NULL)
 			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
@@ -1726,6 +1751,13 @@
 {
 }
 
+static int __init rcu_scheduler_really_started(void)
+{
+	rcu_scheduler_fully_active = 1;
+	return 0;
+}
+early_initcall(rcu_scheduler_really_started);
+
 static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 9769c75..fde6ff9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+static void sched_ttwu_do_pending(struct task_struct *list)
 {
 	struct rq *rq = this_rq();
-	struct task_struct *list = xchg(&rq->wake_list, NULL);
-
-	if (!list)
-		return;
 
 	raw_spin_lock(&rq->lock);
 
@@ -2563,9 +2559,45 @@
 	raw_spin_unlock(&rq->lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	sched_ttwu_do_pending(list);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
 void scheduler_ipi(void)
 {
-	sched_ttwu_pending();
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	/*
+	 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+	 * traditionally all their work was done from the interrupt return
+	 * path. Now that we actually do some work, we need to make sure
+	 * we do call them.
+	 *
+	 * Some archs already do call them, luckily irq_enter/exit nest
+	 * properly.
+	 *
+	 * Arguably we should visit all archs and update all handlers,
+	 * however a fair share of IPIs are still resched only so this would
+	 * somewhat pessimize the simple resched case.
+	 */
+	irq_enter();
+	sched_ttwu_do_pending(list);
+	irq_exit();
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
@@ -6557,7 +6589,7 @@
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->sgp->power) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -6581,9 +6613,9 @@
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->cpu_power != SCHED_POWER_SCALE) {
+		if (group->sgp->power != SCHED_POWER_SCALE) {
 			printk(KERN_CONT " (cpu_power = %d)",
-				group->cpu_power);
+				group->sgp->power);
 		}
 
 		group = group->next;
@@ -6774,11 +6806,39 @@
 	return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+	struct sched_group *tmp, *first;
+
+	if (!sg)
+		return;
+
+	first = sg;
+	do {
+		tmp = sg->next;
+
+		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+			kfree(sg->sgp);
+
+		kfree(sg);
+		sg = tmp;
+	} while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref))
+
+	/*
+	 * If its an overlapping domain it has private groups, iterate and
+	 * nuke them all.
+	 */
+	if (sd->flags & SD_OVERLAP) {
+		free_sched_groups(sd->groups, 1);
+	} else if (atomic_dec_and_test(&sd->groups->ref)) {
+		kfree(sd->groups->sgp);
 		kfree(sd->groups);
+	}
 	kfree(sd);
 }
 
@@ -6945,6 +7005,7 @@
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6964,15 +7025,73 @@
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP	0x01
+
 struct sched_domain_topology_level {
 	sched_domain_init_f init;
 	sched_domain_mask_f mask;
+	int		    flags;
 	struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered = sched_domains_tmpmask;
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *child;
+	int i;
+
+	cpumask_clear(covered);
+
+	for_each_cpu(i, span) {
+		struct cpumask *sg_span;
+
+		if (cpumask_test_cpu(i, covered))
+			continue;
+
+		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				GFP_KERNEL, cpu_to_node(i));
+
+		if (!sg)
+			goto fail;
+
+		sg_span = sched_group_cpus(sg);
+
+		child = *per_cpu_ptr(sdd->sd, i);
+		if (child->child) {
+			child = child->child;
+			cpumask_copy(sg_span, sched_domain_span(child));
+		} else
+			cpumask_set_cpu(i, sg_span);
+
+		cpumask_or(covered, covered, sg_span);
+
+		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+		atomic_inc(&sg->sgp->ref);
+
+		if (cpumask_test_cpu(cpu, sg_span))
+			groups = sg;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+		last->next = first;
+	}
+	sd->groups = groups;
+
+	return 0;
+
+fail:
+	free_sched_groups(first, 0);
+
+	return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6981,24 +7100,24 @@
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg)
+	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
+		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+	}
 
 	return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
  */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	struct sd_data *sdd = sd->private;
@@ -7006,6 +7125,12 @@
 	struct cpumask *covered;
 	int i;
 
+	get_group(cpu, sdd, &sd->groups);
+	atomic_inc(&sd->groups->ref);
+
+	if (cpu != cpumask_first(sched_domain_span(sd)))
+		return 0;
+
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
@@ -7020,7 +7145,7 @@
 			continue;
 
 		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
+		sg->sgp->power = 0;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7162,8 @@
 		last = sg;
 	}
 	last->next = first;
+
+	return 0;
 }
 
 /*
@@ -7051,13 +7178,18 @@
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	WARN_ON(!sd || !sd->groups);
+	struct sched_group *sg = sd->groups;
 
-	if (cpu != group_first_cpu(sd->groups))
+	WARN_ON(!sd || !sg);
+
+	do {
+		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg = sg->next;
+	} while (sg != sd->groups);
+
+	if (cpu != group_first_cpu(sg))
 		return;
 
-	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
-
 	update_group_power(sd, cpu);
 }
 
@@ -7177,15 +7309,15 @@
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
 	struct sd_data *sdd = sd->private;
-	struct sched_group *sg = sd->groups;
 
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-	if (cpu == cpumask_first(sched_group_cpus(sg))) {
-		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
-	}
+
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7342,7 @@
 #endif
 	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
 	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
 	{ NULL, },
@@ -7234,9 +7366,14 @@
 		if (!sdd->sg)
 			return -ENOMEM;
 
+		sdd->sgp = alloc_percpu(struct sched_group_power *);
+		if (!sdd->sgp)
+			return -ENOMEM;
+
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
+			struct sched_group_power *sgp;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7388,13 @@
 				return -ENOMEM;
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
+
+			sgp = kzalloc_node(sizeof(struct sched_group_power),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sgp)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sgp, j) = sgp;
 		}
 	}
 
@@ -7266,11 +7410,15 @@
 		struct sd_data *sdd = &tl->data;
 
 		for_each_cpu(j, cpu_map) {
-			kfree(*per_cpu_ptr(sdd->sd, j));
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+			if (sd && (sd->flags & SD_OVERLAP))
+				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sg, j));
+			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
 		free_percpu(sdd->sd);
 		free_percpu(sdd->sg);
+		free_percpu(sdd->sgp);
 	}
 }
 
@@ -7316,8 +7464,13 @@
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
-		for (tl = sched_domain_topology; tl->init; tl++)
+		for (tl = sched_domain_topology; tl->init; tl++) {
 			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+				sd->flags |= SD_OVERLAP;
+			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+				break;
+		}
 
 		while (sd->child)
 			sd = sd->child;
@@ -7329,13 +7482,13 @@
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			get_group(i, sd->private, &sd->groups);
-			atomic_inc(&sd->groups->ref);
-
-			if (i != cpumask_first(sched_domain_span(sd)))
-				continue;
-
-			build_sched_groups(sd);
+			if (sd->flags & SD_OVERLAP) {
+				if (build_overlap_sched_groups(sd, i))
+					goto error;
+			} else {
+				if (build_sched_groups(sd, i))
+					goto error;
+			}
 		}
 	}
 
@@ -7757,6 +7910,9 @@
 #endif
 #endif
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+#ifndef CONFIG_64BIT
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 433491c2..c768588 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1585,7 +1585,7 @@
 		}
 
 		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -2631,7 +2631,7 @@
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->cpu_power_orig = power;
+	sdg->sgp->power_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -2647,7 +2647,7 @@
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2665,11 +2665,11 @@
 
 	group = child->groups;
 	do {
-		power += group->cpu_power;
+		power += group->sgp->power;
 		group = group->next;
 	} while (group != child->groups);
 
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 /*
@@ -2691,7 +2691,7 @@
 	/*
 	 * If ~90% of the cpu_power is still there, we're good.
 	 */
-	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
 		return 1;
 
 	return 0;
@@ -2771,7 +2771,7 @@
 	}
 
 	/* Adjust by relative CPU power of the group */
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
@@ -2788,7 +2788,7 @@
 	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
 		sgs->group_imb = 1;
 
-	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2877,7 +2877,7 @@
 			return;
 
 		sds->total_load += sgs.group_load;
-		sds->total_pwr += sg->cpu_power;
+		sds->total_pwr += sg->sgp->power;
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
@@ -2962,7 +2962,7 @@
 	if (this_cpu > busiest_cpu)
 		return 0;
 
-	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
 				       SCHED_POWER_SCALE);
 	return 1;
 }
@@ -2993,7 +2993,7 @@
 
 	scaled_busy_load_per_task = sds->busiest_load_per_task
 					 * SCHED_POWER_SCALE;
-	scaled_busy_load_per_task /= sds->busiest->cpu_power;
+	scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
 	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
 			(scaled_busy_load_per_task * imbn)) {
@@ -3007,28 +3007,28 @@
 	 * moving them.
 	 */
 
-	pwr_now += sds->busiest->cpu_power *
+	pwr_now += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load);
-	pwr_now += sds->this->cpu_power *
+	pwr_now += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load);
 	pwr_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-		sds->busiest->cpu_power;
+		sds->busiest->sgp->power;
 	if (sds->max_load > tmp)
-		pwr_move += sds->busiest->cpu_power *
+		pwr_move += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load - tmp);
 
 	/* Amount of load we'd add */
-	if (sds->max_load * sds->busiest->cpu_power <
+	if (sds->max_load * sds->busiest->sgp->power <
 		sds->busiest_load_per_task * SCHED_POWER_SCALE)
-		tmp = (sds->max_load * sds->busiest->cpu_power) /
-			sds->this->cpu_power;
+		tmp = (sds->max_load * sds->busiest->sgp->power) /
+			sds->this->sgp->power;
 	else
 		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-			sds->this->cpu_power;
-	pwr_move += sds->this->cpu_power *
+			sds->this->sgp->power;
+	pwr_move += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load + tmp);
 	pwr_move /= SCHED_POWER_SCALE;
 
@@ -3074,7 +3074,7 @@
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-		load_above_capacity /= sds->busiest->cpu_power;
+		load_above_capacity /= sds->busiest->sgp->power;
 	}
 
 	/*
@@ -3090,8 +3090,8 @@
 	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * sds->busiest->cpu_power,
-		(sds->avg_load - sds->this_load) * sds->this->cpu_power)
+	*imbalance = min(max_pull * sds->busiest->sgp->power,
+		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
 			/ SCHED_POWER_SCALE;
 
 	/*
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f73..1e7066d 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, 1)
+
+SCHED_FEAT(FORCE_SD_OVERLAP, 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index ff76786..415d85d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1178,18 +1178,25 @@
 {
 	struct sighand_struct *sighand;
 
-	rcu_read_lock();
 	for (;;) {
+		local_irq_save(*flags);
+		rcu_read_lock();
 		sighand = rcu_dereference(tsk->sighand);
-		if (unlikely(sighand == NULL))
+		if (unlikely(sighand == NULL)) {
+			rcu_read_unlock();
+			local_irq_restore(*flags);
 			break;
+		}
 
-		spin_lock_irqsave(&sighand->siglock, *flags);
-		if (likely(sighand == tsk->sighand))
+		spin_lock(&sighand->siglock);
+		if (likely(sighand == tsk->sighand)) {
+			rcu_read_unlock();
 			break;
-		spin_unlock_irqrestore(&sighand->siglock, *flags);
+		}
+		spin_unlock(&sighand->siglock);
+		rcu_read_unlock();
+		local_irq_restore(*flags);
 	}
-	rcu_read_unlock();
 
 	return sighand;
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 40cf63d..fca82c3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,16 +315,24 @@
 {
 	if (!force_irqthreads)
 		__do_softirq();
-	else
+	else {
+		__local_bh_disable((unsigned long)__builtin_return_address(0),
+				SOFTIRQ_OFFSET);
 		wakeup_softirqd();
+		__local_bh_enable(SOFTIRQ_OFFSET);
+	}
 }
 #else
 static inline void invoke_softirq(void)
 {
 	if (!force_irqthreads)
 		do_softirq();
-	else
+	else {
+		__local_bh_disable((unsigned long)__builtin_return_address(0),
+				SOFTIRQ_OFFSET);
 		wakeup_softirqd();
+		__local_bh_enable(SOFTIRQ_OFFSET);
+	}
 }
 #endif
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ed24b9..d036e59 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2310,7 +2310,8 @@
 	for (i = 0; i <= classzone_idx; i++)
 		present_pages += pgdat->node_zones[i].present_pages;
 
-	return balanced_pages > (present_pages >> 2);
+	/* A special case here: if zone has no page, we think it's balanced */
+	return balanced_pages >= (present_pages >> 2);
 }
 
 /* is kswapd sleeping prematurely? */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86bff9b..6e82148 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -528,7 +528,11 @@
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
-	dev->hw_features = NETIF_F_ALL_TX_OFFLOADS;
+	dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
+			   NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
+			   NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
+			   NETIF_F_ALL_FCOE;
+
 	dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index d3a05b9..bcd158f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -393,6 +393,9 @@
 
 	hci_dev_put(hdev);
 
+	if (conn->handle == 0)
+		kfree(conn);
+
 	return 0;
 }
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index c405a95..43b4c2d 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -464,7 +464,8 @@
 {
 	struct hidp_session *session = (struct hidp_session *) arg;
 
-	kthread_stop(session->task);
+	atomic_inc(&session->terminate);
+	wake_up_process(session->task);
 }
 
 static void hidp_set_timer(struct hidp_session *session)
@@ -535,7 +536,8 @@
 		skb_queue_purge(&session->ctrl_transmit);
 		skb_queue_purge(&session->intr_transmit);
 
-		kthread_stop(session->task);
+		atomic_inc(&session->terminate);
+		wake_up_process(current);
 	}
 }
 
@@ -706,9 +708,8 @@
 	add_wait_queue(sk_sleep(intr_sk), &intr_wait);
 	session->waiting_for_startup = 0;
 	wake_up_interruptible(&session->startup_queue);
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!atomic_read(&session->terminate)) {
 		if (ctrl_sk->sk_state != BT_CONNECTED ||
 				intr_sk->sk_state != BT_CONNECTED)
 			break;
@@ -726,6 +727,7 @@
 		hidp_process_transmit(session);
 
 		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
@@ -1060,7 +1062,8 @@
 err_add_device:
 	hid_destroy_device(session->hid);
 	session->hid = NULL;
-	kthread_stop(session->task);
+	atomic_inc(&session->terminate);
+	wake_up_process(session->task);
 
 unlink:
 	hidp_del_timer(session);
@@ -1111,7 +1114,8 @@
 			skb_queue_purge(&session->ctrl_transmit);
 			skb_queue_purge(&session->intr_transmit);
 
-			kthread_stop(session->task);
+			atomic_inc(&session->terminate);
+			wake_up_process(session->task);
 		}
 	} else
 		err = -ENOENT;
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 19e9500..af1bcc8 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -142,6 +142,7 @@
 	uint ctrl_mtu;
 	uint intr_mtu;
 
+	atomic_t terminate;
 	struct task_struct *task;
 
 	unsigned char keys[8];
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 56fdd91..7705e26 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -620,7 +620,8 @@
 					struct sock *parent = bt_sk(sk)->parent;
 					rsp.result = cpu_to_le16(L2CAP_CR_PEND);
 					rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
-					parent->sk_data_ready(parent, 0);
+					if (parent)
+						parent->sk_data_ready(parent, 0);
 
 				} else {
 					sk->sk_state = BT_CONFIG;
@@ -2323,7 +2324,7 @@
 
 	sk = chan->sk;
 
-	if (sk->sk_state != BT_CONFIG) {
+	if (sk->sk_state != BT_CONFIG && sk->sk_state != BT_CONNECT2) {
 		struct l2cap_cmd_rej rej;
 
 		rej.reason = cpu_to_le16(0x0002);
@@ -2334,7 +2335,7 @@
 
 	/* Reject if config buffer is too small. */
 	len = cmd_len - sizeof(*req);
-	if (chan->conf_len + len > sizeof(chan->conf_req)) {
+	if (len < 0 || chan->conf_len + len > sizeof(chan->conf_req)) {
 		l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
 				l2cap_build_conf_rsp(chan, rsp,
 					L2CAP_CONF_REJECT, flags), rsp);
@@ -4009,7 +4010,8 @@
 					struct sock *parent = bt_sk(sk)->parent;
 					res = L2CAP_CR_PEND;
 					stat = L2CAP_CS_AUTHOR_PEND;
-					parent->sk_data_ready(parent, 0);
+					if (parent)
+						parent->sk_data_ready(parent, 0);
 				} else {
 					sk->sk_state = BT_CONFIG;
 					res = L2CAP_CR_SUCCESS;
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index a3a3a31..41466cc 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -36,16 +36,19 @@
 	if ((flags & O_DIRECTORY) == O_DIRECTORY)
 		return CEPH_FILE_MODE_PIN;
 #endif
-	if ((flags & O_APPEND) == O_APPEND)
-		flags |= O_WRONLY;
 
-	if ((flags & O_ACCMODE) == O_RDWR)
-		mode = CEPH_FILE_MODE_RDWR;
-	else if ((flags & O_ACCMODE) == O_WRONLY)
+	switch (flags & O_ACCMODE) {
+	case O_WRONLY:
 		mode = CEPH_FILE_MODE_WR;
-	else
+		break;
+	case O_RDONLY:
 		mode = CEPH_FILE_MODE_RD;
-
+		break;
+	case O_RDWR:
+	case O_ACCMODE: /* this is what the VFS does */
+		mode = CEPH_FILE_MODE_RDWR;
+		break;
+	}
 #ifdef O_LAZY
 	if (flags & O_LAZY)
 		mode |= CEPH_FILE_MODE_LAZY;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 58ffa7d..669d2e3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -877,7 +877,8 @@
 	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
 		local->sched_scan_ies.ie[i] = kzalloc(2 +
 						      IEEE80211_MAX_SSID_LEN +
-						      local->scan_ies_len,
+						      local->scan_ies_len +
+						      req->ie_len,
 						      GFP_KERNEL);
 		if (!local->sched_scan_ies.ie[i]) {
 			ret = -ENOMEM;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d91c1a2..8f6a302 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -86,6 +86,11 @@
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	int queue = rx->queue;
+
+	/* otherwise, TKIP is vulnerable to TID 0 vs. non-QoS replays */
+	if (rx->queue == NUM_RX_DATA_QUEUES - 1)
+		queue = 0;
 
 	/*
 	 * it makes no sense to check for MIC errors on anything other
@@ -148,8 +153,8 @@
 
 update_iv:
 	/* update IV in key information to be able to detect replays */
-	rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32;
-	rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16;
+	rx->key->u.tkip.rx[queue].iv32 = rx->tkip_iv32;
+	rx->key->u.tkip.rx[queue].iv16 = rx->tkip_iv16;
 
 	return RX_CONTINUE;
 
@@ -241,6 +246,11 @@
 	struct ieee80211_key *key = rx->key;
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+	int queue = rx->queue;
+
+	/* otherwise, TKIP is vulnerable to TID 0 vs. non-QoS replays */
+	if (rx->queue == NUM_RX_DATA_QUEUES - 1)
+		queue = 0;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
@@ -261,7 +271,7 @@
 	res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
 					  key, skb->data + hdrlen,
 					  skb->len - hdrlen, rx->sta->sta.addr,
-					  hdr->addr1, hwaccel, rx->queue,
+					  hdr->addr1, hwaccel, queue,
 					  &rx->tkip_iv32,
 					  &rx->tkip_iv16);
 	if (res != TKIP_DECRYPT_OK)
diff --git a/net/sctp/output.c b/net/sctp/output.c
index b4f3cf0..08b3cea 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -500,23 +500,20 @@
 	 * Note: Adler-32 is no longer applicable, as has been replaced
 	 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
 	 */
-	if (!sctp_checksum_disable &&
-	    !(dst->dev->features & (NETIF_F_NO_CSUM | NETIF_F_SCTP_CSUM))) {
-		__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
+	if (!sctp_checksum_disable) {
+		if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) {
+			__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 
-		/* 3) Put the resultant value into the checksum field in the
-		 *    common header, and leave the rest of the bits unchanged.
-		 */
-		sh->checksum = sctp_end_cksum(crc32);
-	} else {
-		if (dst->dev->features & NETIF_F_SCTP_CSUM) {
+			/* 3) Put the resultant value into the checksum field in the
+			 *    common header, and leave the rest of the bits unchanged.
+			 */
+			sh->checksum = sctp_end_cksum(crc32);
+		} else {
 			/* no need to seed pseudo checksum for SCTP */
 			nskb->ip_summed = CHECKSUM_PARTIAL;
 			nskb->csum_start = (skb_transport_header(nskb) -
 			                    nskb->head);
 			nskb->csum_offset = offsetof(struct sctphdr, checksum);
-		} else {
-			nskb->ip_summed = CHECKSUM_UNNECESSARY;
 		}
 	}
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1c88c89..d036821 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1582,6 +1582,8 @@
 #endif /* SCTP_DEBUG */
 	if (transport) {
 		if (bytes_acked) {
+			struct sctp_association *asoc = transport->asoc;
+
 			/* We may have counted DATA that was migrated
 			 * to this transport due to DEL-IP operation.
 			 * Subtract those bytes, since the were never
@@ -1600,6 +1602,17 @@
 			transport->error_count = 0;
 			transport->asoc->overall_error_count = 0;
 
+			/*
+			 * While in SHUTDOWN PENDING, we may have started
+			 * the T5 shutdown guard timer after reaching the
+			 * retransmission limit. Stop that timer as soon
+			 * as the receiver acknowledged any data.
+			 */
+			if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING &&
+			    del_timer(&asoc->timers
+				[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]))
+					sctp_association_put(asoc);
+
 			/* Mark the destination transport address as
 			 * active if it is not so marked.
 			 */
@@ -1629,10 +1642,15 @@
 			 * A sender is doing zero window probing when the
 			 * receiver's advertised window is zero, and there is
 			 * only one data chunk in flight to the receiver.
+			 *
+			 * Allow the association to timeout while in SHUTDOWN
+			 * PENDING or SHUTDOWN RECEIVED in case the receiver
+			 * stays in zero window mode forever.
 			 */
 			if (!q->asoc->peer.rwnd &&
 			    !list_empty(&tlist) &&
-			    (sack_ctsn+2 == q->asoc->next_tsn)) {
+			    (sack_ctsn+2 == q->asoc->next_tsn) &&
+			    q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) {
 				SCTP_DEBUG_PRINTK("%s: SACK received for zero "
 						  "window probe: %u\n",
 						  __func__, sack_ctsn);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 534c2e5..6e0f882 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -670,10 +670,19 @@
 	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
 	 * HEARTBEAT should clear the error counter of the destination
 	 * transport address to which the HEARTBEAT was sent.
-	 * The association's overall error count is also cleared.
 	 */
 	t->error_count = 0;
-	t->asoc->overall_error_count = 0;
+
+	/*
+	 * Although RFC4960 specifies that the overall error count must
+	 * be cleared when a HEARTBEAT ACK is received, we make an
+	 * exception while in SHUTDOWN PENDING. If the peer keeps its
+	 * window shut forever, we may never be able to transmit our
+	 * outstanding data and rely on the retransmission limit be reached
+	 * to shutdown the association.
+	 */
+	if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
+		t->asoc->overall_error_count = 0;
 
 	/* Clear the hb_sent flag to signal that we had a good
 	 * acknowledgement.
@@ -1437,6 +1446,13 @@
 			sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
 			break;
 
+		case SCTP_CMD_TIMER_START_ONCE:
+			timer = &asoc->timers[cmd->obj.to];
+
+			if (timer_pending(timer))
+				break;
+			/* fall through */
+
 		case SCTP_CMD_TIMER_START:
 			timer = &asoc->timers[cmd->obj.to];
 			timeout = asoc->timeouts[cmd->obj.to];
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a297283..2461171 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5154,7 +5154,7 @@
 	 * The sender of the SHUTDOWN MAY also start an overall guard timer
 	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
 	 */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
 	if (asoc->autoclose)
@@ -5299,14 +5299,28 @@
 	SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
 
 	if (asoc->overall_error_count >= asoc->max_retrans) {
-		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
-				SCTP_ERROR(ETIMEDOUT));
-		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
-		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
-				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
-		return SCTP_DISPOSITION_DELETE_TCB;
+		if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
+			/*
+			 * We are here likely because the receiver had its rwnd
+			 * closed for a while and we have not been able to
+			 * transmit the locally queued data within the maximum
+			 * retransmission attempts limit.  Start the T5
+			 * shutdown guard timer to give the receiver one last
+			 * chance and some additional time to recover before
+			 * aborting.
+			 */
+			sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+		} else {
+			sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
+					SCTP_ERROR(ETIMEDOUT));
+			/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
+			sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+					SCTP_PERR(SCTP_ERROR_NO_ERROR));
+			SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+			SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+			return SCTP_DISPOSITION_DELETE_TCB;
+		}
 	}
 
 	/* E1) For the destination address for which the timer
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 0338dc6..7c211a7 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -827,7 +827,7 @@
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 08c6238..d3ccf79 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1384,6 +1384,7 @@
 	struct sctp_endpoint *ep;
 	struct sctp_association *asoc;
 	struct list_head *pos, *temp;
+	unsigned int data_was_unread;
 
 	SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout);
 
@@ -1393,6 +1394,10 @@
 
 	ep = sctp_sk(sk)->ep;
 
+	/* Clean up any skbs sitting on the receive queue.  */
+	data_was_unread = sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
+	data_was_unread += sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
+
 	/* Walk all associations on an endpoint.  */
 	list_for_each_safe(pos, temp, &ep->asocs) {
 		asoc = list_entry(pos, struct sctp_association, asocs);
@@ -1410,7 +1415,9 @@
 			}
 		}
 
-		if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
+		    !skb_queue_empty(&asoc->ulpq.reasm) ||
+		    (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
 			struct sctp_chunk *chunk;
 
 			chunk = sctp_make_abort_user(asoc, NULL, 0);
@@ -1420,10 +1427,6 @@
 			sctp_primitive_SHUTDOWN(asoc, NULL);
 	}
 
-	/* Clean up any skbs sitting on the receive queue.  */
-	sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
-	sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
-
 	/* On a TCP-style socket, block for at most linger_time if set. */
 	if (sctp_style(sk, TCP) && timeout)
 		sctp_wait_for_close(sk, timeout);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index e70e5fc..8a84017 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1081,9 +1081,19 @@
 }
 
 /* Purge the skb lists holding ulpevents. */
-void sctp_queue_purge_ulpevents(struct sk_buff_head *list)
+unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
-	while ((skb = skb_dequeue(list)) != NULL)
-		sctp_ulpevent_free(sctp_skb2event(skb));
+	unsigned int data_unread = 0;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		struct sctp_ulpevent *event = sctp_skb2event(skb);
+
+		if (!sctp_ulpevent_is_notification(event))
+			data_unread += skb->len;
+
+		sctp_ulpevent_free(event);
+	}
+
+	return data_unread;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9a80a92..e45d2fb 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -597,7 +597,7 @@
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
-	static struct rpcbind_args *map;
+	struct rpcbind_args *map;
 	struct rpc_task	*child;
 	struct sockaddr_storage addr;
 	struct sockaddr *sap = (struct sockaddr *)&addr;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a27406b..4814e24 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -616,30 +616,25 @@
 	BUG_ON(RPC_IS_QUEUED(task));
 
 	for (;;) {
+		void (*do_action)(struct rpc_task *);
 
 		/*
-		 * Execute any pending callback.
+		 * Execute any pending callback first.
 		 */
-		if (task->tk_callback) {
-			void (*save_callback)(struct rpc_task *);
-
-			/*
-			 * We set tk_callback to NULL before calling it,
-			 * in case it sets the tk_callback field itself:
-			 */
-			save_callback = task->tk_callback;
-			task->tk_callback = NULL;
-			save_callback(task);
-		} else {
+		do_action = task->tk_callback;
+		task->tk_callback = NULL;
+		if (do_action == NULL) {
 			/*
 			 * Perform the next FSM step.
-			 * tk_action may be NULL when the task has been killed
-			 * by someone else.
+			 * tk_action may be NULL if the task has been killed.
+			 * In particular, note that rpc_killall_tasks may
+			 * do this at any time, so beware when dereferencing.
 			 */
-			if (task->tk_action == NULL)
+			do_action = task->tk_action;
+			if (do_action == NULL)
 				break;
-			task->tk_action(task);
 		}
+		do_action(task);
 
 		/*
 		 * Lockless check for whether task is sleeping or not.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index c22ef34..880dbe2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -366,6 +366,7 @@
 
 	mutex_init(&rdev->mtx);
 	mutex_init(&rdev->devlist_mtx);
+	mutex_init(&rdev->sched_scan_mtx);
 	INIT_LIST_HEAD(&rdev->netdev_list);
 	spin_lock_init(&rdev->bss_lock);
 	INIT_LIST_HEAD(&rdev->bss_list);
@@ -701,6 +702,7 @@
 	rfkill_destroy(rdev->rfkill);
 	mutex_destroy(&rdev->mtx);
 	mutex_destroy(&rdev->devlist_mtx);
+	mutex_destroy(&rdev->sched_scan_mtx);
 	list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list)
 		cfg80211_put_bss(&scan->pub);
 	cfg80211_rdev_free_wowlan(rdev);
@@ -737,12 +739,16 @@
 		___cfg80211_scan_done(rdev, true);
 	}
 
+	cfg80211_unlock_rdev(rdev);
+
+	mutex_lock(&rdev->sched_scan_mtx);
+
 	if (WARN_ON(rdev->sched_scan_req &&
 		    rdev->sched_scan_req->dev == wdev->netdev)) {
 		__cfg80211_stop_sched_scan(rdev, false);
 	}
 
-	cfg80211_unlock_rdev(rdev);
+	mutex_unlock(&rdev->sched_scan_mtx);
 
 	mutex_lock(&rdev->devlist_mtx);
 	rdev->opencount--;
@@ -830,9 +836,9 @@
 			break;
 		case NL80211_IFTYPE_P2P_CLIENT:
 		case NL80211_IFTYPE_STATION:
-			cfg80211_lock_rdev(rdev);
+			mutex_lock(&rdev->sched_scan_mtx);
 			__cfg80211_stop_sched_scan(rdev, false);
-			cfg80211_unlock_rdev(rdev);
+			mutex_unlock(&rdev->sched_scan_mtx);
 
 			wdev_lock(wdev);
 #ifdef CONFIG_CFG80211_WEXT
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3dce1f1..a570ff9 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -65,6 +65,8 @@
 	struct work_struct scan_done_wk;
 	struct work_struct sched_scan_results_wk;
 
+	struct mutex sched_scan_mtx;
+
 #ifdef CONFIG_NL80211_TESTMODE
 	struct genl_info *testmode_info;
 #endif
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f07602d..cea3381 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3461,9 +3461,6 @@
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
 
-	if (rdev->sched_scan_req)
-		return -EINPROGRESS;
-
 	if (!info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
 		return -EINVAL;
 
@@ -3502,12 +3499,21 @@
 	if (ie_len > wiphy->max_scan_ie_len)
 		return -EINVAL;
 
+	mutex_lock(&rdev->sched_scan_mtx);
+
+	if (rdev->sched_scan_req) {
+		err = -EINPROGRESS;
+		goto out;
+	}
+
 	request = kzalloc(sizeof(*request)
 			+ sizeof(*request->ssids) * n_ssids
 			+ sizeof(*request->channels) * n_channels
 			+ ie_len, GFP_KERNEL);
-	if (!request)
-		return -ENOMEM;
+	if (!request) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	if (n_ssids)
 		request->ssids = (void *)&request->channels[n_channels];
@@ -3605,6 +3611,7 @@
 out_free:
 	kfree(request);
 out:
+	mutex_unlock(&rdev->sched_scan_mtx);
 	return err;
 }
 
@@ -3612,12 +3619,17 @@
 				   struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	int err;
 
 	if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
 	    !rdev->ops->sched_scan_stop)
 		return -EOPNOTSUPP;
 
-	return __cfg80211_stop_sched_scan(rdev, false);
+	mutex_lock(&rdev->sched_scan_mtx);
+	err = __cfg80211_stop_sched_scan(rdev, false);
+	mutex_unlock(&rdev->sched_scan_mtx);
+
+	return err;
 }
 
 static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 7a6c676..ae0c225 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -100,14 +100,14 @@
 	rdev = container_of(wk, struct cfg80211_registered_device,
 			    sched_scan_results_wk);
 
-	cfg80211_lock_rdev(rdev);
+	mutex_lock(&rdev->sched_scan_mtx);
 
 	/* we don't have sched_scan_req anymore if the scan is stopping */
 	if (rdev->sched_scan_req)
 		nl80211_send_sched_scan_results(rdev,
 						rdev->sched_scan_req->dev);
 
-	cfg80211_unlock_rdev(rdev);
+	mutex_unlock(&rdev->sched_scan_mtx);
 }
 
 void cfg80211_sched_scan_results(struct wiphy *wiphy)
@@ -123,9 +123,9 @@
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 
-	cfg80211_lock_rdev(rdev);
+	mutex_lock(&rdev->sched_scan_mtx);
 	__cfg80211_stop_sched_scan(rdev, true);
-	cfg80211_unlock_rdev(rdev);
+	mutex_unlock(&rdev->sched_scan_mtx);
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
 
@@ -135,7 +135,7 @@
 	int err;
 	struct net_device *dev;
 
-	ASSERT_RDEV_LOCK(rdev);
+	lockdep_assert_held(&rdev->sched_scan_mtx);
 
 	if (!rdev->sched_scan_req)
 		return 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d70f85e..9414b9c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1345,6 +1345,8 @@
 			xfrm_state_check_expire(x1);
 
 		err = 0;
+		x->km.state = XFRM_STATE_DEAD;
+		__xfrm_state_put(x);
 	}
 	spin_unlock_bh(&x1->lock);
 
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index 3b029cb..a272356 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -21,13 +21,15 @@
 # older versions of depmod require the version string to start with three
 # numbers, so we cheat with a symlink here
 depmod_hack_needed=true
-mkdir -p .tmp_depmod/lib/modules/$KERNELRELEASE
-if "$DEPMOD" -b .tmp_depmod $KERNELRELEASE 2>/dev/null; then
-	if test -e .tmp_depmod/lib/modules/$KERNELRELEASE/modules.dep -o \
-		-e .tmp_depmod/lib/modules/$KERNELRELEASE/modules.dep.bin; then
+tmp_dir=$(mktemp -d ${TMPDIR:-/tmp}/depmod.XXXXXX)
+mkdir -p "$tmp_dir/lib/modules/$KERNELRELEASE"
+if "$DEPMOD" -b "$tmp_dir" $KERNELRELEASE 2>/dev/null; then
+	if test -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep" -o \
+		-e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep.bin"; then
 		depmod_hack_needed=false
 	fi
 fi
+rm -rf "$tmp_dir"
 if $depmod_hack_needed; then
 	symlink="$INSTALL_MOD_PATH/lib/modules/99.98.$KERNELRELEASE"
 	ln -s "$KERNELRELEASE" "$symlink"
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index c2fc035..83014a7 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -1190,7 +1190,6 @@
 SND_SOC_DAPM_INPUT("DMIC2DAT"),
 SND_SOC_DAPM_INPUT("Clock"),
 
-SND_SOC_DAPM_MICBIAS("MICBIAS", WM8994_MICBIAS, 2, 0),
 SND_SOC_DAPM_SUPPLY_S("MICBIAS Supply", 1, SND_SOC_NOPM, 0, 0, micbias_ev,
 		      SND_SOC_DAPM_PRE_PMU),
 
@@ -1509,8 +1508,10 @@
 	{ "AIF2DACDAT", NULL, "AIF1DACDAT" },
 	{ "AIF1ADCDAT", NULL, "AIF2ADCDAT" },
 	{ "AIF2ADCDAT", NULL, "AIF1ADCDAT" },
-	{ "MICBIAS", NULL, "CLK_SYS" },
-	{ "MICBIAS", NULL, "MICBIAS Supply" },
+	{ "MICBIAS1", NULL, "CLK_SYS" },
+	{ "MICBIAS1", NULL, "MICBIAS Supply" },
+	{ "MICBIAS2", NULL, "CLK_SYS" },
+	{ "MICBIAS2", NULL, "MICBIAS Supply" },
 };
 
 static const struct snd_soc_dapm_route wm8994_intercon[] = {
@@ -2763,7 +2764,7 @@
 	report = SND_JACK_MICROPHONE;
 
 	/* Everything else is buttons; just assign slots */
-	if (status & 0x1c0)
+	if (status & 0x1c)
 		report |= SND_JACK_BTN_0;
 
 done:
diff --git a/sound/soc/sh/fsi-ak4642.c b/sound/soc/sh/fsi-ak4642.c
index d6f4703..770a71a 100644
--- a/sound/soc/sh/fsi-ak4642.c
+++ b/sound/soc/sh/fsi-ak4642.c
@@ -97,7 +97,7 @@
 
 static struct fsi_ak4642_data fsi_a_ak4642 = {
 	.name		= "AK4642",
-	.card		= "FSIA (AK4642)",
+	.card		= "FSIA-AK4642",
 	.cpu_dai	= "fsia-dai",
 	.codec		= "ak4642-codec.0-0012",
 	.platform	= "sh_fsi.0",
@@ -106,7 +106,7 @@
 
 static struct fsi_ak4642_data fsi_b_ak4642 = {
 	.name		= "AK4642",
-	.card		= "FSIB (AK4642)",
+	.card		= "FSIB-AK4642",
 	.cpu_dai	= "fsib-dai",
 	.codec		= "ak4642-codec.0-0012",
 	.platform	= "sh_fsi.0",
@@ -115,7 +115,7 @@
 
 static struct fsi_ak4642_data fsi_a_ak4643 = {
 	.name		= "AK4643",
-	.card		= "FSIA (AK4643)",
+	.card		= "FSIA-AK4643",
 	.cpu_dai	= "fsia-dai",
 	.codec		= "ak4642-codec.0-0013",
 	.platform	= "sh_fsi.0",
@@ -124,7 +124,7 @@
 
 static struct fsi_ak4642_data fsi_b_ak4643 = {
 	.name		= "AK4643",
-	.card		= "FSIB (AK4643)",
+	.card		= "FSIB-AK4643",
 	.cpu_dai	= "fsib-dai",
 	.codec		= "ak4642-codec.0-0013",
 	.platform	= "sh_fsi.0",
@@ -133,7 +133,7 @@
 
 static struct fsi_ak4642_data fsi2_a_ak4642 = {
 	.name		= "AK4642",
-	.card		= "FSI2A (AK4642)",
+	.card		= "FSI2A-AK4642",
 	.cpu_dai	= "fsia-dai",
 	.codec		= "ak4642-codec.0-0012",
 	.platform	= "sh_fsi2",
@@ -142,7 +142,7 @@
 
 static struct fsi_ak4642_data fsi2_b_ak4642 = {
 	.name		= "AK4642",
-	.card		= "FSI2B (AK4642)",
+	.card		= "FSI2B-AK4642",
 	.cpu_dai	= "fsib-dai",
 	.codec		= "ak4642-codec.0-0012",
 	.platform	= "sh_fsi2",
@@ -151,7 +151,7 @@
 
 static struct fsi_ak4642_data fsi2_a_ak4643 = {
 	.name		= "AK4643",
-	.card		= "FSI2A (AK4643)",
+	.card		= "FSI2A-AK4643",
 	.cpu_dai	= "fsia-dai",
 	.codec		= "ak4642-codec.0-0013",
 	.platform	= "sh_fsi2",
@@ -160,7 +160,7 @@
 
 static struct fsi_ak4642_data fsi2_b_ak4643 = {
 	.name		= "AK4643",
-	.card		= "FSI2B (AK4643)",
+	.card		= "FSI2B-AK4643",
 	.cpu_dai	= "fsib-dai",
 	.codec		= "ak4642-codec.0-0013",
 	.platform	= "sh_fsi2",
diff --git a/sound/soc/sh/fsi-da7210.c b/sound/soc/sh/fsi-da7210.c
index dbafd7a..59553fd 100644
--- a/sound/soc/sh/fsi-da7210.c
+++ b/sound/soc/sh/fsi-da7210.c
@@ -42,7 +42,7 @@
 };
 
 static struct snd_soc_card fsi_soc_card = {
-	.name		= "FSI (DA7210)",
+	.name		= "FSI-DA7210",
 	.dai_link	= &fsi_da7210_dai,
 	.num_links	= 1,
 };
diff --git a/sound/soc/sh/fsi-hdmi.c b/sound/soc/sh/fsi-hdmi.c
index 9719985..d3d9fd8 100644
--- a/sound/soc/sh/fsi-hdmi.c
+++ b/sound/soc/sh/fsi-hdmi.c
@@ -83,13 +83,13 @@
 
 static struct fsi_hdmi_data fsi2_a_hdmi = {
 	.cpu_dai	= "fsia-dai",
-	.card		= "FSI2A (SH MOBILE HDMI)",
+	.card		= "FSI2A-HDMI",
 	.id		= FSI_PORT_A,
 };
 
 static struct fsi_hdmi_data fsi2_b_hdmi = {
 	.cpu_dai	= "fsib-dai",
-	.card		= "FSI2B (SH MOBILE HDMI)",
+	.card		= "FSI2B-HDMI",
 	.id		= FSI_PORT_B,
 };