Merge branch 'fixes' of master.kernel.org:/home/rmk/linux-2.6-arm

* 'fixes' of master.kernel.org:/home/rmk/linux-2.6-arm:
  ARM: smp_on_up: allow non-ARM SMP processors
  ARM: io: ensure inb/outb() et.al. are properly ordered on ARMv6+
  ARM: initrd: disable initrd if passed address overlaps reserved region
  ARM: footbridge: fix debug macros
  ARM: mmci: round down the bytes transferred on error
  ARM: mmci: complete the transaction on error
  ARM: 6642/1: mmci: calculate remaining bytes at error correctly
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 20e0f7c..d66605de 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -95,6 +95,15 @@
 	return (void __iomem *)addr;
 }
 
+/* IO barriers */
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
+#else
+#define __iormb()		do { } while (0)
+#define __iowmb()		do { } while (0)
+#endif
+
 /*
  * Now, pick up the machine-defined IO definitions
  */
@@ -125,17 +134,17 @@
  * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space.
  */
 #ifdef __io
-#define outb(v,p)		__raw_writeb(v,__io(p))
-#define outw(v,p)		__raw_writew((__force __u16) \
-					cpu_to_le16(v),__io(p))
-#define outl(v,p)		__raw_writel((__force __u32) \
-					cpu_to_le32(v),__io(p))
+#define outb(v,p)	({ __iowmb(); __raw_writeb(v,__io(p)); })
+#define outw(v,p)	({ __iowmb(); __raw_writew((__force __u16) \
+					cpu_to_le16(v),__io(p)); })
+#define outl(v,p)	({ __iowmb(); __raw_writel((__force __u32) \
+					cpu_to_le32(v),__io(p)); })
 
-#define inb(p)	({ __u8 __v = __raw_readb(__io(p)); __v; })
+#define inb(p)	({ __u8 __v = __raw_readb(__io(p)); __iormb(); __v; })
 #define inw(p)	({ __u16 __v = le16_to_cpu((__force __le16) \
-			__raw_readw(__io(p))); __v; })
+			__raw_readw(__io(p))); __iormb(); __v; })
 #define inl(p)	({ __u32 __v = le32_to_cpu((__force __le32) \
-			__raw_readl(__io(p))); __v; })
+			__raw_readl(__io(p))); __iormb(); __v; })
 
 #define outsb(p,d,l)		__raw_writesb(__io(p),d,l)
 #define outsw(p,d,l)		__raw_writesw(__io(p),d,l)
@@ -192,14 +201,6 @@
 #define writel_relaxed(v,c)	((void)__raw_writel((__force u32) \
 					cpu_to_le32(v),__mem_pci(c)))
 
-#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
-#define __iormb()		rmb()
-#define __iowmb()		wmb()
-#else
-#define __iormb()		do { } while (0)
-#define __iowmb()		do { } while (0)
-#endif
-
 #define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
 #define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
 #define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f17d9a0..c0225da 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -392,24 +392,22 @@
 
 #ifdef CONFIG_SMP_ON_UP
 __fixup_smp:
-	mov	r4, #0x00070000
-	orr	r3, r4, #0xff000000	@ mask 0xff070000
-	orr	r4, r4, #0x41000000	@ val 0x41070000
-	and	r0, r9, r3
-	teq	r0, r4			@ ARM CPU and ARMv6/v7?
+	and	r3, r9, #0x000f0000	@ architecture version
+	teq	r3, #0x000f0000		@ CPU ID supported?
 	bne	__fixup_smp_on_up	@ no, assume UP
 
-	orr	r3, r3, #0x0000ff00
-	orr	r3, r3, #0x000000f0	@ mask 0xff07fff0
+	bic	r3, r9, #0x00ff0000
+	bic	r3, r3, #0x0000000f	@ mask 0xff00fff0
+	mov	r4, #0x41000000
 	orr	r4, r4, #0x0000b000
-	orr	r4, r4, #0x00000020	@ val 0x4107b020
-	and	r0, r9, r3
-	teq	r0, r4			@ ARM 11MPCore?
+	orr	r4, r4, #0x00000020	@ val 0x4100b020
+	teq	r3, r4			@ ARM 11MPCore?
 	moveq	pc, lr			@ yes, assume SMP
 
 	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
-	tst	r0, #1 << 31
-	movne	pc, lr			@ bit 31 => SMP
+	and	r0, r0, #0xc0000000	@ multiprocessing extensions and
+	teq	r0, #0x80000000		@ not part of a uniprocessor system?
+	moveq	pc, lr			@ yes, assume SMP
 
 __fixup_smp_on_up:
 	adr	r0, 1f
diff --git a/arch/arm/mach-footbridge/include/mach/debug-macro.S b/arch/arm/mach-footbridge/include/mach/debug-macro.S
index 3c9e0c4..30b971d 100644
--- a/arch/arm/mach-footbridge/include/mach/debug-macro.S
+++ b/arch/arm/mach-footbridge/include/mach/debug-macro.S
@@ -17,8 +17,8 @@
 	/* For NetWinder debugging */
 		.macro	addruart, rp, rv
 		mov	\rp, #0x000003f8
-		orr	\rv, \rp, #0x7c000000	@ physical
-		orr	\rp, \rp, #0xff000000	@ virtual
+		orr	\rv, \rp, #0xff000000	@ virtual
+		orr	\rp, \rp, #0x7c000000	@ physical
 		.endm
 
 #define UART_SHIFT	0
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 5164069..cddd684 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -297,6 +297,12 @@
 	memblock_reserve(__pa(_stext), _end - _stext);
 #endif
 #ifdef CONFIG_BLK_DEV_INITRD
+	if (phys_initrd_size &&
+	    memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
+		pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n",
+		       phys_initrd_start, phys_initrd_size);
+		phys_initrd_start = phys_initrd_size = 0;
+	}
 	if (phys_initrd_size) {
 		memblock_reserve(phys_initrd_start, phys_initrd_size);
 
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 4b8dcd5..2d6de3e 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -14,6 +14,7 @@
 #include <linux/ioport.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
@@ -283,19 +284,19 @@
 		u32 remain, success;
 
 		/* Calculate how far we are into the transfer */
-		remain = readl(host->base + MMCIDATACNT) << 2;
+		remain = readl(host->base + MMCIDATACNT);
 		success = data->blksz * data->blocks - remain;
 
 		dev_dbg(mmc_dev(host->mmc), "MCI ERROR IRQ (status %08x)\n", status);
 		if (status & MCI_DATACRCFAIL) {
 			/* Last block was not successful */
-			host->data_xfered = ((success / data->blksz) - 1 * data->blksz);
+			host->data_xfered = round_down(success - 1, data->blksz);
 			data->error = -EILSEQ;
 		} else if (status & MCI_DATATIMEOUT) {
-			host->data_xfered = success;
+			host->data_xfered = round_down(success, data->blksz);
 			data->error = -ETIMEDOUT;
 		} else if (status & (MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
-			host->data_xfered = success;
+			host->data_xfered = round_down(success, data->blksz);
 			data->error = -EIO;
 		}
 
@@ -319,7 +320,7 @@
 	if (status & MCI_DATABLOCKEND)
 		dev_err(mmc_dev(host->mmc), "stray MCI_DATABLOCKEND interrupt\n");
 
-	if (status & MCI_DATAEND) {
+	if (status & MCI_DATAEND || data->error) {
 		mmci_stop_data(host);
 
 		if (!data->error)