Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs:
  9p: update Documentation pointers
  net/9p: enable 9p to work in non-default network namespace
  net/9p: p9_idpool_get return -1 on error
  fs/9p: Don't clunk dentry fid when we fail to get a writeback inode
  9p: Small cleanup in <net/9p/9p.h>
  9p: remove experimental tag from tested configurations
  9p: typo fixes and minor cleanups
  net/9p: Change linuxdoc names to match functions.
diff --git a/CREDITS b/CREDITS
index 95c469c..58d2a02 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2943,6 +2943,10 @@
 S: 70110 Kuopio
 S: Finland
 
+N: Tobias Ringström
+E: tori@unhappy.mine.nu
+D: Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver
+
 N: Luca Risolia
 E: luca.risolia@studio.unibo.it
 P: 1024D/FCE635A4 88E8 F32F 7244 68BA 3958  5D40 99DA 5D2A FCE6 35A4
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 4873c75..c1eb41c 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -142,3 +142,67 @@
 		with the previous I/O request are enabled. When set to 2,
 		all merge tries are disabled. The default value is 0 -
 		which enables all types of merge tries.
+
+What:		/sys/block/<disk>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		device is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/<partition>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		partition is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/queue/discard_granularity
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space using units that are bigger
+		than the logical block size. The discard_granularity
+		parameter indicates the size of the internal allocation
+		unit in bytes if reported by the device. Otherwise the
+		discard_granularity will be set to match the device's
+		physical block size. A discard_granularity of 0 means
+		that the device does not support discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_max_bytes
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may have
+		internal limits on the number of bytes that can be
+		trimmed or unmapped in a single operation. Some storage
+		protocols also have inherent limits on the number of
+		blocks that can be described in a single command. The
+		discard_max_bytes parameter is set by the device driver
+		to the maximum number of bytes that can be discarded in
+		a single operation. Discard requests issued to the
+		device must not exceed this limit. A discard_max_bytes
+		value of 0 means that the device does not support
+		discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_zeroes_data
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may return
+		stale or random data when a previously discarded block
+		is read back. This can cause problems if the filesystem
+		expects discarded blocks to be explicitly cleared. If a
+		device reports that it deterministically returns zeroes
+		when a discarded area is read the discard_zeroes_data
+		parameter will be set to one. Otherwise it will be 0 and
+		the result of reading a discarded area is undefined.
diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp
new file mode 100644
index 0000000..d40d2b5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ptp
@@ -0,0 +1,98 @@
+What:		/sys/class/ptp/
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This directory contains files and directories
+		providing a standardized interface to the ancillary
+		features of PTP hardware clocks.
+
+What:		/sys/class/ptp/ptpN/
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This directory contains the attributes of the Nth PTP
+		hardware clock registered into the PTP class driver
+		subsystem.
+
+What:		/sys/class/ptp/ptpN/clock_name
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the name of the PTP hardware clock
+		as a human readable string.
+
+What:		/sys/class/ptp/ptpN/max_adjustment
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the PTP hardware clock's maximum
+		frequency adjustment value (a positive integer) in
+		parts per billion.
+
+What:		/sys/class/ptp/ptpN/n_alarms
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of periodic or one shot
+		alarms offer by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/n_external_timestamps
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of external timestamp
+		channels offered by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/n_periodic_outputs
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of programmable periodic
+		output channels offered by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/pps_avaiable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file indicates whether the PTP hardware clock
+		supports a Pulse Per Second to the host CPU. Reading
+		"1" means that the PPS is supported, while "0" means
+		not supported.
+
+What:		/sys/class/ptp/ptpN/extts_enable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables external
+		timestamps. To enable external timestamps, write the
+		channel index followed by a "1" into the file.
+		To disable external timestamps, write the channel
+		index followed by a "0" into the file.
+
+What:		/sys/class/ptp/ptpN/fifo
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file provides timestamps on external events, in
+		the form of three integers: channel index, seconds,
+		and nanoseconds.
+
+What:		/sys/class/ptp/ptpN/period
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables periodic
+		outputs. To enable a periodic output, write five
+		integers into the file: channel index, start time
+		seconds, start time nanoseconds, period seconds, and
+		period nanoseconds. To disable a periodic output, set
+		all the seconds and nanoseconds values to zero.
+
+What:		/sys/class/ptp/ptpN/pps_enable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables delivery of
+		PPS events to the Linux PPS subsystem. To enable PPS
+		events, write a "1" into the file. To disable events,
+		write a "0" into the file.
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
index b4a615b..7890fae 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/IRQ-affinity.txt
@@ -4,10 +4,11 @@
 
 SMP IRQ affinity
 
-/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
-for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
-to turn off all CPUs, and if an IRQ controller does not support IRQ
-affinity then the value will not change from the default 0xffffffff.
+/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
+which target CPUs are permitted for a given IRQ source.  It's a bitmask
+(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs.  It's not
+allowed to turn off all CPUs, and if an IRQ controller does not support
+IRQ affinity then the value will not change from the default of all cpus.
 
 /proc/irq/default_smp_affinity specifies default affinity mask that applies
 to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -54,3 +55,11 @@
 This time around IRQ44 was delivered only to the last four processors.
 i.e counters for the CPU0-3 did not change.
 
+Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
+
+[root@moon 44]# echo 1024-1031 > smp_affinity
+[root@moon 44]# cat smp_affinity
+1024-1031
+
+Note that to do this with a bitmask would require 32 bitmasks of zero
+to follow the pertinent one.
diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt
index 89698e8..c00c6a5 100644
--- a/Documentation/blockdev/cciss.txt
+++ b/Documentation/blockdev/cciss.txt
@@ -169,3 +169,18 @@
 must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example)
 before i/o can proceed again to a tape drive which was reset.
 
+There is a cciss_tape_cmds module parameter which can be used to make cciss
+allocate more commands for use by tape drives.  Ordinarily only a few commands
+(6) are allocated for tape drives because tape drives are slow and
+infrequently used and the primary purpose of Smart Array controllers is to
+act as a RAID controller for disk drives, so the vast majority of commands
+are allocated for disk devices.  However, if you have more than a few tape
+drives attached to a smart array, the default number of commands may not be
+enought (for example, if you have 8 tape drives, you could only rewind 6
+at one time with the default number of commands.)  The cciss_tape_cmds module
+parameter allows more commands (up to 16 more) to be allocated for use by
+tape drives.  For example:
+
+        insmod cciss.ko cciss_tape_cmds=16
+
+Or, as a kernel boot parameter passed in via grub:  cciss.cciss_tape_cmds=8
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 9164ae3..9b728dc 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -16,7 +16,7 @@
 thinking SMP cache/tlb flushing must be so inefficient, this is in
 fact an area where many optimizations are possible.  For example,
 if it can be proven that a user address space has never executed
-on a cpu (see vma->cpu_vm_mask), one need not perform a flush
+on a cpu (see mm_cpumask()), one need not perform a flush
 for this address space on that cpu.
 
 First, the TLB flushing interfaces, since they are the simplest.  The
diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index edb7ae1..2c6be03 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -74,3 +74,57 @@
 		interrupt-parent = <&mpic>;
 		phy-handle = <&phy0>
 	};
+
+* Gianfar PTP clock nodes
+
+General Properties:
+
+  - compatible   Should be "fsl,etsec-ptp"
+  - reg          Offset and length of the register set for the device
+  - interrupts   There should be at least two interrupts. Some devices
+                 have as many as four PTP related interrupts.
+
+Clock Properties:
+
+  - fsl,tclk-period  Timer reference clock period in nanoseconds.
+  - fsl,tmr-prsc     Prescaler, divides the output clock.
+  - fsl,tmr-add      Frequency compensation value.
+  - fsl,tmr-fiper1   Fixed interval period pulse generator.
+  - fsl,tmr-fiper2   Fixed interval period pulse generator.
+  - fsl,max-adj      Maximum frequency adjustment in parts per billion.
+
+  These properties set the operational parameters for the PTP
+  clock. You must choose these carefully for the clock to work right.
+  Here is how to figure good values:
+
+  TimerOsc     = system clock               MHz
+  tclk_period  = desired clock period       nanoseconds
+  NominalFreq  = 1000 / tclk_period         MHz
+  FreqDivRatio = TimerOsc / NominalFreq     (must be greater that 1.0)
+  tmr_add      = ceil(2^32 / FreqDivRatio)
+  OutputClock  = NominalFreq / tmr_prsc     MHz
+  PulseWidth   = 1 / OutputClock            microseconds
+  FiperFreq1   = desired frequency in Hz
+  FiperDiv1    = 1000000 * OutputClock / FiperFreq1
+  tmr_fiper1   = tmr_prsc * tclk_period * FiperDiv1 - tclk_period
+  max_adj      = 1000000000 * (FreqDivRatio - 1.0) - 1
+
+  The calculation for tmr_fiper2 is the same as for tmr_fiper1. The
+  driver expects that tmr_fiper1 will be correctly set to produce a 1
+  Pulse Per Second (PPS) signal, since this will be offered to the PPS
+  subsystem to synchronize the Linux clock.
+
+Example:
+
+	ptp_clock@24E00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0x24E00 0xB0>;
+		interrupts = <12 0x8 13 0x8>;
+		interrupt-parent = < &ipic >;
+		fsl,tclk-period = <10>;
+		fsl,tmr-prsc    = <100>;
+		fsl,tmr-add     = <0x999999A4>;
+		fsl,tmr-fiper1  = <0x3B9AC9F6>;
+		fsl,tmr-fiper2  = <0x00018696>;
+		fsl,max-adj     = <659999998>;
+	};
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 60740e8..f481780 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -574,6 +574,12 @@
   > cat /proc/irq/0/smp_affinity
   ffffffff
 
+There is an alternate interface, smp_affinity_list which allows specifying
+a cpu range instead of a bitmask:
+
+  > cat /proc/irq/0/smp_affinity_list
+  1024-1031
+
 The default_smp_affinity mask applies to all non-active IRQs, which are the
 IRQs which have not yet been allocated/activated, and hence which lack a
 /proc/irq/[0-9]* directory.
@@ -583,12 +589,13 @@
 include information about any possible driver locality preference.
 
 prof_cpu_mask specifies which CPUs are to be profiled by the system wide
-profiler. Default value is ffffffff (all cpus).
+profiler. Default value is ffffffff (all cpus if there are only 32 of them).
 
 The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
 between all the CPUs which are allowed to handle it. As usual the kernel has
 more info than you and does a better job than you, so the defaults are the
-best choice for almost everyone.
+best choice for almost everyone.  [Note this applies only to those IO-APIC's
+that support "Round Robin" interrupt distribution.]
 
 There are  three  more  important subdirectories in /proc: net, scsi, and sys.
 The general  rule  is  that  the  contents,  or  even  the  existence of these
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7c6624e..5438a2d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1777,9 +1777,6 @@
 
 	nosoftlockup	[KNL] Disable the soft-lockup detector.
 
-	noswapaccount	[KNL] Disable accounting of swap in memory resource
-			controller. (See Documentation/cgroups/memory.txt)
-
 	nosync		[HW,M68K] Disables sync negotiation for all devices.
 
 	notsc		[BUGS=X86-32] Disable Time Stamp Counter
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 65f4c79..9c0a80d 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -136,7 +136,7 @@
                              dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
                          &inode->i_mutex:           161            286 18446744073709       62882.54     1244614.55           3653          20598 18446744073709       62318.60     1693822.74
                          &zone->lru_lock:            94             94           0.53           7.33          92.10           4366          32690           0.29          59.81       16350.06
-              &inode->i_data.i_mmap_lock:            79             79           0.40           3.77          53.03          11779          87755           0.28         116.93       29898.44
+              &inode->i_data.i_mmap_mutex:            79             79           0.40           3.77          53.03          11779          87755           0.28         116.93       29898.44
                         &q->__queue_lock:            48             50           0.52          31.62          86.31            774          13131           0.17         113.08       12277.52
                         &rq->rq_lock_key:            43             47           0.74          68.50         170.63           3706          33929           0.22         107.99       17460.62
                       &rq->rq_lock_key#2:            39             46           0.75           6.68          49.03           2979          32292           0.17         125.17       17137.63
diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
new file mode 100644
index 0000000..ae8fef8
--- /dev/null
+++ b/Documentation/ptp/ptp.txt
@@ -0,0 +1,89 @@
+
+* PTP hardware clock infrastructure for Linux
+
+  This patch set introduces support for IEEE 1588 PTP clocks in
+  Linux. Together with the SO_TIMESTAMPING socket options, this
+  presents a standardized method for developing PTP user space
+  programs, synchronizing Linux with external clocks, and using the
+  ancillary features of PTP hardware clocks.
+
+  A new class driver exports a kernel interface for specific clock
+  drivers and a user space interface. The infrastructure supports a
+  complete set of PTP hardware clock functionality.
+
+  + Basic clock operations
+    - Set time
+    - Get time
+    - Shift the clock by a given offset atomically
+    - Adjust clock frequency
+
+  + Ancillary clock features
+    - One short or periodic alarms, with signal delivery to user program
+    - Time stamp external events
+    - Period output signals configurable from user space
+    - Synchronization of the Linux system time via the PPS subsystem
+
+** PTP hardware clock kernel API
+
+   A PTP clock driver registers itself with the class driver. The
+   class driver handles all of the dealings with user space. The
+   author of a clock driver need only implement the details of
+   programming the clock hardware. The clock driver notifies the class
+   driver of asynchronous events (alarms and external time stamps) via
+   a simple message passing interface.
+
+   The class driver supports multiple PTP clock drivers. In normal use
+   cases, only one PTP clock is needed. However, for testing and
+   development, it can be useful to have more than one clock in a
+   single system, in order to allow performance comparisons.
+
+** PTP hardware clock user space API
+
+   The class driver also creates a character device for each
+   registered clock. User space can use an open file descriptor from
+   the character device as a POSIX clock id and may call
+   clock_gettime, clock_settime, and clock_adjtime.  These calls
+   implement the basic clock operations.
+
+   User space programs may control the clock using standardized
+   ioctls. A program may query, enable, configure, and disable the
+   ancillary clock features. User space can receive time stamped
+   events via blocking read() and poll(). One shot and periodic
+   signals may be configured via the POSIX timer_settime() system
+   call.
+
+** Writing clock drivers
+
+   Clock drivers include include/linux/ptp_clock_kernel.h and register
+   themselves by presenting a 'struct ptp_clock_info' to the
+   registration method. Clock drivers must implement all of the
+   functions in the interface. If a clock does not offer a particular
+   ancillary feature, then the driver should just return -EOPNOTSUPP
+   from those functions.
+
+   Drivers must ensure that all of the methods in interface are
+   reentrant. Since most hardware implementations treat the time value
+   as a 64 bit integer accessed as two 32 bit registers, drivers
+   should use spin_lock_irqsave/spin_unlock_irqrestore to protect
+   against concurrent access. This locking cannot be accomplished in
+   class driver, since the lock may also be needed by the clock
+   driver's interrupt service routine.
+
+** Supported hardware
+
+   + Freescale eTSEC gianfar
+     - 2 Time stamp external triggers, programmable polarity (opt. interrupt)
+     - 2 Alarm registers (optional interrupt)
+     - 3 Periodic signals (optional interrupt)
+
+   + National DP83640
+     - 6 GPIOs programmable as inputs or outputs
+     - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be
+       used as general inputs or outputs
+     - GPIO inputs can time stamp external triggers
+     - GPIO outputs can produce periodic signals
+     - 1 interrupt pin
+
+   + Intel IXP465
+     - Auxiliary Slave/Master Mode Snapshot (optional interrupt)
+     - Target Time (optional interrupt)
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
new file mode 100644
index 0000000..f59ded0
--- /dev/null
+++ b/Documentation/ptp/testptp.c
@@ -0,0 +1,381 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* When glibc offers the syscall, this will go away. */
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+#define FD_TO_CLOCKID(fd)	((~(clockid_t) (fd) << 3) | CLOCKFD)
+
+	return FD_TO_CLOCKID(fd);
+}
+
+static void handle_alarm(int s)
+{
+	printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+	struct sigaction action;
+	sigset_t mask;
+
+	/* Unblock the signal. */
+	sigemptyset(&mask);
+	sigaddset(&mask, signum);
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+	/* Install the signal handler. */
+	action.sa_handler = handler;
+	action.sa_flags = 0;
+	sigemptyset(&action.sa_mask);
+	sigaction(signum, &action, NULL);
+
+	return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 * Instead of calculating either one of
+	 *
+	 *    scaled_ppm = (ppb / 1000) << 16  [1]
+	 *    scaled_ppm = (ppb << 16) / 1000  [2]
+	 *
+	 * we simply use double precision math, in order to avoid the
+	 * truncation in [1] and the possible overflow in [2].
+	 */
+	return (long) (ppb * 65.536);
+}
+
+static void usage(char *progname)
+{
+	fprintf(stderr,
+		"usage: %s [options]\n"
+		" -a val     request a one-shot alarm after 'val' seconds\n"
+		" -A val     request a periodic alarm every 'val' seconds\n"
+		" -c         query the ptp clock's capabilities\n"
+		" -d name    device to open\n"
+		" -e val     read 'val' external time stamp events\n"
+		" -f val     adjust the ptp clock frequency by 'val' ppb\n"
+		" -g         get the ptp clock time\n"
+		" -h         prints this message\n"
+		" -p val     enable output with a period of 'val' nanoseconds\n"
+		" -P val     enable or disable (val=1|0) the system clock PPS\n"
+		" -s         set the ptp clock time from the system time\n"
+		" -S         set the system time from the ptp clock time\n"
+		" -t val     shift the ptp clock time by 'val' seconds\n",
+		progname);
+}
+
+int main(int argc, char *argv[])
+{
+	struct ptp_clock_caps caps;
+	struct ptp_extts_event event;
+	struct ptp_extts_request extts_request;
+	struct ptp_perout_request perout_request;
+	struct timespec ts;
+	struct timex tx;
+
+	static timer_t timerid;
+	struct itimerspec timeout;
+	struct sigevent sigevent;
+
+	char *progname;
+	int c, cnt, fd;
+
+	char *device = DEVICE;
+	clockid_t clkid;
+	int adjfreq = 0x7fffffff;
+	int adjtime = 0;
+	int capabilities = 0;
+	int extts = 0;
+	int gettime = 0;
+	int oneshot = 0;
+	int periodic = 0;
+	int perout = -1;
+	int pps = -1;
+	int settime = 0;
+
+	progname = strrchr(argv[0], '/');
+	progname = progname ? 1+progname : argv[0];
+	while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) {
+		switch (c) {
+		case 'a':
+			oneshot = atoi(optarg);
+			break;
+		case 'A':
+			periodic = atoi(optarg);
+			break;
+		case 'c':
+			capabilities = 1;
+			break;
+		case 'd':
+			device = optarg;
+			break;
+		case 'e':
+			extts = atoi(optarg);
+			break;
+		case 'f':
+			adjfreq = atoi(optarg);
+			break;
+		case 'g':
+			gettime = 1;
+			break;
+		case 'p':
+			perout = atoi(optarg);
+			break;
+		case 'P':
+			pps = atoi(optarg);
+			break;
+		case 's':
+			settime = 1;
+			break;
+		case 'S':
+			settime = 2;
+			break;
+		case 't':
+			adjtime = atoi(optarg);
+			break;
+		case 'h':
+			usage(progname);
+			return 0;
+		case '?':
+		default:
+			usage(progname);
+			return -1;
+		}
+	}
+
+	fd = open(device, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+		return -1;
+	}
+
+	clkid = get_clockid(fd);
+	if (CLOCK_INVALID == clkid) {
+		fprintf(stderr, "failed to read clock id\n");
+		return -1;
+	}
+
+	if (capabilities) {
+		if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+			perror("PTP_CLOCK_GETCAPS");
+		} else {
+			printf("capabilities:\n"
+			       "  %d maximum frequency adjustment (ppb)\n"
+			       "  %d programmable alarms\n"
+			       "  %d external time stamp channels\n"
+			       "  %d programmable periodic signals\n"
+			       "  %d pulse per second\n",
+			       caps.max_adj,
+			       caps.n_alarm,
+			       caps.n_ext_ts,
+			       caps.n_per_out,
+			       caps.pps);
+		}
+	}
+
+	if (0x7fffffff != adjfreq) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = ppb_to_scaled_ppm(adjfreq);
+		if (clock_adjtime(clkid, &tx)) {
+			perror("clock_adjtime");
+		} else {
+			puts("frequency adjustment okay");
+		}
+	}
+
+	if (adjtime) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_SETOFFSET;
+		tx.time.tv_sec = adjtime;
+		tx.time.tv_usec = 0;
+		if (clock_adjtime(clkid, &tx) < 0) {
+			perror("clock_adjtime");
+		} else {
+			puts("time shift okay");
+		}
+	}
+
+	if (gettime) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+		} else {
+			printf("clock time: %ld.%09ld or %s",
+			       ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+		}
+	}
+
+	if (settime == 1) {
+		clock_gettime(CLOCK_REALTIME, &ts);
+		if (clock_settime(clkid, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (settime == 2) {
+		clock_gettime(clkid, &ts);
+		if (clock_settime(CLOCK_REALTIME, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (extts) {
+		memset(&extts_request, 0, sizeof(extts_request));
+		extts_request.index = 0;
+		extts_request.flags = PTP_ENABLE_FEATURE;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+			extts = 0;
+		} else {
+			puts("external time stamp request okay");
+		}
+		for (; extts; extts--) {
+			cnt = read(fd, &event, sizeof(event));
+			if (cnt != sizeof(event)) {
+				perror("read");
+				break;
+			}
+			printf("event index %u at %lld.%09u\n", event.index,
+			       event.t.sec, event.t.nsec);
+			fflush(stdout);
+		}
+		/* Disable the feature again. */
+		extts_request.flags = 0;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+		}
+	}
+
+	if (oneshot) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_value.tv_sec = oneshot;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		pause();
+		timer_delete(timerid);
+	}
+
+	if (periodic) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_interval.tv_sec = periodic;
+		timeout.it_value.tv_sec = periodic;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		while (1) {
+			pause();
+		}
+		timer_delete(timerid);
+	}
+
+	if (perout >= 0) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+			return -1;
+		}
+		memset(&perout_request, 0, sizeof(perout_request));
+		perout_request.index = 0;
+		perout_request.start.sec = ts.tv_sec + 2;
+		perout_request.start.nsec = 0;
+		perout_request.period.sec = 0;
+		perout_request.period.nsec = perout;
+		if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+			perror("PTP_PEROUT_REQUEST");
+		} else {
+			puts("periodic output request okay");
+		}
+	}
+
+	if (pps != -1) {
+		int enable = pps ? 1 : 0;
+		if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+			perror("PTP_ENABLE_PPS");
+		} else {
+			puts("pps for system time request okay");
+		}
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
new file mode 100644
index 0000000..4ef2d97
--- /dev/null
+++ b/Documentation/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC        = $(CROSS_COMPILE)gcc
+INC       = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS    = -Wall $(INC)
+LDLIBS    = -lrt
+PROGS     = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+	rm -f testptp.o
+
+distclean: clean
+	rm -f $(PROGS)
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
index 9b7e190..5d0fc8b 100644
--- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
@@ -1182,6 +1182,16 @@
   forge.net/>  and explains these in detail, as well as
   some other issues.
 
+  There is also a related point-to-point only "ucast" transport.
+  This is useful when your network does not support multicast, and
+  all network connections are simple point to point links.
+
+  The full set of command line options for this transport are
+
+
+       ethn=ucast,ethernet address,remote address,listen port,remote port
+
+
 
 
   66..66..  TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index 25fadb4..f61228b 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -66,7 +66,7 @@
 expand_stack(), it is hard to come up with a destructive scenario without 
 having the vmlist protection in this case.
 
-The page_table_lock nests with the inode i_mmap_lock and the kmem cache
+The page_table_lock nests with the inode i_mmap_mutex and the kmem cache
 c_spinlock spinlocks.  This is okay, since the kmem code asks for pages after
 dropping c_spinlock.  The page_table_lock also nests with pagecache_lock and
 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
diff --git a/MAINTAINERS b/MAINTAINERS
index 98c324b..59cd2f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2034,9 +2034,8 @@
 F:	net/ax25/sysctl_net_ax25.c
 
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
-M:	Tobias Ringstrom <tori@unhappy.mine.nu>
 L:	netdev@vger.kernel.org
-S:	Maintained
+S:	Orphan
 F:	Documentation/networking/dmfe.txt
 F:	drivers/net/tulip/dmfe.c
 
@@ -3898,7 +3897,6 @@
 LINUX SECURITY MODULE (LSM) FRAMEWORK
 M:	Chris Wright <chrisw@sous-sol.org>
 L:	linux-security-module@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chrisw/lsm-2.6.git
 S:	Supported
 
 LIS3LV02D ACCELEROMETER DRIVER
diff --git a/arch/Kconfig b/arch/Kconfig
index 8d24bac..26b0e239 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -175,4 +175,7 @@
 config HAVE_ARCH_MUTEX_CPU_RELAX
 	bool
 
+config HAVE_RCU_TABLE_FREE
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9808998..e3a8277 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -12,6 +12,7 @@
 	select GENERIC_IRQ_PROBE
 	select AUTO_IRQ_AFFINITY if SMP
 	select GENERIC_IRQ_SHOW
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
@@ -51,6 +52,9 @@
 config GENERIC_CMOS_UPDATE
         def_bool y
 
+config GENERIC_GPIO
+	def_bool y
+
 config ZONE_DMA
 	bool
 	default y
diff --git a/arch/alpha/include/asm/gpio.h b/arch/alpha/include/asm/gpio.h
new file mode 100644
index 0000000..7dc6a63
--- /dev/null
+++ b/arch/alpha/include/asm/gpio.h
@@ -0,0 +1,55 @@
+/*
+ * Generic GPIO API implementation for Alpha.
+ *
+ * A stright copy of that for PowerPC which was:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_ALPHA_GPIO_H
+#define _ASM_ALPHA_GPIO_H
+
+#include <linux/errno.h>
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * We don't (yet) implement inlined/rapid versions for on-chip gpios.
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
+static inline int gpio_to_irq(unsigned int gpio)
+{
+	return __gpio_to_irq(gpio);
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* _ASM_ALPHA_GPIO_H */
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 3f390e8..c46e714 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -39,8 +39,6 @@
 
 extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 
-#define PROC_CHANGE_PENALTY     20
-
 #define hard_smp_processor_id()	__hard_smp_processor_id()
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 3ec3506..838eac1 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -121,7 +121,7 @@
 	/* Wait for the secondaries to halt. */
 	set_cpu_present(boot_cpuid, false);
 	set_cpu_possible(boot_cpuid, false);
-	while (cpus_weight(cpu_present_map))
+	while (cpumask_weight(cpu_present_mask))
 		barrier();
 #endif
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index edbddcb..cc0fd86 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -1257,7 +1257,7 @@
 #ifdef CONFIG_SMP
 	seq_printf(f, "cpus active\t\t: %u\n"
 		      "cpu active mask\t\t: %016lx\n",
-		       num_online_cpus(), cpus_addr(cpu_possible_map)[0]);
+		       num_online_cpus(), cpumask_bits(cpu_possible_mask)[0]);
 #endif
 
 	show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape);
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 5a621c6..d739703 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -451,7 +451,7 @@
 	}
 
 	printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_map = %lx\n",
-	       smp_num_probed, cpu_present_map.bits[0]);
+	       smp_num_probed, cpumask_bits(cpu_present_mask)[0]);
 }
 
 /*
@@ -629,8 +629,9 @@
 void
 smp_send_stop(void)
 {
-	cpumask_t to_whom = cpu_possible_map;
-	cpu_clear(smp_processor_id(), to_whom);
+	cpumask_t to_whom;
+	cpumask_copy(&to_whom, cpu_possible_mask);
+	cpumask_clear_cpu(smp_processor_id(), &to_whom);
 #ifdef DEBUG_IPI_MSG
 	if (hard_smp_processor_id() != boot_cpu_id)
 		printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 5ac00fd..f885682 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -140,7 +140,7 @@
 
 	for (cpu = 0; cpu < 4; cpu++) {
 		unsigned long aff = cpu_irq_affinity[cpu];
-		if (cpu_isset(cpu, affinity))
+		if (cpumask_test_cpu(cpu, &affinity))
 			aff |= 1UL << irq;
 		else
 			aff &= ~(1UL << irq);
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index fea0e46..6994407 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -65,10 +65,11 @@
 	register int bcpu = boot_cpuid;
 
 #ifdef CONFIG_SMP
-	cpumask_t cpm = cpu_present_map;
+	cpumask_t cpm;
 	volatile unsigned long *dim0, *dim1, *dim2, *dim3;
 	unsigned long mask0, mask1, mask2, mask3, dummy;
 
+	cpumask_copy(&cpm, cpu_present_mask);
 	mask &= ~isa_enable;
 	mask0 = mask & titan_cpu_irq_affinity[0];
 	mask1 = mask & titan_cpu_irq_affinity[1];
@@ -84,10 +85,10 @@
 	dim1 = &cchip->dim1.csr;
 	dim2 = &cchip->dim2.csr;
 	dim3 = &cchip->dim3.csr;
-	if (!cpu_isset(0, cpm)) dim0 = &dummy;
-	if (!cpu_isset(1, cpm)) dim1 = &dummy;
-	if (!cpu_isset(2, cpm)) dim2 = &dummy;
-	if (!cpu_isset(3, cpm)) dim3 = &dummy;
+	if (!cpumask_test_cpu(0, &cpm)) dim0 = &dummy;
+	if (!cpumask_test_cpu(1, &cpm)) dim1 = &dummy;
+	if (!cpumask_test_cpu(2, &cpm)) dim2 = &dummy;
+	if (!cpumask_test_cpu(3, &cpm)) dim3 = &dummy;
 
 	*dim0 = mask0;
 	*dim1 = mask1;
@@ -137,7 +138,7 @@
 	int cpu;
 
 	for (cpu = 0; cpu < 4; cpu++) {
-		if (cpu_isset(cpu, affinity))
+		if (cpumask_test_cpu(cpu, &affinity))
 			titan_cpu_irq_affinity[cpu] |= 1UL << irq;
 		else
 			titan_cpu_irq_affinity[cpu] &= ~(1UL << irq);
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 86425ab..69d0c57 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -32,8 +32,6 @@
 #include <asm/console.h>
 #include <asm/tlb.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern void die_if_kernel(char *,struct pt_regs *,long);
 
 static struct pcb_struct original_pcb;
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 7b2c56d..3973ae3 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -313,6 +313,7 @@
 			zones_size[ZONE_DMA] = dma_local_pfn;
 			zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
 		}
+		node_set_state(nid, N_NORMAL_MEMORY);
 		free_area_init_node(nid, zones_size, start_pfn, NULL);
 	}
 
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 03d01d7..81cbe40 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -63,13 +63,6 @@
 	      8 - SIGSEGV faults
 	     16 - SIGBUS faults
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T output.
-
 # These options are only for real kernel hackers who want to get their hands dirty.
 config DEBUG_LL
 	bool "Kernel low-level debugging functions"
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index a87664f54..d2b514f 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -20,12 +20,6 @@
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
-/*
- * at the moment, there's not a big penalty for changing CPUs
- * (the >big< penalty is running SMP in the first place)
- */
-#define PROC_CHANGE_PENALTY		15
-
 struct seq_file;
 
 /*
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 82dfe5d..265f908 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -41,12 +41,12 @@
  */
 #if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7)
 #define tlb_fast_mode(tlb)	0
-#define FREE_PTE_NR		500
 #else
 #define tlb_fast_mode(tlb)	1
-#define FREE_PTE_NR		0
 #endif
 
+#define MMU_GATHER_BUNDLE	8
+
 /*
  * TLB handling.  This allows us to remove pages from the page
  * tables, and efficiently handle the TLB issues.
@@ -58,7 +58,9 @@
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
-	struct page		*pages[FREE_PTE_NR];
+	unsigned int		max;
+	struct page		**pages;
+	struct page		*local[MMU_GATHER_BUNDLE];
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -97,26 +99,37 @@
 	}
 }
 
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	}
+}
+
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	tlb_flush(tlb);
 	if (!tlb_fast_mode(tlb)) {
 		free_pages_and_swap_cache(tlb->pages, tlb->nr);
 		tlb->nr = 0;
+		if (tlb->pages == tlb->local)
+			__tlb_alloc_page(tlb);
 	}
 }
 
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = fullmm;
 	tlb->vma = NULL;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
 	tlb->nr = 0;
-
-	return tlb;
+	__tlb_alloc_page(tlb);
 }
 
 static inline void
@@ -127,7 +140,8 @@
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var(mmu_gathers);
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
 }
 
 /*
@@ -162,15 +176,22 @@
 		tlb_flush(tlb);
 }
 
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-	} else {
-		tlb->pages[tlb->nr++] = page;
-		if (tlb->nr >= FREE_PTE_NR)
-			tlb_flush_mmu(tlb);
+		return 1; /* avoid calling tlb_flush_mmu */
 	}
+
+	tlb->pages[tlb->nr++] = page;
+	VM_BUG_ON(tlb->nr > tlb->max);
+	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h b/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h
new file mode 100644
index 0000000..292d55e
--- /dev/null
+++ b/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h
@@ -0,0 +1,78 @@
+/*
+ * PTP 1588 clock using the IXP46X
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _IXP46X_TS_H_
+#define _IXP46X_TS_H_
+
+#define DEFAULT_ADDEND 0xF0000029
+#define TICKS_NS_SHIFT 4
+
+struct ixp46x_channel_ctl {
+	u32 ch_control;  /* 0x40 Time Synchronization Channel Control */
+	u32 ch_event;    /* 0x44 Time Synchronization Channel Event */
+	u32 tx_snap_lo;  /* 0x48 Transmit Snapshot Low Register */
+	u32 tx_snap_hi;  /* 0x4C Transmit Snapshot High Register */
+	u32 rx_snap_lo;  /* 0x50 Receive Snapshot Low Register */
+	u32 rx_snap_hi;  /* 0x54 Receive Snapshot High Register */
+	u32 src_uuid_lo; /* 0x58 Source UUID0 Low Register */
+	u32 src_uuid_hi; /* 0x5C Sequence Identifier/Source UUID0 High */
+};
+
+struct ixp46x_ts_regs {
+	u32 control;     /* 0x00 Time Sync Control Register */
+	u32 event;       /* 0x04 Time Sync Event Register */
+	u32 addend;      /* 0x08 Time Sync Addend Register */
+	u32 accum;       /* 0x0C Time Sync Accumulator Register */
+	u32 test;        /* 0x10 Time Sync Test Register */
+	u32 unused;      /* 0x14 */
+	u32 rsystime_lo; /* 0x18 RawSystemTime_Low Register */
+	u32 rsystime_hi; /* 0x1C RawSystemTime_High Register */
+	u32 systime_lo;  /* 0x20 SystemTime_Low Register */
+	u32 systime_hi;  /* 0x24 SystemTime_High Register */
+	u32 trgt_lo;     /* 0x28 TargetTime_Low Register */
+	u32 trgt_hi;     /* 0x2C TargetTime_High Register */
+	u32 asms_lo;     /* 0x30 Auxiliary Slave Mode Snapshot Low  */
+	u32 asms_hi;     /* 0x34 Auxiliary Slave Mode Snapshot High */
+	u32 amms_lo;     /* 0x38 Auxiliary Master Mode Snapshot Low */
+	u32 amms_hi;     /* 0x3C Auxiliary Master Mode Snapshot High */
+
+	struct ixp46x_channel_ctl channel[3];
+};
+
+/* 0x00 Time Sync Control Register Bits */
+#define TSCR_AMM (1<<3)
+#define TSCR_ASM (1<<2)
+#define TSCR_TTM (1<<1)
+#define TSCR_RST (1<<0)
+
+/* 0x04 Time Sync Event Register Bits */
+#define TSER_SNM (1<<3)
+#define TSER_SNS (1<<2)
+#define TTIPEND  (1<<1)
+
+/* 0x40 Time Synchronization Channel Control Register Bits */
+#define MASTER_MODE   (1<<0)
+#define TIMESTAMP_ALL (1<<1)
+
+/* 0x44 Time Synchronization Channel Event Register Bits */
+#define TX_SNAPSHOT_LOCKED (1<<0)
+#define RX_SNAPSHOT_LOCKED (1<<1)
+
+#endif
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 9afd087..23244cd 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -37,8 +37,8 @@
 #include <plat/common.h>
 #include <plat/dma.h>
 #include <plat/gpmc.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 
 #include <plat/gpmc-smc91x.h>
 
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 56702c5..93edd7f 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -36,7 +36,7 @@
 #include <plat/usb.h>
 #include <plat/mmc.h>
 #include <plat/omap4-keypad.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include "mux.h"
 #include "hsmmc.h"
@@ -680,6 +680,15 @@
 	.name = "hdmi",
 	.driver_name = "hdmi_panel",
 	.type = OMAP_DISPLAY_TYPE_HDMI,
+	.clocks	= {
+		.dispc	= {
+			.dispc_fclk_src	= OMAP_DSS_CLK_SRC_FCK,
+		},
+		.hdmi	= {
+			.regn	= 15,
+			.regm2	= 1,
+		},
+	},
 	.platform_enable = sdp4430_panel_enable_hdmi,
 	.platform_disable = sdp4430_panel_disable_hdmi,
 	.channel = OMAP_DSS_CHANNEL_DIGIT,
diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c
index ce7d5e6..ff8c59b 100644
--- a/arch/arm/mach-omap2/board-am3517evm.c
+++ b/arch/arm/mach-omap2/board-am3517evm.c
@@ -34,8 +34,8 @@
 #include <plat/board.h>
 #include <plat/common.h>
 #include <plat/usb.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 
 #include "mux.h"
 #include "control.h"
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 02a12b4..9340f6a 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -45,8 +45,8 @@
 #include <plat/nand.h>
 #include <plat/gpmc.h>
 #include <plat/usb.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 #include <plat/mcspi.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 65f9fde..1d1b56a 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -45,8 +45,8 @@
 #include <plat/gpmc.h>
 #include <plat/nand.h>
 #include <plat/usb.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 
 #include <plat/mcspi.h>
 #include <linux/input/matrix_keypad.h>
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index 34cf982..3da64d3 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -31,8 +31,8 @@
 #include <plat/common.h>
 #include <plat/gpmc.h>
 #include <plat/usb.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 #include <plat/onenand.h>
 
 #include "mux.h"
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 33007fd..97750d4 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -41,8 +41,8 @@
 
 #include <plat/board.h>
 #include <plat/common.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 #include <plat/gpmc.h>
 #include <plat/nand.h>
 #include <plat/usb.h>
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 5a1a916..7f94ccc 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -44,8 +44,8 @@
 #include <plat/usb.h>
 #include <plat/common.h>
 #include <plat/mcspi.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 
 #include "mux.h"
 #include "sdram-micron-mt46h32m32lf-6.h"
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 07dba88..1db1549 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -46,7 +46,7 @@
 #include <mach/hardware.h>
 #include <plat/mcspi.h>
 #include <plat/usb.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/nand.h>
 
 #include "mux.h"
diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c
index a6e0b91..a72c90a 100644
--- a/arch/arm/mach-omap2/board-omap3stalker.c
+++ b/arch/arm/mach-omap2/board-omap3stalker.c
@@ -39,8 +39,8 @@
 #include <plat/gpmc.h>
 #include <plat/nand.h>
 #include <plat/usb.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 
 #include <plat/mcspi.h>
 #include <linux/input/matrix_keypad.h>
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index f3a7b10..e4973ac 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -34,13 +34,13 @@
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include <plat/board.h>
 #include <plat/common.h>
 #include <plat/usb.h>
 #include <plat/mmc.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omap-panel-generic-dpi.h>
 #include "timer-gp.h"
 
 #include "hsmmc.h"
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index 59ca333..9d192ff 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -43,8 +43,8 @@
 
 #include <plat/board.h>
 #include <plat/common.h>
-#include <plat/display.h>
-#include <plat/panel-generic-dpi.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
 #include <mach/gpio.h>
 #include <plat/gpmc.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-omap2/board-rx51-video.c b/arch/arm/mach-omap2/board-rx51-video.c
index 89a66db..2df10b6 100644
--- a/arch/arm/mach-omap2/board-rx51-video.c
+++ b/arch/arm/mach-omap2/board-rx51-video.c
@@ -15,7 +15,7 @@
 #include <linux/spi/spi.h>
 #include <linux/mm.h>
 #include <asm/mach-types.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/vram.h>
 #include <plat/mcspi.h>
 
diff --git a/arch/arm/mach-omap2/board-zoom-display.c b/arch/arm/mach-omap2/board-zoom-display.c
index 37b84c2..60e8645 100644
--- a/arch/arm/mach-omap2/board-zoom-display.c
+++ b/arch/arm/mach-omap2/board-zoom-display.c
@@ -15,7 +15,7 @@
 #include <linux/i2c/twl.h>
 #include <linux/spi/spi.h>
 #include <plat/mcspi.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define LCD_PANEL_RESET_GPIO_PROD	96
 #define LCD_PANEL_RESET_GPIO_PILOT	55
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 256d23f..543fcb8 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -22,7 +22,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
 
@@ -56,37 +56,58 @@
 	return false;
 }
 
+struct omap_dss_hwmod_data {
+	const char *oh_name;
+	const char *dev_name;
+	const int id;
+};
+
+static const struct omap_dss_hwmod_data omap2_dss_hwmod_data[] __initdata = {
+	{ "dss_core", "omapdss_dss", -1 },
+	{ "dss_dispc", "omapdss_dispc", -1 },
+	{ "dss_rfbi", "omapdss_rfbi", -1 },
+	{ "dss_venc", "omapdss_venc", -1 },
+};
+
+static const struct omap_dss_hwmod_data omap3_dss_hwmod_data[] __initdata = {
+	{ "dss_core", "omapdss_dss", -1 },
+	{ "dss_dispc", "omapdss_dispc", -1 },
+	{ "dss_rfbi", "omapdss_rfbi", -1 },
+	{ "dss_venc", "omapdss_venc", -1 },
+	{ "dss_dsi1", "omapdss_dsi1", -1 },
+};
+
+static const struct omap_dss_hwmod_data omap4_dss_hwmod_data[] __initdata = {
+	{ "dss_core", "omapdss_dss", -1 },
+	{ "dss_dispc", "omapdss_dispc", -1 },
+	{ "dss_rfbi", "omapdss_rfbi", -1 },
+	{ "dss_venc", "omapdss_venc", -1 },
+	{ "dss_dsi1", "omapdss_dsi1", -1 },
+	{ "dss_dsi2", "omapdss_dsi2", -1 },
+	{ "dss_hdmi", "omapdss_hdmi", -1 },
+};
+
 int __init omap_display_init(struct omap_dss_board_info *board_data)
 {
 	int r = 0;
 	struct omap_hwmod *oh;
 	struct omap_device *od;
-	int i;
+	int i, oh_count;
 	struct omap_display_platform_data pdata;
-
-	/*
-	 * omap: valid DSS hwmod names
-	 * omap2,3,4: dss_core, dss_dispc, dss_rfbi, dss_venc
-	 * omap3,4: dss_dsi1
-	 * omap4: dss_dsi2, dss_hdmi
-	 */
-	char *oh_name[] = { "dss_core", "dss_dispc", "dss_rfbi", "dss_venc",
-		"dss_dsi1", "dss_dsi2", "dss_hdmi" };
-	char *dev_name[] = { "omapdss_dss", "omapdss_dispc", "omapdss_rfbi",
-		"omapdss_venc", "omapdss_dsi1", "omapdss_dsi2",
-		"omapdss_hdmi" };
-	int oh_count;
+	const struct omap_dss_hwmod_data *curr_dss_hwmod;
 
 	memset(&pdata, 0, sizeof(pdata));
 
-	if (cpu_is_omap24xx())
-		oh_count = ARRAY_SIZE(oh_name) - 3;
-		/* last 3 hwmod dev in oh_name are not available for omap2 */
-	else if (cpu_is_omap44xx())
-		oh_count = ARRAY_SIZE(oh_name);
-	else
-		oh_count = ARRAY_SIZE(oh_name) - 2;
-		/* last 2 hwmod dev in oh_name are not available for omap3 */
+	if (cpu_is_omap24xx()) {
+		curr_dss_hwmod = omap2_dss_hwmod_data;
+		oh_count = ARRAY_SIZE(omap2_dss_hwmod_data);
+	} else if (cpu_is_omap34xx()) {
+		curr_dss_hwmod = omap3_dss_hwmod_data;
+		oh_count = ARRAY_SIZE(omap3_dss_hwmod_data);
+	} else {
+		curr_dss_hwmod = omap4_dss_hwmod_data;
+		oh_count = ARRAY_SIZE(omap4_dss_hwmod_data);
+	}
 
 	/* opt_clks are always associated with dss hwmod */
 	oh_core = omap_hwmod_lookup("dss_core");
@@ -100,19 +121,21 @@
 	pdata.opt_clock_available = opt_clock_available;
 
 	for (i = 0; i < oh_count; i++) {
-		oh = omap_hwmod_lookup(oh_name[i]);
+		oh = omap_hwmod_lookup(curr_dss_hwmod[i].oh_name);
 		if (!oh) {
-			pr_err("Could not look up %s\n", oh_name[i]);
+			pr_err("Could not look up %s\n",
+				curr_dss_hwmod[i].oh_name);
 			return -ENODEV;
 		}
 
-		od = omap_device_build(dev_name[i], -1, oh, &pdata,
+		od = omap_device_build(curr_dss_hwmod[i].dev_name,
+				curr_dss_hwmod[i].id, oh, &pdata,
 				sizeof(struct omap_display_platform_data),
 				omap_dss_latency,
 				ARRAY_SIZE(omap_dss_latency), 0);
 
 		if (WARN((IS_ERR(od)), "Could not build omap_device for %s\n",
-				oh_name[i]))
+				curr_dss_hwmod[i].oh_name))
 			return -ENODEV;
 	}
 	omap_display_device.dev.platform_data = board_data;
diff --git a/arch/arm/mach-omap2/include/mach/board-zoom.h b/arch/arm/mach-omap2/include/mach/board-zoom.h
index d20bd9c..775fdc3 100644
--- a/arch/arm/mach-omap2/include/mach/board-zoom.h
+++ b/arch/arm/mach-omap2/include/mach/board-zoom.h
@@ -1,7 +1,7 @@
 /*
  * Defines for zoom boards
  */
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define ZOOM_NAND_CS    0
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 76f82ae..3f17ea1 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -85,7 +85,7 @@
 	struct meminfo * mi = &meminfo;
 
 	printk("Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 
 	for_each_bank (i, mi) {
 		struct membank *bank = &mi->bank[i];
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6cf76b3..08a9236 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -31,8 +31,6 @@
 
 #include "mm.h"
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index a7314d4..2798c2d 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -25,8 +25,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
 
 struct page *empty_zero_page;
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 2641731..19ccfb3 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -9,15 +9,6 @@
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T output.
-
-	  This option will slow down process creation somewhat.
-
 config DEBUG_VERBOSE
 	bool "Verbose fault messages"
 	default y
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 68a1a59..5ebe6e8 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -266,11 +266,11 @@
 
 
 	/* Let the interrupt stay if possible */
-	if (cpu_isset(cpu, irq_allocations[irq - FIRST_IRQ].mask))
+	if (cpumask_test_cpu(cpu, &irq_allocations[irq - FIRST_IRQ].mask))
 		goto out;
 
 	/* IRQ must be moved to another CPU. */
-	cpu = first_cpu(irq_allocations[irq - FIRST_IRQ].mask);
+	cpu = cpumask_first(&irq_allocations[irq - FIRST_IRQ].mask);
 	irq_allocations[irq - FIRST_IRQ].cpu = cpu;
 out:
 	spin_unlock_irqrestore(&irq_lock, flags);
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 66cc756..a0843a7 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -81,7 +81,7 @@
 
 	/* Mark all possible CPUs as present */
 	for (i = 0; i < max_cpus; i++)
-	    cpu_set(i, phys_cpu_present_map);
+		cpumask_set_cpu(i, &phys_cpu_present_map);
 }
 
 void __devinit smp_prepare_boot_cpu(void)
@@ -98,7 +98,7 @@
 	SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
 
 	set_cpu_online(0, true);
-	cpu_set(0, phys_cpu_present_map);
+	cpumask_set_cpu(0, &phys_cpu_present_map);
 	set_cpu_possible(0, true);
 }
 
@@ -112,8 +112,9 @@
 {
 	unsigned timeout;
 	struct task_struct *idle;
-	cpumask_t cpu_mask = CPU_MASK_NONE;
+	cpumask_t cpu_mask;
 
+	cpumask_clear(&cpu_mask);
 	idle = fork_idle(cpuid);
 	if (IS_ERR(idle))
 		panic("SMP: fork failed for CPU:%d", cpuid);
@@ -125,10 +126,10 @@
 	cpu_now_booting = cpuid;
 
 	/* Kick it */
-	cpu_set(cpuid, cpu_online_map);
-	cpu_set(cpuid, cpu_mask);
+	set_cpu_online(cpuid, true);
+	cpumask_set_cpu(cpuid, &cpu_mask);
 	send_ipi(IPI_BOOT, 0, cpu_mask);
-	cpu_clear(cpuid, cpu_online_map);
+	set_cpu_online(cpuid, false);
 
 	/* Wait for CPU to come online */
 	for (timeout = 0; timeout < 10000; timeout++) {
@@ -176,7 +177,7 @@
 	notify_cpu_starting(cpu);
 	local_irq_enable();
 
-	cpu_set(cpu, cpu_online_map);
+	set_cpu_online(cpu, true);
 	cpu_idle();
 }
 
@@ -214,8 +215,9 @@
 
 void smp_send_reschedule(int cpu)
 {
-	cpumask_t cpu_mask = CPU_MASK_NONE;
-	cpu_set(cpu, cpu_mask);
+	cpumask_t cpu_mask;
+	cpumask_clear(&cpu_mask);
+	cpumask_set_cpu(cpu, &cpu_mask);
 	send_ipi(IPI_SCHEDULE, 0, cpu_mask);
 }
 
@@ -232,7 +234,7 @@
 
 	spin_lock_irqsave(&tlbstate_lock, flags);
 	cpu_mask = (mm == FLUSH_ALL ? cpu_all_mask : *mm_cpumask(mm));
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
 	flush_mm = mm;
 	flush_vma = vma;
 	flush_addr = addr;
@@ -277,10 +279,10 @@
 	int ret = 0;
 
 	/* Calculate CPUs to send to. */
-	cpus_and(cpu_mask, cpu_mask, cpu_online_map);
+	cpumask_and(&cpu_mask, &cpu_mask, cpu_online_mask);
 
 	/* Send the IPI. */
-	for_each_cpu_mask(i, cpu_mask)
+	for_each_cpu(i, &cpu_mask)
 	{
 		ipi.vector |= vector;
 		REG_WR(intr_vect, irq_regs[i], rw_ipi, ipi);
@@ -288,7 +290,7 @@
 
 	/* Wait for IPI to finish on other CPUS */
 	if (wait) {
-		for_each_cpu_mask(i, cpu_mask) {
+		for_each_cpu(i, &cpu_mask) {
                         int j;
                         for (j = 0 ; j < 1000; j++) {
 				ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
@@ -314,11 +316,12 @@
  */
 int smp_call_function(void (*func)(void *info), void *info, int wait)
 {
-	cpumask_t cpu_mask = CPU_MASK_ALL;
+	cpumask_t cpu_mask;
 	struct call_data_struct data;
 	int ret;
 
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpumask_setall(&cpu_mask);
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
 
 	WARN_ON(irqs_disabled());
 
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index df33ab8..d72ab58 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -13,8 +13,6 @@
 #include <linux/bootmem.h>
 #include <asm/tlb.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long empty_zero_page;
 
 extern char _stext, _edata, _etext; /* From linkerscript */
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index ed64588..fbe5f0db 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -41,8 +41,6 @@
 
 #undef DEBUG
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * BAD_PAGE is the page that is used for page faults when linux
  * is out-of-memory. Older versions of linux just did a
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 23cce99..c3ffe3e 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -47,21 +47,27 @@
 #include <asm/machvec.h>
 
 #ifdef CONFIG_SMP
-# define FREE_PTE_NR		2048
 # define tlb_fast_mode(tlb)	((tlb)->nr == ~0U)
 #else
-# define FREE_PTE_NR		0
 # define tlb_fast_mode(tlb)	(1)
 #endif
 
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define	IA64_GATHER_BUNDLE	8
+
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		nr;		/* == ~0U => fast mode */
+	unsigned int		max;
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
 	unsigned long		start_addr;
 	unsigned long		end_addr;
-	struct page 		*pages[FREE_PTE_NR];
+	struct page		**pages;
+	struct page		*local[IA64_GATHER_BUNDLE];
 };
 
 struct ia64_tr_entry {
@@ -90,9 +96,6 @@
 #define RR_RID_MASK	0x00000000ffffff00L
 #define RR_TO_RID(val) 	((val >> 8) & 0xffffff)
 
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * Flush the TLB for address range START to END and, if not in fast mode, release the
  * freed pages that where gathered up to this point.
@@ -147,15 +150,23 @@
 	}
 }
 
-/*
- * Return a pointer to an initialized struct mmu_gather.
- */
-static inline struct mmu_gather *
-tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
 
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(void *);
+	}
+}
+
+
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+{
 	tlb->mm = mm;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
 	/*
 	 * Use fast mode if only 1 CPU is online.
 	 *
@@ -172,7 +183,6 @@
 	tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
 	tlb->fullmm = full_mm_flush;
 	tlb->start_addr = ~0UL;
-	return tlb;
 }
 
 /*
@@ -180,7 +190,7 @@
  * collected.
  */
 static inline void
-tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
@@ -191,7 +201,8 @@
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var(mmu_gathers);
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
 }
 
 /*
@@ -199,18 +210,33 @@
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline void
-tlb_remove_page (struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-		return;
+		return 1; /* avoid calling tlb_flush_mmu */
 	}
+
+	if (!tlb->nr && tlb->pages == tlb->local)
+		__tlb_alloc_page(tlb);
+
 	tlb->pages[tlb->nr++] = page;
-	if (tlb->nr >= FREE_PTE_NR)
-		ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /*
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 9a018cd..f114a3b 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -44,13 +44,16 @@
 	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
 
+		if (skip_free_areas_node(filter, nid))
+			continue;
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -64,8 +67,7 @@
 				if (max_gap < LARGE_GAP)
 					continue;
 #endif
-				i = vmemmap_find_next_valid_pfn(pgdat->node_id,
-					 i) - 1;
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
 				continue;
 			}
 			if (PageReserved(page))
@@ -81,7 +83,7 @@
 		total_cached += cached;
 		total_shared += shared;
 		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+		       "shrd: %10d, swpd: %10d\n", nid,
 		       present, reserved, shared, cached);
 	}
 	printk(KERN_INFO "%ld pages of RAM\n", total_present);
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 82ab1bc..c641333 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -622,13 +622,16 @@
 	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
 
+		if (skip_free_areas_node(filter, nid))
+			continue;
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -638,8 +641,7 @@
 			if (pfn_valid(pgdat->node_start_pfn + i))
 				page = pfn_to_page(pgdat->node_start_pfn + i);
 			else {
-				i = vmemmap_find_next_valid_pfn(pgdat->node_id,
-					 i) - 1;
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
 				continue;
 			}
 			if (PageReserved(page))
@@ -655,7 +657,7 @@
 		total_cached += cached;
 		total_shared += shared;
 		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+		       "shrd: %10d, swpd: %10d\n", nid,
 		       present, reserved, shared, cached);
 	}
 	printk(KERN_INFO "%ld pages of RAM\n", total_present);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ed41759..00cb0e2 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -36,8 +36,6 @@
 #include <asm/mca.h>
 #include <asm/paravirt.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern void ia64_tlb_init (void);
 
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
diff --git a/arch/m32r/Kconfig.debug b/arch/m32r/Kconfig.debug
index 2e1019d..bb1afc1 100644
--- a/arch/m32r/Kconfig.debug
+++ b/arch/m32r/Kconfig.debug
@@ -9,15 +9,6 @@
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
 	depends on DEBUG_KERNEL && BROKEN
diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index e67ded1..8accc1b 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -94,8 +94,6 @@
 
 #define NO_PROC_ID (0xff)	/* No processor magic marker */
 
-#define PROC_CHANGE_PENALTY	(15)	/* Schedule penalty */
-
 /*
  * M32R-mp IPI
  */
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 5d2858f..2c468e8 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -149,6 +149,7 @@
 		zholes_size[ZONE_DMA] = mp->holes;
 		holes += zholes_size[ZONE_DMA];
 
+		node_set_state(nid, N_NORMAL_MEMORY);
 		free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 	}
 
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 73e2205..78b660e 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -35,8 +35,6 @@
 
 pgd_t swapper_pg_dir[1024];
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * Cache of MMU context last used.
  */
diff --git a/arch/m68k/mm/init_mm.c b/arch/m68k/mm/init_mm.c
index 8bc8425..9113c2f 100644
--- a/arch/m68k/mm/init_mm.c
+++ b/arch/m68k/mm/init_mm.c
@@ -32,8 +32,6 @@
 #include <asm/sections.h>
 #include <asm/tlb.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 pg_data_t pg_data_map[MAX_NUMNODES];
 EXPORT_SYMBOL(pg_data_map);
 
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index c843786..213f2d6 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -32,8 +32,6 @@
 EXPORT_SYMBOL(__page_offset);
 
 #else
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static int init_bootmem_done;
 #endif /* CONFIG_MMU */
 
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 5358f90..83ed00a 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -76,15 +76,6 @@
 	  provides another way to check stack overflow happened on kernel mode
 	  stack usually caused by nested interruption.
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config SMTC_IDLE_HOOK_DEBUG
 	bool "Enable additional debug checks before going into CPU idle loop"
 	depends on DEBUG_KERNEL && MIPS_MT_SMTC
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 279599e..1aadeb4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -64,8 +64,6 @@
 
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
  * when needed.  This is necessary only on R4000 / R4400 SC and MC versions
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 86af0d7..2623d19f 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -87,7 +87,7 @@
 		tmp2 = GxICR(irq);
 
 		irq_affinity_online[irq] =
-			any_online_cpu(*d->affinity);
+			cpumask_any_and(d->affinity, cpu_online_mask);
 		CROSS_GxICR(irq, irq_affinity_online[irq]) =
 			(tmp & (GxICR_LEVEL | GxICR_ENABLE)) | GxICR_DETECT;
 		tmp = CROSS_GxICR(irq, irq_affinity_online[irq]);
@@ -124,7 +124,8 @@
 	} else {
 		tmp = GxICR(irq);
 
-		irq_affinity_online[irq] = any_online_cpu(*d->affinity);
+		irq_affinity_online[irq] = cpumask_any_and(d->affinity,
+							   cpu_online_mask);
 		CROSS_GxICR(irq, irq_affinity_online[irq]) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT;
 		tmp = CROSS_GxICR(irq, irq_affinity_online[irq]);
 	}
@@ -366,11 +367,11 @@
 		if (irqd_is_per_cpu(data))
 			continue;
 
-		if (cpu_isset(self, data->affinity) &&
-		    !cpus_intersects(irq_affinity[irq], cpu_online_map)) {
+		if (cpumask_test_cpu(self, &data->affinity) &&
+		    !cpumask_intersects(&irq_affinity[irq], cpu_online_mask)) {
 			int cpu_id;
-			cpu_id = first_cpu(cpu_online_map);
-			cpu_set(cpu_id, data->affinity);
+			cpu_id = cpumask_first(cpu_online_mask);
+			cpumask_set_cpu(cpu_id, &data->affinity);
 		}
 		/* We need to operate irq_affinity_online atomically. */
 		arch_local_cli_save(flags);
@@ -381,7 +382,8 @@
 			GxICR(irq) = x & GxICR_LEVEL;
 			tmp = GxICR(irq);
 
-			new = any_online_cpu(data->affinity);
+			new = cpumask_any_and(&data->affinity,
+					      cpu_online_mask);
 			irq_affinity_online[irq] = new;
 
 			CROSS_GxICR(irq, new) =
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 83fb279..9242e9f 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -309,7 +309,7 @@
 	u16 tmp;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_isset(i, *cpumask)) {
+		if (cpumask_test_cpu(i, cpumask)) {
 			/* send IPI */
 			tmp = CROSS_GxICR(irq, i);
 			CROSS_GxICR(irq, i) =
@@ -342,8 +342,8 @@
 {
 	cpumask_t cpumask;
 
-	cpumask = cpu_online_map;
-	cpu_clear(smp_processor_id(), cpumask);
+	cpumask_copy(&cpumask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &cpumask);
 	send_IPI_mask(&cpumask, irq);
 }
 
@@ -393,8 +393,8 @@
 
 	data.func = func;
 	data.info = info;
-	data.started = cpu_online_map;
-	cpu_clear(smp_processor_id(), data.started);
+	cpumask_copy(&data.started, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &data.started);
 	data.wait = wait;
 	if (wait)
 		data.finished = data.started;
@@ -410,14 +410,14 @@
 	if (CALL_FUNCTION_NMI_IPI_TIMEOUT > 0) {
 		for (cnt = 0;
 		     cnt < CALL_FUNCTION_NMI_IPI_TIMEOUT &&
-			     !cpus_empty(data.started);
+			     !cpumask_empty(&data.started);
 		     cnt++)
 			mdelay(1);
 
 		if (wait && cnt < CALL_FUNCTION_NMI_IPI_TIMEOUT) {
 			for (cnt = 0;
 			     cnt < CALL_FUNCTION_NMI_IPI_TIMEOUT &&
-				     !cpus_empty(data.finished);
+				     !cpumask_empty(&data.finished);
 			     cnt++)
 				mdelay(1);
 		}
@@ -428,10 +428,10 @@
 	} else {
 		/* If timeout value is zero, wait until cpumask has been
 		 * cleared */
-		while (!cpus_empty(data.started))
+		while (!cpumask_empty(&data.started))
 			barrier();
 		if (wait)
-			while (!cpus_empty(data.finished))
+			while (!cpumask_empty(&data.finished))
 				barrier();
 	}
 
@@ -472,12 +472,12 @@
 #endif	/* CONFIG_GDBSTUB */
 
 	flags = arch_local_cli_save();
-	cpu_clear(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), false);
 
 	while (!stopflag)
 		cpu_relax();
 
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 	arch_local_irq_restore(flags);
 }
 
@@ -529,12 +529,13 @@
 	 * execute the function
 	 */
 	smp_mb();
-	cpu_clear(smp_processor_id(), nmi_call_data->started);
+	cpumask_clear_cpu(smp_processor_id(), &nmi_call_data->started);
 	(*func)(info);
 
 	if (wait) {
 		smp_mb();
-		cpu_clear(smp_processor_id(), nmi_call_data->finished);
+		cpumask_clear_cpu(smp_processor_id(),
+				  &nmi_call_data->finished);
 	}
 }
 
@@ -657,7 +658,7 @@
 {
 	smp_cpu_init();
 	smp_callin();
-	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+	while (!cpumask_test_cpu(smp_processor_id(), &smp_commenced_mask))
 		cpu_relax();
 
 	local_flush_tlb();
@@ -780,13 +781,14 @@
 
 	if (send_status == 0) {
 		/* Allow AP to start initializing */
-		cpu_set(cpu_id, cpu_callout_map);
+		cpumask_set_cpu(cpu_id, &cpu_callout_map);
 
 		/* Wait for setting cpu_callin_map */
 		timeout = 0;
 		do {
 			udelay(1000);
-			callin_status = cpu_isset(cpu_id, cpu_callin_map);
+			callin_status = cpumask_test_cpu(cpu_id,
+							 &cpu_callin_map);
 		} while (callin_status == 0 && timeout++ < 5000);
 
 		if (callin_status == 0)
@@ -796,9 +798,9 @@
 	}
 
 	if (send_status == GxICR_REQUEST || callin_status == 0) {
-		cpu_clear(cpu_id, cpu_callout_map);
-		cpu_clear(cpu_id, cpu_callin_map);
-		cpu_clear(cpu_id, cpu_initialized);
+		cpumask_clear_cpu(cpu_id, &cpu_callout_map);
+		cpumask_clear_cpu(cpu_id, &cpu_callin_map);
+		cpumask_clear_cpu(cpu_id, &cpu_initialized);
 		cpucount--;
 		return 1;
 	}
@@ -833,7 +835,7 @@
 	cpu = smp_processor_id();
 	timeout = jiffies + (2 * HZ);
 
-	if (cpu_isset(cpu, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpu, &cpu_callin_map)) {
 		printk(KERN_ERR "CPU#%d already present.\n", cpu);
 		BUG();
 	}
@@ -841,7 +843,7 @@
 
 	/* Wait for AP startup 2s total */
 	while (time_before(jiffies, timeout)) {
-		if (cpu_isset(cpu, cpu_callout_map))
+		if (cpumask_test_cpu(cpu, &cpu_callout_map))
 			break;
 		cpu_relax();
 	}
@@ -861,11 +863,11 @@
 	smp_store_cpu_info(cpu);
 
 	/* Allow the boot processor to continue */
-	cpu_set(cpu, cpu_callin_map);
+	cpumask_set_cpu(cpu, &cpu_callin_map);
 }
 
 /**
- * smp_online - Set cpu_online_map
+ * smp_online - Set cpu_online_mask
  */
 static void __init smp_online(void)
 {
@@ -875,7 +877,7 @@
 
 	local_irq_enable();
 
-	cpu_set(cpu, cpu_online_map);
+	set_cpu_online(cpu, true);
 	smp_wmb();
 }
 
@@ -892,13 +894,13 @@
 /*
  * smp_prepare_boot_cpu - Set up stuff for the boot processor.
  *
- * Set up the cpu_online_map, cpu_callout_map and cpu_callin_map of the boot
+ * Set up the cpu_online_mask, cpu_callout_map and cpu_callin_map of the boot
  * processor (CPU 0).
  */
 void __devinit smp_prepare_boot_cpu(void)
 {
-	cpu_set(0, cpu_callout_map);
-	cpu_set(0, cpu_callin_map);
+	cpumask_set_cpu(0, &cpu_callout_map);
+	cpumask_set_cpu(0, &cpu_callin_map);
 	current_thread_info()->cpu = 0;
 }
 
@@ -931,16 +933,16 @@
 		run_wakeup_cpu(cpu);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-	cpu_set(cpu, smp_commenced_mask);
+	cpumask_set_cpu(cpu, &smp_commenced_mask);
 
 	/* Wait 5s total for a response */
 	for (timeout = 0 ; timeout < 5000 ; timeout++) {
-		if (cpu_isset(cpu, cpu_online_map))
+		if (cpu_online(cpu))
 			break;
 		udelay(1000);
 	}
 
-	BUG_ON(!cpu_isset(cpu, cpu_online_map));
+	BUG_ON(!cpu_online(cpu));
 	return 0;
 }
 
@@ -986,7 +988,7 @@
 		return -EBUSY;
 
 	migrate_irqs();
-	cpu_clear(cpu, current->active_mm->cpu_vm_mask);
+	cpumask_clear_cpu(cpu, &mm_cpumask(current->active_mm));
 	return 0;
 }
 
@@ -1091,13 +1093,13 @@
 	do {
 		mn10300_local_dcache_inv_range(start, end);
 		barrier();
-	} while (!cpus_empty(nmi_call_func_mask_data.started));
+	} while (!cpumask_empty(&nmi_call_func_mask_data.started));
 
 	if (wait) {
 		do {
 			mn10300_local_dcache_inv_range(start, end);
 			barrier();
-		} while (!cpus_empty(nmi_call_func_mask_data.finished));
+		} while (!cpumask_empty(&nmi_call_func_mask_data.finished));
 	}
 
 	spin_unlock(&smp_nmi_call_lock);
@@ -1108,9 +1110,9 @@
 {
 	unsigned int cpu = smp_processor_id();
 
-	cpu_set(cpu, cpu_callin_map);
+	cpumask_set_cpu(cpu, &cpu_callin_map);
 	local_flush_tlb();
-	cpu_set(cpu, cpu_online_map);
+	set_cpu_online(cpu, true);
 	smp_wmb();
 }
 
@@ -1141,8 +1143,9 @@
 static void run_sleep_cpu(unsigned int cpu)
 {
 	unsigned long flags;
-	cpumask_t cpumask = cpumask_of(cpu);
+	cpumask_t cpumask;
 
+	cpumask_copy(&cpumask, &cpumask_of(cpu));
 	flags = arch_local_cli_save();
 	hotplug_cpu_nmi_call_function(cpumask, prepare_sleep_cpu, NULL, 1);
 	hotplug_cpu_nmi_call_function(cpumask, sleep_cpu, NULL, 0);
diff --git a/arch/mn10300/mm/cache-smp.c b/arch/mn10300/mm/cache-smp.c
index 4a6e9a4..2d23b9e 100644
--- a/arch/mn10300/mm/cache-smp.c
+++ b/arch/mn10300/mm/cache-smp.c
@@ -74,7 +74,7 @@
 		break;
 	}
 
-	cpu_clear(smp_processor_id(), smp_cache_ipi_map);
+	cpumask_clear_cpu(smp_processor_id(), &smp_cache_ipi_map);
 }
 
 /**
@@ -94,12 +94,12 @@
 	smp_cache_mask = opr_mask;
 	smp_cache_start = start;
 	smp_cache_end = end;
-	smp_cache_ipi_map = cpu_online_map;
-	cpu_clear(smp_processor_id(), smp_cache_ipi_map);
+	cpumask_copy(&smp_cache_ipi_map, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &smp_cache_ipi_map);
 
 	send_IPI_allbutself(FLUSH_CACHE_IPI);
 
-	while (!cpus_empty(smp_cache_ipi_map))
+	while (!cpumask_empty(&smp_cache_ipi_map))
 		/* nothing. lockup detection does not belong here */
 		mb();
 }
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 48907cc..1380182 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -37,8 +37,6 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long highstart_pfn, highend_pfn;
 
 #ifdef CONFIG_MN10300_HAS_ATOMIC_OPS_UNIT
diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c
index 0b6a5ad..9a77749 100644
--- a/arch/mn10300/mm/tlb-smp.c
+++ b/arch/mn10300/mm/tlb-smp.c
@@ -64,7 +64,7 @@
 
 	cpu_id = get_cpu();
 
-	if (!cpu_isset(cpu_id, flush_cpumask))
+	if (!cpumask_test_cpu(cpu_id, &flush_cpumask))
 		/* This was a BUG() but until someone can quote me the line
 		 * from the intel manual that guarantees an IPI to multiple
 		 * CPUs is retried _only_ on the erroring CPUs its staying as a
@@ -80,7 +80,7 @@
 		local_flush_tlb_page(flush_mm, flush_va);
 
 	smp_mb__before_clear_bit();
-	cpu_clear(cpu_id, flush_cpumask);
+	cpumask_clear_cpu(cpu_id, &flush_cpumask);
 	smp_mb__after_clear_bit();
 out:
 	put_cpu();
@@ -103,11 +103,11 @@
 	 * - we do not send IPIs to as-yet unbooted CPUs.
 	 */
 	BUG_ON(!mm);
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(cpumask_empty(&cpumask));
+	BUG_ON(cpumask_test_cpu(smp_processor_id(), &cpumask));
 
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(cpumask, tmp));
+	cpumask_and(&tmp, &cpumask, cpu_online_mask);
+	BUG_ON(!cpumask_equal(&cpumask, &tmp));
 
 	/* I'm not happy about this global shared spinlock in the MM hot path,
 	 * but we'll see how contended it is.
@@ -128,7 +128,7 @@
 	/* FIXME: if NR_CPUS>=3, change send_IPI_mask */
 	smp_call_function(smp_flush_tlb, NULL, 1);
 
-	while (!cpus_empty(flush_cpumask))
+	while (!cpumask_empty(&flush_cpumask))
 		/* Lockup detection does not belong here */
 		smp_mb();
 
@@ -146,11 +146,11 @@
 	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpumask_copy(&cpu_mask, mm_cpumask(mm));
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(&cpu_mask))
 		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 
 	preempt_enable();
@@ -165,11 +165,11 @@
 	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpumask_copy(&cpu_mask, mm_cpumask(mm));
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(&cpu_mask))
 		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 
 	preempt_enable();
@@ -186,11 +186,11 @@
 	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
+	cpumask_copy(&cpu_mask, mm_cpumask(mm));
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
 
 	local_flush_tlb_page(mm, va);
-	if (!cpus_empty(cpu_mask))
+	if (!cpumask_empty(&cpu_mask))
 		flush_tlb_others(cpu_mask, mm, va);
 
 	preempt_enable();
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 2e73623..e8f8037 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -33,15 +33,6 @@
 
 #endif /* !ASSEMBLY */
 
-/*
- *	This magic constant controls our willingness to transfer
- *      a process across CPUs. Such a transfer incurs cache and tlb
- *      misses. The current value is inherited from i386. Still needs
- *      to be tuned for parisc.
- */
- 
-#define PROC_CHANGE_PENALTY	15		/* Schedule penalty */
-
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 #else /* CONFIG_SMP */
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 5fa1e27..82f364e 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -31,8 +31,6 @@
 #include <asm/mmzone.h>
 #include <asm/sections.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern int  data_start;
 
 #ifdef CONFIG_DISCONTIGMEM
@@ -686,7 +684,7 @@
 	int shared = 0, cached = 0;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 #ifndef CONFIG_DISCONTIGMEM
 	i = max_mapnr;
 	while (i-- > 0) {
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a3128ca..423145a6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@
 	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
+	select HAVE_RCU_TABLE_FREE if SMP
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index a597dd7..e72dcf6 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -35,27 +35,6 @@
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
-config DEBUG_PER_CPU_MAPS
-	bool "Debug access to per_cpu maps"
-	depends on DEBUG_KERNEL
-	depends on SMP
-	default n
-	---help---
-	  Say Y to verify that the per_cpu map being accessed has
-	  been setup.  Adds a fair amount of code to kernel memory
-	  and decreases performance.
-
-	  Say N if unsure.
-
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
 	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts
index 761faa7..ac1eb32 100644
--- a/arch/powerpc/boot/dts/mpc8313erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -176,6 +176,19 @@
 			sleep = <&pmc 0x00300000>;
 		};
 
+		ptp_clock@24E00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0x24E00 0xB0>;
+			interrupts = <12 0x8 13 0x8>;
+			interrupt-parent = < &ipic >;
+			fsl,tclk-period = <10>;
+			fsl,tmr-prsc    = <100>;
+			fsl,tmr-add     = <0x999999A4>;
+			fsl,tmr-fiper1  = <0x3B9AC9F6>;
+			fsl,tmr-fiper2  = <0x00018696>;
+			fsl,max-adj     = <659999998>;
+		};
+
 		enet0: ethernet@24000 {
 			#address-cells = <1>;
 			#size-cells = <1>;
diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts
index cafc1285..f6c04d2 100644
--- a/arch/powerpc/boot/dts/mpc8572ds.dts
+++ b/arch/powerpc/boot/dts/mpc8572ds.dts
@@ -324,6 +324,19 @@
 			};
 		};
 
+		ptp_clock@24E00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0x24E00 0xB0>;
+			interrupts = <68 2 69 2 70 2 71 2>;
+			interrupt-parent = < &mpic >;
+			fsl,tclk-period = <5>;
+			fsl,tmr-prsc = <200>;
+			fsl,tmr-add = <0xAAAAAAAB>;
+			fsl,tmr-fiper1 = <0x3B9AC9FB>;
+			fsl,tmr-fiper2 = <0x3B9AC9FB>;
+			fsl,max-adj = <499999999>;
+		};
+
 		enet0: ethernet@24000 {
 			#address-cells = <1>;
 			#size-cells = <1>;
diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
index 2bcf368..dae4031 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -178,6 +178,19 @@
 
 		};
 
+		ptp_clock@24E00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0x24E00 0xB0>;
+			interrupts = <68 2 69 2 70 2>;
+			interrupt-parent = < &mpic >;
+			fsl,tclk-period = <5>;
+			fsl,tmr-prsc = <200>;
+			fsl,tmr-add = <0xCCCCCCCD>;
+			fsl,tmr-fiper1 = <0x3B9AC9FB>;
+			fsl,tmr-fiper2 = <0x0001869B>;
+			fsl,max-adj = <249999999>;
+		};
+
 		enet0: ethernet@24000 {
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index 3782a58..1d7a05f 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -224,6 +224,19 @@
 			status = "disabled";
 		};
 
+		ptp_clock@24E00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0x24E00 0xB0>;
+			interrupts = <68 2 69 2 70 2>;
+			interrupt-parent = < &mpic >;
+			fsl,tclk-period = <5>;
+			fsl,tmr-prsc = <200>;
+			fsl,tmr-add = <0xCCCCCCCD>;
+			fsl,tmr-fiper1 = <0x3B9AC9FB>;
+			fsl,tmr-fiper2 = <0x0001869B>;
+			fsl,max-adj = <249999999>;
+		};
+
 		enet0: ethernet@24000 {
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index abe8532..bf301ac 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -31,14 +31,29 @@
 #endif
 
 #ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(void);
+struct mmu_gather;
+extern void tlb_remove_table(struct mmu_gather *, void *);
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
 #else /* CONFIG_SMP */
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
 	pgtable_free(table, shift);
 }
-static inline void pte_free_finish(void) { }
 #endif /* !CONFIG_SMP */
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index d8529ef..37c353e 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -139,10 +139,12 @@
 #define TLF_NAPPING		0	/* idle thread enabled NAP mode */
 #define TLF_SLEEPING		1	/* suspend code enabled SLEEP mode */
 #define TLF_RESTORE_SIGMASK	2	/* Restore signal mask in do_signal */
+#define TLF_LAZY_MMU		3	/* tlb_batch is active */
 
 #define _TLF_NAPPING		(1 << TLF_NAPPING)
 #define _TLF_SLEEPING		(1 << TLF_SLEEPING)
 #define _TLF_RESTORE_SIGMASK	(1 << TLF_RESTORE_SIGMASK)
+#define _TLF_LAZY_MMU		(1 << TLF_LAZY_MMU)
 
 #ifndef __ASSEMBLY__
 #define HAVE_SET_RESTORE_SIGMASK	1
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 095043d..91e52df 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -395,6 +395,9 @@
 	struct thread_struct *new_thread, *old_thread;
 	unsigned long flags;
 	struct task_struct *last;
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct ppc64_tlb_batch *batch;
+#endif
 
 #ifdef CONFIG_SMP
 	/* avoid complexity of lazy save/restore of fpu
@@ -513,7 +516,17 @@
 		old_thread->accum_tb += (current_tb - start_tb);
 		new_thread->start_tb = current_tb;
 	}
-#endif
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	batch = &__get_cpu_var(ppc64_tlb_batch);
+	if (batch->active) {
+		current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+		if (batch->index)
+			__flush_tlb_pending(batch);
+		batch->active = 0;
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 	local_irq_save(flags);
 
@@ -528,6 +541,14 @@
 	hard_irq_disable();
 	last = _switch(old_thread, new_thread);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
+		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
+		batch = &__get_cpu_var(ppc64_tlb_batch);
+		batch->active = 1;
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	local_irq_restore(flags);
 
 	return last;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6a3997f..af40c87 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -33,110 +33,6 @@
 
 #include "mmu_decl.h"
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-#ifdef CONFIG_SMP
-
-/*
- * Handle batching of page table freeing on SMP. Page tables are
- * queued up and send to be freed later by RCU in order to avoid
- * freeing a page table page that is being walked without locks
- */
-
-static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
-static unsigned long pte_freelist_forced_free;
-
-struct pte_freelist_batch
-{
-	struct rcu_head	rcu;
-	unsigned int	index;
-	unsigned long	tables[0];
-};
-
-#define PTE_FREELIST_SIZE \
-	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
-	  / sizeof(unsigned long))
-
-static void pte_free_smp_sync(void *arg)
-{
-	/* Do nothing, just ensure we sync with all CPUs */
-}
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-static void pgtable_free_now(void *table, unsigned shift)
-{
-	pte_freelist_forced_free++;
-
-	smp_call_function(pte_free_smp_sync, NULL, 1);
-
-	pgtable_free(table, shift);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-	struct pte_freelist_batch *batch =
-		container_of(head, struct pte_freelist_batch, rcu);
-	unsigned int i;
-
-	for (i = 0; i < batch->index; i++) {
-		void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
-		unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
-
-		pgtable_free(table, shift);
-	}
-
-	free_page((unsigned long)batch);
-}
-
-static void pte_free_submit(struct pte_freelist_batch *batch)
-{
-	call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
-	/* This is safe since tlb_gather_mmu has disabled preemption */
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
-	unsigned long pgf;
-
-	if (atomic_read(&tlb->mm->mm_users) < 2 ||
-	    cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
-		pgtable_free(table, shift);
-		return;
-	}
-
-	if (*batchp == NULL) {
-		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
-		if (*batchp == NULL) {
-			pgtable_free_now(table, shift);
-			return;
-		}
-		(*batchp)->index = 0;
-	}
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf = (unsigned long)table | shift;
-	(*batchp)->tables[(*batchp)->index++] = pgf;
-	if ((*batchp)->index == PTE_FREELIST_SIZE) {
-		pte_free_submit(*batchp);
-		*batchp = NULL;
-	}
-}
-
-void pte_free_finish(void)
-{
-	/* This is safe since tlb_gather_mmu has disabled preemption */
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
-
-	if (*batchp == NULL)
-		return;
-	pte_free_submit(*batchp);
-	*batchp = NULL;
-}
-
-#endif /* CONFIG_SMP */
-
 static inline int is_exec_fault(void)
 {
 	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 690566b..27b863c 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@
 		 */
 		_tlbia();
 	}
-
-	/* Push out batch of freed page tables */
-	pte_free_finish();
 }
 
 /*
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c14d09f..31f1820 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -155,7 +155,7 @@
 
 void tlb_flush(struct mmu_gather *tlb)
 {
-	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
 	/* If there's a TLB batch pending, then we must flush it because the
 	 * pages are going to be freed and we really don't want to have a CPU
@@ -164,8 +164,7 @@
 	if (tlbbatch->index)
 		__flush_tlb_pending(tlbbatch);
 
-	/* Push out batch of freed page tables */
-	pte_free_finish();
+	put_cpu_var(ppc64_tlb_batch);
 }
 
 /**
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 2a030d8..0bdad3a 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@
 void tlb_flush(struct mmu_gather *tlb)
 {
 	flush_tlb_mm(tlb->mm);
-
-	/* Push out batch of freed page tables */
-	pte_free_finish();
 }
 
 /*
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 9074a54..77eee54 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -29,65 +29,77 @@
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
-#ifndef CONFIG_SMP
-#define TLB_NR_PTRS	1
-#else
-#define TLB_NR_PTRS	508
-#endif
-
 struct mmu_gather {
 	struct mm_struct *mm;
 	unsigned int fullmm;
 	unsigned int nr_ptes;
 	unsigned int nr_pxds;
-	void *array[TLB_NR_PTRS];
+	unsigned int max;
+	void **array;
+	void *local[8];
 };
 
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
-						unsigned int full_mm_flush)
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
 
-	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
-	tlb->nr_ptes = 0;
-	tlb->nr_pxds = TLB_NR_PTRS;
-	if (tlb->fullmm)
-		__tlb_flush_mm(mm);
-	return tlb;
+	if (addr) {
+		tlb->array = (void *) addr;
+		tlb->max = PAGE_SIZE / sizeof(void *);
+	}
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb,
-				 unsigned long start, unsigned long end)
+static inline void tlb_gather_mmu(struct mmu_gather *tlb,
+				  struct mm_struct *mm,
+				  unsigned int full_mm_flush)
 {
-	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS))
+	tlb->mm = mm;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->array = tlb->local;
+	tlb->fullmm = full_mm_flush;
+	if (tlb->fullmm)
+		__tlb_flush_mm(mm);
+	else
+		__tlb_alloc_page(tlb);
+	tlb->nr_ptes = 0;
+	tlb->nr_pxds = tlb->max;
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
 		__tlb_flush_mm(tlb->mm);
 	while (tlb->nr_ptes > 0)
 		page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]);
-	while (tlb->nr_pxds < TLB_NR_PTRS)
+	while (tlb->nr_pxds < tlb->max)
 		crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]);
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	rcu_table_freelist_finish();
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var(mmu_gathers);
+	if (tlb->array != tlb->local)
+		free_pages((unsigned long) tlb->array, 0);
 }
 
 /*
  * Release the page cache reference for a pte removed by
- * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 1; /* avoid calling tlb_flush_mmu */
+}
+
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	free_page_and_swap_cache(page);
@@ -103,7 +115,7 @@
 	if (!tlb->fullmm) {
 		tlb->array[tlb->nr_ptes++] = pte;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		page_table_free(tlb->mm, (unsigned long *) pte);
 }
@@ -124,7 +136,7 @@
 	if (!tlb->fullmm) {
 		tlb->array[--tlb->nr_pxds] = pmd;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
@@ -146,7 +158,7 @@
 	if (!tlb->fullmm) {
 		tlb->array[--tlb->nr_pxds] = pud;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d43306..14c6fae 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -36,7 +36,6 @@
 	((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
 	  / sizeof(unsigned long))
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
 
 static void __page_table_free(struct mm_struct *mm, unsigned long *table);
diff --git a/arch/score/Kconfig.debug b/arch/score/Kconfig.debug
index 451ed54..a1f346d 100644
--- a/arch/score/Kconfig.debug
+++ b/arch/score/Kconfig.debug
@@ -16,15 +16,6 @@
 	  other cases you can specify kernel args so that you don't have
 	  to set them up in board prom initialization routines.
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config RUNTIME_DEBUG
 	bool "Enable run-time debugging"
 	depends on DEBUG_KERNEL
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 50fdec5..cee6bce 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -38,8 +38,6 @@
 #include <asm/sections.h>
 #include <asm/tlb.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long empty_zero_page;
 EXPORT_SYMBOL_GPL(empty_zero_page);
 
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 1553d56..c1d5a82 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -28,15 +28,6 @@
 	  every function call and will therefore incur a major
 	  performance hit. Most users should say N.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
 	depends on DEBUG_KERNEL && (MMU || BROKEN) && !PAGE_SIZE_64KB
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 75abb38..6c308d8 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -23,8 +23,6 @@
 	unsigned long		start, end;
 };
 
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static inline void init_tlb_gather(struct mmu_gather *tlb)
 {
 	tlb->start = TASK_SIZE;
@@ -36,17 +34,13 @@
 	}
 }
 
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
 	tlb->mm = mm;
 	tlb->fullmm = full_mm_flush;
 
 	init_tlb_gather(tlb);
-
-	return tlb;
 }
 
 static inline void
@@ -57,8 +51,6 @@
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-
-	put_cpu_var(mmu_gathers);
 }
 
 static inline void
@@ -91,7 +83,21 @@
 	}
 }
 
-#define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+}
+
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 1; /* avoid calling tlb_flush_mmu */
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	__tlb_remove_page(tlb, page);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
 #define pmd_free_tlb(tlb, pmdp, addr)	pmd_free((tlb)->mm, pmdp)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 0d3f912..58a93fb3 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -28,7 +28,6 @@
 #include <asm/cache.h>
 #include <asm/sizes.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 void __init generic_mem_init(void)
diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug
index d9a795e..6db35fb 100644
--- a/arch/sparc/Kconfig.debug
+++ b/arch/sparc/Kconfig.debug
@@ -6,15 +6,6 @@
 
 source "lib/Kconfig.debug"
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config DEBUG_DCFLUSH
 	bool "D-cache flush debugging"
 	depends on SPARC64 && DEBUG_KERNEL
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 5bdfa2c..4e5e087 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -78,4 +78,7 @@
 	quicklist_trim(0, NULL, 25, 16);
 }
 
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
+#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+
 #endif /* _SPARC64_PGALLOC_H */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index b77128c..1e03c5a 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -655,9 +655,11 @@
 #define pte_unmap(pte)			do { } while (0)
 
 /* Actual page table PTE updates.  */
-extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig);
+extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+			  pte_t *ptep, pte_t orig, int fullmm);
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep, pte_t pte, int fullmm)
 {
 	pte_t orig = *ptep;
 
@@ -670,12 +672,19 @@
 	 *             and SUN4V pte layout, so this inline test is fine.
 	 */
 	if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID))
-		tlb_batch_add(mm, addr, ptep, orig);
+		tlb_batch_add(mm, addr, ptep, orig, fullmm);
 }
 
+#define set_pte_at(mm,addr,ptep,pte)	\
+	__set_pte_at((mm), (addr), (ptep), (pte), 0)
+
 #define pte_clear(mm,addr,ptep)		\
 	set_pte_at((mm), (addr), (ptep), __pte(0UL))
 
+#define __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
+#define pte_clear_not_present_full(mm,addr,ptep,fullmm)	\
+	__set_pte_at((mm), (addr), (ptep), __pte(0UL), (fullmm))
+
 #ifdef DCACHE_ALIASING_POSSIBLE
 #define __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, prot, old_addr, new_addr)				\
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index dca406b..190e189 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -7,66 +7,11 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define TLB_BATCH_NR	192
-
-/*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
- */
-#ifdef CONFIG_SMP
-  #define FREE_PTE_NR	506
-  #define tlb_fast_mode(bp) ((bp)->pages_nr == ~0U)
-#else
-  #define FREE_PTE_NR	1
-  #define tlb_fast_mode(bp) 1
-#endif
-
-struct mmu_gather {
-	struct mm_struct *mm;
-	unsigned int pages_nr;
-	unsigned int need_flush;
-	unsigned int fullmm;
-	unsigned int tlb_nr;
-	unsigned long vaddrs[TLB_BATCH_NR];
-	struct page *pages[FREE_PTE_NR];
-};
-
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 #ifdef CONFIG_SMP
 extern void smp_flush_tlb_pending(struct mm_struct *,
 				  unsigned long, unsigned long *);
 #endif
 
-extern void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
-extern void flush_tlb_pending(void);
-
-static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
-{
-	struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
-
-	BUG_ON(mp->tlb_nr);
-
-	mp->mm = mm;
-	mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
-	mp->fullmm = full_mm_flush;
-
-	return mp;
-}
-
-
-static inline void tlb_flush_mmu(struct mmu_gather *mp)
-{
-	if (!mp->fullmm)
-		flush_tlb_pending();
-	if (mp->need_flush) {
-		free_pages_and_swap_cache(mp->pages, mp->pages_nr);
-		mp->pages_nr = 0;
-		mp->need_flush = 0;
-	}
-
-}
-
 #ifdef CONFIG_SMP
 extern void smp_flush_tlb_mm(struct mm_struct *mm);
 #define do_flush_tlb_mm(mm) smp_flush_tlb_mm(mm)
@@ -74,38 +19,14 @@
 #define do_flush_tlb_mm(mm) __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT)
 #endif
 
-static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
-{
-	tlb_flush_mmu(mp);
+extern void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
+extern void flush_tlb_pending(void);
 
-	if (mp->fullmm)
-		mp->fullmm = 0;
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-
-	put_cpu_var(mmu_gathers);
-}
-
-static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
-{
-	if (tlb_fast_mode(mp)) {
-		free_page_and_swap_cache(page);
-		return;
-	}
-	mp->need_flush = 1;
-	mp->pages[mp->pages_nr++] = page;
-	if (mp->pages_nr >= FREE_PTE_NR)
-		tlb_flush_mmu(mp);
-}
-
-#define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0)
-#define pte_free_tlb(mp, ptepage, addr) pte_free((mp)->mm, ptepage)
-#define pmd_free_tlb(mp, pmdp, addr) pmd_free((mp)->mm, pmdp)
-#define pud_free_tlb(tlb,pudp, addr) __pud_free_tlb(tlb,pudp,addr)
-
-#define tlb_migrate_finish(mm)	do { } while (0)
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush(tlb)	flush_tlb_pending()
+
+#include <asm-generic/tlb.h>
 
 #endif /* _SPARC64_TLB_H */
diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h
index fbb675d..2ef4634 100644
--- a/arch/sparc/include/asm/tlbflush_64.h
+++ b/arch/sparc/include/asm/tlbflush_64.h
@@ -5,9 +5,17 @@
 #include <asm/mmu_context.h>
 
 /* TSB flush operations. */
-struct mmu_gather;
+
+#define TLB_BATCH_NR	192
+
+struct tlb_batch {
+	struct mm_struct *mm;
+	unsigned long tlb_nr;
+	unsigned long vaddrs[TLB_BATCH_NR];
+};
+
 extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
-extern void flush_tsb_user(struct mmu_gather *mp);
+extern void flush_tsb_user(struct tlb_batch *tb);
 
 /* TLB flush operations. */
 
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 3609bde..3249d3f 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -82,7 +82,7 @@
 			     "nop\n\t" : : "r" (&trapbase));
 
 	prom_printf("PROM SYNC COMMAND...\n");
-	show_free_areas();
+	show_free_areas(0);
 	if(current->pid != 0) {
 		local_irq_enable();
 		sys_sync();
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 4c31e2b..ca21732 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -37,8 +37,6 @@
 #include <asm/prom.h>
 #include <asm/leon.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long *sparc_valid_addr_bitmap;
 EXPORT_SYMBOL(sparc_valid_addr_bitmap);
 
@@ -78,7 +76,7 @@
 void show_mem(unsigned int filter)
 {
 	printk("Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk("Free swap:       %6ldkB\n",
 	       nr_swap_pages << (PAGE_SHIFT-10));
 	printk("%ld pages of RAM\n", totalram_pages);
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index d8f21e2..b1f279c 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -19,33 +19,34 @@
 
 /* Heavily inspired by the ppc64 code.  */
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+static DEFINE_PER_CPU(struct tlb_batch, tlb_batch);
 
 void flush_tlb_pending(void)
 {
-	struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
+	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
 
-	if (mp->tlb_nr) {
-		flush_tsb_user(mp);
+	if (tb->tlb_nr) {
+		flush_tsb_user(tb);
 
-		if (CTX_VALID(mp->mm->context)) {
+		if (CTX_VALID(tb->mm->context)) {
 #ifdef CONFIG_SMP
-			smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
-					      &mp->vaddrs[0]);
+			smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
+					      &tb->vaddrs[0]);
 #else
-			__flush_tlb_pending(CTX_HWBITS(mp->mm->context),
-					    mp->tlb_nr, &mp->vaddrs[0]);
+			__flush_tlb_pending(CTX_HWBITS(tb->mm->context),
+					    tb->tlb_nr, &tb->vaddrs[0]);
 #endif
 		}
-		mp->tlb_nr = 0;
+		tb->tlb_nr = 0;
 	}
 
-	put_cpu_var(mmu_gathers);
+	put_cpu_var(tlb_batch);
 }
 
-void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig)
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig, int fullmm)
 {
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
 	unsigned long nr;
 
 	vaddr &= PAGE_MASK;
@@ -77,21 +78,25 @@
 
 no_cache_flush:
 
-	if (mp->fullmm)
+	if (fullmm) {
+		put_cpu_var(tlb_batch);
 		return;
+	}
 
-	nr = mp->tlb_nr;
+	nr = tb->tlb_nr;
 
-	if (unlikely(nr != 0 && mm != mp->mm)) {
+	if (unlikely(nr != 0 && mm != tb->mm)) {
 		flush_tlb_pending();
 		nr = 0;
 	}
 
 	if (nr == 0)
-		mp->mm = mm;
+		tb->mm = mm;
 
-	mp->vaddrs[nr] = vaddr;
-	mp->tlb_nr = ++nr;
+	tb->vaddrs[nr] = vaddr;
+	tb->tlb_nr = ++nr;
 	if (nr >= TLB_BATCH_NR)
 		flush_tlb_pending();
+
+	put_cpu_var(tlb_batch);
 }
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 101d7c8..9484615 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -47,12 +47,13 @@
 	}
 }
 
-static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
+static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+			    unsigned long tsb, unsigned long nentries)
 {
 	unsigned long i;
 
-	for (i = 0; i < mp->tlb_nr; i++) {
-		unsigned long v = mp->vaddrs[i];
+	for (i = 0; i < tb->tlb_nr; i++) {
+		unsigned long v = tb->vaddrs[i];
 		unsigned long tag, ent, hash;
 
 		v &= ~0x1UL;
@@ -65,9 +66,9 @@
 	}
 }
 
-void flush_tsb_user(struct mmu_gather *mp)
+void flush_tsb_user(struct tlb_batch *tb)
 {
-	struct mm_struct *mm = mp->mm;
+	struct mm_struct *mm = tb->mm;
 	unsigned long nentries, base, flags;
 
 	spin_lock_irqsave(&mm->context.lock, flags);
@@ -76,7 +77,7 @@
 	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
 	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 		base = __pa(base);
-	__flush_tsb_one(mp, PAGE_SHIFT, base, nentries);
+	__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
 
 #ifdef CONFIG_HUGETLB_PAGE
 	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
@@ -84,7 +85,7 @@
 		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 			base = __pa(base);
-		__flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
+		__flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
 	}
 #endif
 	spin_unlock_irqrestore(&mm->context.lock, flags);
diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug
index 9bc161a..ddbfc33 100644
--- a/arch/tile/Kconfig.debug
+++ b/arch/tile/Kconfig.debug
@@ -21,15 +21,6 @@
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
 config DEBUG_EXTRA_FLAGS
 	string "Additional compiler arguments when building with '-g'"
 	depends on DEBUG_INFO
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index d6e87fd..4e10c40 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -60,8 +60,6 @@
 EXPORT_SYMBOL(VMALLOC_RESERVE);
 #endif
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /* Create an L2 page table */
 static pte_t * __init alloc_pte(void)
 {
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index 8fce5e5..68205fd 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -28,13 +28,13 @@
 	  If you're involved in UML kernel development and want to use gcov,
 	  say Y.  If you're unsure, say N.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	default N
-	help
-	  Track the maximum kernel stack usage - this will look at each
-	  kernel stack at process exit and log it if it's the deepest
-	  stack seen so far.
+config EARLY_PRINTK
+	bool "Early printk"
+	default y
+	---help---
+	  Write kernel log output directly to stdout.
 
-	  This option will slow down process creation and destruction somewhat.
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized.
+
 endmenu
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index 1d9b6ae..e7582e1d 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,7 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
-mcast-objs := mcast_kern.o mcast_user.o
+umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
@@ -44,7 +44,7 @@
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
 obj-$(CONFIG_UML_NET_VDE) += vde.o
-obj-$(CONFIG_UML_NET_MCAST) += mcast.o 
+obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
 obj-$(CONFIG_UML_NET) += net.o 
 obj-$(CONFIG_MCONSOLE) += mconsole.o
diff --git a/arch/um/drivers/mcast.h b/arch/um/drivers/mcast.h
deleted file mode 100644
index 6fa282e..0000000
--- a/arch/um/drivers/mcast.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* 
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __DRIVERS_MCAST_H
-#define __DRIVERS_MCAST_H
-
-#include "net_user.h"
-
-struct mcast_data {
-	char *addr;
-	unsigned short port;
-	void *mcast_addr;
-	int ttl;
-	void *dev;
-};
-
-extern const struct net_user_info mcast_user_info;
-
-extern int mcast_user_write(int fd, void *buf, int len, 
-			    struct mcast_data *pri);
-
-#endif
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
deleted file mode 100644
index ffc6416..0000000
--- a/arch/um/drivers/mcast_kern.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- * Licensed under the GPL.
- */
-
-#include "linux/init.h"
-#include <linux/netdevice.h>
-#include "mcast.h"
-#include "net_kern.h"
-
-struct mcast_init {
-	char *addr;
-	int port;
-	int ttl;
-};
-
-static void mcast_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct mcast_data *dpri;
-	struct mcast_init *init = data;
-
-	pri = netdev_priv(dev);
-	dpri = (struct mcast_data *) pri->user;
-	dpri->addr = init->addr;
-	dpri->port = init->port;
-	dpri->ttl = init->ttl;
-	dpri->dev = dev;
-
-	printk("mcast backend multicast address: %s:%u, TTL:%u\n",
-	       dpri->addr, dpri->port, dpri->ttl);
-}
-
-static int mcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return net_recvfrom(fd, skb_mac_header(skb),
-			    skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int mcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return mcast_user_write(fd, skb->data, skb->len,
-				(struct mcast_data *) &lp->user);
-}
-
-static const struct net_kern_info mcast_kern_info = {
-	.init			= mcast_init,
-	.protocol		= eth_protocol,
-	.read			= mcast_read,
-	.write			= mcast_write,
-};
-
-static int mcast_setup(char *str, char **mac_out, void *data)
-{
-	struct mcast_init *init = data;
-	char *port_str = NULL, *ttl_str = NULL, *remain;
-	char *last;
-
-	*init = ((struct mcast_init)
-		{ .addr 	= "239.192.168.1",
-		  .port 	= 1102,
-		  .ttl 		= 1 });
-
-	remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
-			       NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "mcast_setup - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 0;
-	}
-
-	if (port_str != NULL) {
-		init->port = simple_strtoul(port_str, &last, 10);
-		if ((*last != '\0') || (last == port_str)) {
-			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
-			       port_str);
-			return 0;
-		}
-	}
-
-	if (ttl_str != NULL) {
-		init->ttl = simple_strtoul(ttl_str, &last, 10);
-		if ((*last != '\0') || (last == ttl_str)) {
-			printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
-			       ttl_str);
-			return 0;
-		}
-	}
-
-	printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
-	       init->port, init->ttl);
-
-	return 1;
-}
-
-static struct transport mcast_transport = {
-	.list 		= LIST_HEAD_INIT(mcast_transport.list),
-	.name 		= "mcast",
-	.setup  	= mcast_setup,
-	.user 		= &mcast_user_info,
-	.kern 		= &mcast_kern_info,
-	.private_size 	= sizeof(struct mcast_data),
-	.setup_size 	= sizeof(struct mcast_init),
-};
-
-static int register_mcast(void)
-{
-	register_transport(&mcast_transport);
-	return 0;
-}
-
-late_initcall(register_mcast);
diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c
deleted file mode 100644
index ee19e91..0000000
--- a/arch/um/drivers/mcast_user.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- * Licensed under the GPL.
- *
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include "kern_constants.h"
-#include "mcast.h"
-#include "net_user.h"
-#include "um_malloc.h"
-#include "user.h"
-
-static struct sockaddr_in *new_addr(char *addr, unsigned short port)
-{
-	struct sockaddr_in *sin;
-
-	sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
-	if (sin == NULL) {
-		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
-		       "failed\n");
-		return NULL;
-	}
-	sin->sin_family = AF_INET;
-	sin->sin_addr.s_addr = in_aton(addr);
-	sin->sin_port = htons(port);
-	return sin;
-}
-
-static int mcast_user_init(void *data, void *dev)
-{
-	struct mcast_data *pri = data;
-
-	pri->mcast_addr = new_addr(pri->addr, pri->port);
-	pri->dev = dev;
-	return 0;
-}
-
-static void mcast_remove(void *data)
-{
-	struct mcast_data *pri = data;
-
-	kfree(pri->mcast_addr);
-	pri->mcast_addr = NULL;
-}
-
-static int mcast_open(void *data)
-{
-	struct mcast_data *pri = data;
-	struct sockaddr_in *sin = pri->mcast_addr;
-	struct ip_mreq mreq;
-	int fd, yes = 1, err = -EINVAL;
-
-
-	if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0))
-		goto out;
-
-	fd = socket(AF_INET, SOCK_DGRAM, 0);
-
-	if (fd < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open : data socket failed, "
-		       "errno = %d\n", errno);
-		goto out;
-	}
-
-	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open: SO_REUSEADDR failed, "
-		       "errno = %d\n", errno);
-		goto out_close;
-	}
-
-	/* set ttl according to config */
-	if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
-		       sizeof(pri->ttl)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_TTL failed, "
-		       "error = %d\n", errno);
-		goto out_close;
-	}
-
-	/* set LOOP, so data does get fed back to local sockets */
-	if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_LOOP failed, "
-		       "error = %d\n", errno);
-		goto out_close;
-	}
-
-	/* bind socket to mcast address */
-	if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open : data bind failed, "
-		       "errno = %d\n", errno);
-		goto out_close;
-	}
-
-	/* subscribe to the multicast group */
-	mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr;
-	mreq.imr_interface.s_addr = 0;
-	if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
-		       &mreq, sizeof(mreq)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "mcast_open: IP_ADD_MEMBERSHIP failed, "
-		       "error = %d\n", errno);
-		printk(UM_KERN_ERR "There appears not to be a multicast-"
-		       "capable network interface on the host.\n");
-		printk(UM_KERN_ERR "eth0 should be configured in order to use "
-		       "the multicast transport.\n");
-		goto out_close;
-	}
-
-	return fd;
-
- out_close:
-	close(fd);
- out:
-	return err;
-}
-
-static void mcast_close(int fd, void *data)
-{
-	struct ip_mreq mreq;
-	struct mcast_data *pri = data;
-	struct sockaddr_in *sin = pri->mcast_addr;
-
-	mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr;
-	mreq.imr_interface.s_addr = 0;
-	if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
-		       &mreq, sizeof(mreq)) < 0) {
-		printk(UM_KERN_ERR "mcast_open: IP_DROP_MEMBERSHIP failed, "
-		       "error = %d\n", errno);
-	}
-
-	close(fd);
-}
-
-int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri)
-{
-	struct sockaddr_in *data_addr = pri->mcast_addr;
-
-	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info mcast_user_info = {
-	.init		= mcast_user_init,
-	.open		= mcast_open,
-	.close	 	= mcast_close,
-	.remove	 	= mcast_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
new file mode 100644
index 0000000..6f8c0fe
--- /dev/null
+++ b/arch/um/drivers/umcast.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __DRIVERS_UMCAST_H
+#define __DRIVERS_UMCAST_H
+
+#include "net_user.h"
+
+struct umcast_data {
+	char *addr;
+	unsigned short lport;
+	unsigned short rport;
+	void *listen_addr;
+	void *remote_addr;
+	int ttl;
+	int unicast;
+	void *dev;
+};
+
+extern const struct net_user_info umcast_user_info;
+
+extern int umcast_user_write(int fd, void *buf, int len,
+			     struct umcast_data *pri);
+
+#endif
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
new file mode 100644
index 0000000..42dab11
--- /dev/null
+++ b/arch/um/drivers/umcast_kern.c
@@ -0,0 +1,188 @@
+/*
+ * user-mode-linux networking multicast transport
+ * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ *
+ * based on the existing uml-networking code, which is
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ *
+ * Licensed under the GPL.
+ */
+
+#include "linux/init.h"
+#include <linux/netdevice.h>
+#include "umcast.h"
+#include "net_kern.h"
+
+struct umcast_init {
+	char *addr;
+	int lport;
+	int rport;
+	int ttl;
+	bool unicast;
+};
+
+static void umcast_init(struct net_device *dev, void *data)
+{
+	struct uml_net_private *pri;
+	struct umcast_data *dpri;
+	struct umcast_init *init = data;
+
+	pri = netdev_priv(dev);
+	dpri = (struct umcast_data *) pri->user;
+	dpri->addr = init->addr;
+	dpri->lport = init->lport;
+	dpri->rport = init->rport;
+	dpri->unicast = init->unicast;
+	dpri->ttl = init->ttl;
+	dpri->dev = dev;
+
+	if (dpri->unicast) {
+		printk(KERN_INFO "ucast backend address: %s:%u listen port: "
+		       "%u\n", dpri->addr, dpri->rport, dpri->lport);
+	} else {
+		printk(KERN_INFO "mcast backend multicast address: %s:%u, "
+		       "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl);
+	}
+}
+
+static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
+{
+	return net_recvfrom(fd, skb_mac_header(skb),
+			    skb->dev->mtu + ETH_HEADER_OTHER);
+}
+
+static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
+{
+	return umcast_user_write(fd, skb->data, skb->len,
+				(struct umcast_data *) &lp->user);
+}
+
+static const struct net_kern_info umcast_kern_info = {
+	.init			= umcast_init,
+	.protocol		= eth_protocol,
+	.read			= umcast_read,
+	.write			= umcast_write,
+};
+
+static int mcast_setup(char *str, char **mac_out, void *data)
+{
+	struct umcast_init *init = data;
+	char *port_str = NULL, *ttl_str = NULL, *remain;
+	char *last;
+
+	*init = ((struct umcast_init)
+		{ .addr	= "239.192.168.1",
+		  .lport	= 1102,
+		  .ttl	= 1 });
+
+	remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
+			       NULL);
+	if (remain != NULL) {
+		printk(KERN_ERR "mcast_setup - Extra garbage on "
+		       "specification : '%s'\n", remain);
+		return 0;
+	}
+
+	if (port_str != NULL) {
+		init->lport = simple_strtoul(port_str, &last, 10);
+		if ((*last != '\0') || (last == port_str)) {
+			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
+			       port_str);
+			return 0;
+		}
+	}
+
+	if (ttl_str != NULL) {
+		init->ttl = simple_strtoul(ttl_str, &last, 10);
+		if ((*last != '\0') || (last == ttl_str)) {
+			printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
+			       ttl_str);
+			return 0;
+		}
+	}
+
+	init->unicast = false;
+	init->rport = init->lport;
+
+	printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
+	       init->lport, init->ttl);
+
+	return 1;
+}
+
+static int ucast_setup(char *str, char **mac_out, void *data)
+{
+	struct umcast_init *init = data;
+	char *lport_str = NULL, *rport_str = NULL, *remain;
+	char *last;
+
+	*init = ((struct umcast_init)
+		{ .addr		= "",
+		  .lport	= 1102,
+		  .rport	= 1102 });
+
+	remain = split_if_spec(str, mac_out, &init->addr,
+			       &lport_str, &rport_str, NULL);
+	if (remain != NULL) {
+		printk(KERN_ERR "ucast_setup - Extra garbage on "
+		       "specification : '%s'\n", remain);
+		return 0;
+	}
+
+	if (lport_str != NULL) {
+		init->lport = simple_strtoul(lport_str, &last, 10);
+		if ((*last != '\0') || (last == lport_str)) {
+			printk(KERN_ERR "ucast_setup - Bad listen port : "
+			       "'%s'\n", lport_str);
+			return 0;
+		}
+	}
+
+	if (rport_str != NULL) {
+		init->rport = simple_strtoul(rport_str, &last, 10);
+		if ((*last != '\0') || (last == rport_str)) {
+			printk(KERN_ERR "ucast_setup - Bad remote port : "
+			       "'%s'\n", rport_str);
+			return 0;
+		}
+	}
+
+	init->unicast = true;
+
+	printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n",
+	       init->lport, init->addr, init->rport);
+
+	return 1;
+}
+
+static struct transport mcast_transport = {
+	.list	= LIST_HEAD_INIT(mcast_transport.list),
+	.name	= "mcast",
+	.setup	= mcast_setup,
+	.user	= &umcast_user_info,
+	.kern	= &umcast_kern_info,
+	.private_size	= sizeof(struct umcast_data),
+	.setup_size	= sizeof(struct umcast_init),
+};
+
+static struct transport ucast_transport = {
+	.list	= LIST_HEAD_INIT(ucast_transport.list),
+	.name	= "ucast",
+	.setup	= ucast_setup,
+	.user	= &umcast_user_info,
+	.kern	= &umcast_kern_info,
+	.private_size	= sizeof(struct umcast_data),
+	.setup_size	= sizeof(struct umcast_init),
+};
+
+static int register_umcast(void)
+{
+	register_transport(&mcast_transport);
+	register_transport(&ucast_transport);
+	return 0;
+}
+
+late_initcall(register_umcast);
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
new file mode 100644
index 0000000..59c56fd
--- /dev/null
+++ b/arch/um/drivers/umcast_user.c
@@ -0,0 +1,186 @@
+/*
+ * user-mode-linux networking multicast transport
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
+ *
+ * based on the existing uml-networking code, which is
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ *
+ * Licensed under the GPL.
+ *
+ */
+
+#include <unistd.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include "kern_constants.h"
+#include "umcast.h"
+#include "net_user.h"
+#include "um_malloc.h"
+#include "user.h"
+
+static struct sockaddr_in *new_addr(char *addr, unsigned short port)
+{
+	struct sockaddr_in *sin;
+
+	sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
+	if (sin == NULL) {
+		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
+		       "failed\n");
+		return NULL;
+	}
+	sin->sin_family = AF_INET;
+	if (addr)
+		sin->sin_addr.s_addr = in_aton(addr);
+	else
+		sin->sin_addr.s_addr = INADDR_ANY;
+	sin->sin_port = htons(port);
+	return sin;
+}
+
+static int umcast_user_init(void *data, void *dev)
+{
+	struct umcast_data *pri = data;
+
+	pri->remote_addr = new_addr(pri->addr, pri->rport);
+	if (pri->unicast)
+		pri->listen_addr = new_addr(NULL, pri->lport);
+	else
+		pri->listen_addr = pri->remote_addr;
+	pri->dev = dev;
+	return 0;
+}
+
+static void umcast_remove(void *data)
+{
+	struct umcast_data *pri = data;
+
+	kfree(pri->listen_addr);
+	if (pri->unicast)
+		kfree(pri->remote_addr);
+	pri->listen_addr = pri->remote_addr = NULL;
+}
+
+static int umcast_open(void *data)
+{
+	struct umcast_data *pri = data;
+	struct sockaddr_in *lsin = pri->listen_addr;
+	struct sockaddr_in *rsin = pri->remote_addr;
+	struct ip_mreq mreq;
+	int fd, yes = 1, err = -EINVAL;
+
+
+	if ((!pri->unicast && lsin->sin_addr.s_addr == 0) ||
+	    (rsin->sin_addr.s_addr == 0) ||
+	    (lsin->sin_port == 0) || (rsin->sin_port == 0))
+		goto out;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+	if (fd < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "umcast_open : data socket failed, "
+		       "errno = %d\n", errno);
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, "
+		       "errno = %d\n", errno);
+		goto out_close;
+	}
+
+	if (!pri->unicast) {
+		/* set ttl according to config */
+		if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
+			       sizeof(pri->ttl)) < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL "
+			       "failed, error = %d\n", errno);
+			goto out_close;
+		}
+
+		/* set LOOP, so data does get fed back to local sockets */
+		if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP,
+			       &yes, sizeof(yes)) < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP "
+			       "failed, error = %d\n", errno);
+			goto out_close;
+		}
+	}
+
+	/* bind socket to the address */
+	if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "umcast_open : data bind failed, "
+		       "errno = %d\n", errno);
+		goto out_close;
+	}
+
+	if (!pri->unicast) {
+		/* subscribe to the multicast group */
+		mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
+		mreq.imr_interface.s_addr = 0;
+		if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
+			       &mreq, sizeof(mreq)) < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP "
+			       "failed, error = %d\n", errno);
+			printk(UM_KERN_ERR "There appears not to be a "
+			       "multicast-capable network interface on the "
+			       "host.\n");
+			printk(UM_KERN_ERR "eth0 should be configured in order "
+			       "to use the multicast transport.\n");
+			goto out_close;
+		}
+	}
+
+	return fd;
+
+ out_close:
+	close(fd);
+ out:
+	return err;
+}
+
+static void umcast_close(int fd, void *data)
+{
+	struct umcast_data *pri = data;
+
+	if (!pri->unicast) {
+		struct ip_mreq mreq;
+		struct sockaddr_in *lsin = pri->listen_addr;
+
+		mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
+		mreq.imr_interface.s_addr = 0;
+		if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
+			       &mreq, sizeof(mreq)) < 0) {
+			printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP "
+			       "failed, error = %d\n", errno);
+		}
+	}
+
+	close(fd);
+}
+
+int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri)
+{
+	struct sockaddr_in *data_addr = pri->remote_addr;
+
+	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
+}
+
+const struct net_user_info umcast_user_info = {
+	.init	= umcast_user_init,
+	.open	= umcast_open,
+	.close	= umcast_close,
+	.remove	= umcast_remove,
+	.add_address	= NULL,
+	.delete_address = NULL,
+	.mtu	= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
+};
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index da2caa5..8ac7146 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -90,7 +90,7 @@
 	int pid, fd, new, err;
 	char title[256], file[] = "/tmp/xterm-pipeXXXXXX";
 	char *argv[] = { terminal_emulator, title_switch, title, exec_switch,
-			 "/usr/lib/uml/port-helper", "-uml-socket",
+			 OS_LIB_PATH "/uml/port-helper", "-uml-socket",
 			 file, NULL };
 
 	if (access(argv[4], X_OK) < 0)
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index d1d1b0d..98d01bc 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -14,6 +14,8 @@
 #include "registers.h"
 #include "sysdep/archsetjmp.h"
 
+#include <linux/prefetch.h>
+
 struct mm_struct;
 
 struct thread_struct {
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index f27a963..4a4b09d 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -11,7 +11,6 @@
 
 #define cpu_logical_map(n) (n)
 #define cpu_number_map(n) (n)
-#define PROC_CHANGE_PENALTY	15 /* Pick a number, any number */
 extern int hard_smp_processor_id(void);
 #define NO_PROC_ID -1
 
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 660caed..4febacd 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -22,9 +22,6 @@
 	unsigned int		fullmm; /* non-zero means full mm flush */
 };
 
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
 					  unsigned long address)
 {
@@ -47,27 +44,20 @@
 	}
 }
 
-/* tlb_gather_mmu
- *	Return a pointer to an initialized struct mmu_gather.
- */
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
 	tlb->mm = mm;
 	tlb->fullmm = full_mm_flush;
 
 	init_tlb_gather(tlb);
-
-	return tlb;
 }
 
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 			       unsigned long end);
 
 static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
@@ -83,12 +73,10 @@
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-
-	put_cpu_var(mmu_gathers);
 }
 
 /* tlb_remove_page
@@ -96,11 +84,16 @@
  *	while handling the additional races in SMP caused by other CPUs
  *	caching valid mappings in their TLBs.
  */
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
-	return;
+	return 1; /* avoid calling tlb_flush_mmu */
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	__tlb_remove_page(tlb, page);
 }
 
 /**
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index c4617ba..83c7c2e 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -29,6 +29,12 @@
 #define OS_ACC_R_OK    4       /* Test for read permission.  */
 #define OS_ACC_RW_OK   (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */
 
+#ifdef CONFIG_64BIT
+#define OS_LIB_PATH	"/usr/lib64/"
+#else
+#define OS_LIB_PATH	"/usr/lib/"
+#endif
+
 /*
  * types taken from stat_file() in hostfs_user.c
  * (if they are wrong here, they are wrong there...).
@@ -238,6 +244,7 @@
 extern void setup_machinename(char *machine_out);
 extern void setup_hostinfo(char *buf, int len);
 extern void os_dump_core(void) __attribute__ ((noreturn));
+extern void um_early_printk(const char *s, unsigned int n);
 
 /* time.c */
 extern void idle_sleep(unsigned long long nsecs);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 11192335..c4491c1 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -17,6 +17,7 @@
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_GCOV)	+= gmon_syms.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 
 USER_OBJS := config.o
 
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
new file mode 100644
index 0000000..ec649bf
--- /dev/null
+++ b/arch/um/kernel/early_printk.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include "os.h"
+
+static void early_console_write(struct console *con, const char *s, unsigned int n)
+{
+	um_early_printk(s, n);
+}
+
+static struct console early_console = {
+	.name = "earlycon",
+	.write = early_console_write,
+	.flags = CON_BOOT,
+	.index = -1,
+};
+
+static int __init setup_early_printk(char *buf)
+{
+	register_console(&early_console);
+
+	return 0;
+}
+
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index eefb107..155206a 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -7,9 +7,6 @@
 #include "asm/pgalloc.h"
 #include "asm/tlb.h"
 
-/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 #ifdef CONFIG_SMP
 
 #include "linux/sched.h"
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 637c650..8c7b882 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -113,6 +113,27 @@
 	return 0;
 }
 
+static void show_segv_info(struct uml_pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	struct faultinfo *fi = UPT_FAULTINFO(regs);
+
+	if (!unhandled_signal(tsk, SIGSEGV))
+		return;
+
+	if (!printk_ratelimit())
+		return;
+
+	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
+		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
+		fi->error_code);
+
+	print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
+	printk(KERN_CONT "\n");
+}
+
 static void bad_segv(struct faultinfo fi, unsigned long ip)
 {
 	struct siginfo si;
@@ -141,6 +162,7 @@
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
 
 	if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
+		show_segv_info(regs);
 		bad_segv(*fi, UPT_IP(regs));
 		return;
 	}
@@ -202,6 +224,8 @@
 		      address, ip);
 	}
 
+	show_segv_info(regs);
+
 	if (err == -EACCES) {
 		si.si_signo = SIGBUS;
 		si.si_errno = 0;
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index eee69b9..fb2a97a 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -78,7 +78,7 @@
 	}
 }
 
-#define UML_LIB_PATH	":/usr/lib/uml"
+#define UML_LIB_PATH	":" OS_LIB_PATH "/uml"
 
 static void setup_env_path(void)
 {
@@ -142,7 +142,6 @@
 	 */
 	install_fatal_handler(SIGINT);
 	install_fatal_handler(SIGTERM);
-	install_fatal_handler(SIGHUP);
 
 	scan_elf_aux(envp);
 
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e0477c3..0c45dc8 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -253,6 +253,7 @@
 		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM,
 		    SIGVTALRM, -1);
 	signal(SIGWINCH, SIG_IGN);
+	signal(SIGTERM, SIG_DFL);
 }
 
 int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 42827ca..5803b18 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -139,3 +139,8 @@
 
 	uml_abort();
 }
+
+void um_early_printk(const char *s, unsigned int n)
+{
+	printf("%.*s", n, s);
+}
diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug
index 3140151..ae2ec33 100644
--- a/arch/unicore32/Kconfig.debug
+++ b/arch/unicore32/Kconfig.debug
@@ -27,13 +27,6 @@
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
-config DEBUG_STACK_USAGE
-	bool "Enable stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	help
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T output.
-
 # These options are only for real kernel hackers who want to get their hands dirty.
 config DEBUG_LL
 	bool "Kernel low-level debugging functions"
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 1fc0263..2d3e711 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -62,7 +62,7 @@
 	struct meminfo *mi = &meminfo;
 
 	printk(KERN_DEFAULT "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 
 	for_each_bank(i, mi) {
 		struct membank *bank = &mi->bank[i];
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index db2d334..3e5c3e5 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -30,8 +30,6 @@
 
 #include "mm.h"
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 880fcb6..fa2cc8c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,8 +17,6 @@
 config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
-	select HAVE_READQ
-	select HAVE_WRITEQ
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 615e188..c0f8a5c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -66,26 +66,6 @@
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	---help---
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
-config DEBUG_PER_CPU_MAPS
-	bool "Debug access to per_cpu maps"
-	depends on DEBUG_KERNEL
-	depends on SMP
-	---help---
-	  Say Y to verify that the per_cpu map being accessed has
-	  been setup.  Adds a fair amount of code to kernel memory
-	  and decreases performance.
-
-	  Say N if unsure.
-
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 0722730..d02804d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -38,7 +38,6 @@
 
 #include <linux/string.h>
 #include <linux/compiler.h>
-#include <asm-generic/int-ll64.h>
 #include <asm/page.h>
 
 #include <xen/xen.h>
@@ -87,27 +86,6 @@
 build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
 
-#else
-
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-
-static inline void writeq(__u64 val, volatile void __iomem *addr)
-{
-	writel(val, addr);
-	writel(val >> 32, addr+4);
-}
-
-#endif
-
 #define readq_relaxed(a)	readq(a)
 
 #define __raw_readq(a)		readq(a)
@@ -117,6 +95,8 @@
 #define readq			readq
 #define writeq			writeq
 
+#endif
+
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 605e5ae..a3e5948 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -946,6 +946,8 @@
 	if (init_ohci1394_dma_early)
 		init_ohci1394_dma_on_all_controllers();
 #endif
+	/* Allocate bigger log buffer */
+	setup_log_buf(1);
 
 	reserve_initrd();
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 998e972..30ac65d 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -110,7 +110,6 @@
 	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask    = CPU_MASK_ALL,
 };
 
 static inline void switch_to_tboot_pt(void)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2841805..bd14bb4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3545,10 +3545,11 @@
 	return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
 }
 
-static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
+	int nr_to_scan = sc->nr_to_scan;
 
 	if (nr_to_scan == 0)
 		goto out;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bcb394d..f7a2a05 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -965,7 +965,7 @@
 	struct mm_struct *mm;
 	int fault;
 	int write = error_code & PF_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
 					(write ? FAULT_FLAG_WRITE : 0);
 
 	tsk = current;
@@ -1139,6 +1139,16 @@
 	}
 
 	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue pagefault.
+	 */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+		if (!(error_code & PF_USER))
+			no_context(regs, error_code, address);
+		return;
+	}
+
+	/*
 	 * Major/minor page fault accounting is only done on the
 	 * initial attempt. If we go through a retry, it is extremely
 	 * likely that the page will be found in page cache at that point.
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index d420398..f581a18 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -72,7 +72,7 @@
 	if (!vma_shareable(vma, addr))
 		return;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
@@ -97,7 +97,7 @@
 		put_page(virt_to_page(spte));
 	spin_unlock(&mm->page_table_lock);
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 }
 
 /*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 37b8b0f..3032644 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,8 +16,6 @@
 #include <asm/tlb.h>
 #include <asm/proto.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long __initdata pgt_buf_start;
 unsigned long __meminitdata pgt_buf_end;
 unsigned long __meminitdata pgt_buf_top;
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 161bb89..7a5591a7 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -171,10 +171,6 @@
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
 
-#ifdef CONFIG_MMU
-#define WANT_PAGE_VIRTUAL
-#endif
-
 #endif /* __ASSEMBLY__ */
 
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 4bb91a9..ca81654 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -14,8 +14,6 @@
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 void __init paging_init(void)
 {
 	memset(swapper_pg_dir, 0, PAGE_SIZE);
diff --git a/arch/xtensa/mm/pgtable.c b/arch/xtensa/mm/pgtable.c
deleted file mode 100644
index 6979927..0000000
--- a/arch/xtensa/mm/pgtable.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * arch/xtensa/mm/pgtable.c
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- *
- * Chris Zankel <chris@zankel.net>
- */
-
-#if (DCACHE_SIZE > PAGE_SIZE)
-
-pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
-	pte_t *pte = NULL, *p;
-	int color = ADDR_COLOR(address);
-	int i;
-
-	p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER);
-
-	if (likely(p)) {
-		split_page(virt_to_page(p), COLOR_ORDER);
-
-		for (i = 0; i < COLOR_SIZE; i++) {
-			if (ADDR_COLOR(p) == color)
-				pte = p;
-			else
-				free_page(p);
-			p += PTRS_PER_PTE;
-		}
-		clear_page(pte);
-	}
-	return pte;
-}
-
-#ifdef PROFILING
-
-int mask;
-int hit;
-int flush;
-
-#endif
-
-struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *page = NULL, *p;
-	int color = ADDR_COLOR(address);
-
-	p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
-
-	if (likely(p)) {
-		split_page(p, COLOR_ORDER);
-
-		for (i = 0; i < PAGE_ORDER; i++) {
-			if (PADDR_COLOR(page_address(p)) == color)
-				page = p;
-			else
-				__free_page(p);
-			p++;
-		}
-		clear_highpage(page);
-	}
-
-	return page;
-}
-
-#endif
-
-
-
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 471fdcc..07371cf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -385,25 +385,40 @@
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	blkg->stats.time += time;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	blkg->stats.unaccounted_time += unaccounted_time;
+#endif
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
+/*
+ * should be called under rcu read lock or queue lock to make sure blkg pointer
+ * is valid.
+ */
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				uint64_t bytes, bool direction, bool sync)
 {
-	struct blkio_group_stats *stats;
+	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkg->stats_lock, flags);
-	stats = &blkg->stats;
-	stats->sectors += bytes >> 9;
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
-			sync);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
-			direction, sync);
-	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+	/*
+	 * Disabling interrupts to provide mutual exclusion between two
+	 * writes on same cpu. It probably is not needed for 64bit. Not
+	 * optimizing that case yet.
+	 */
+	local_irq_save(flags);
+
+	stats_cpu = this_cpu_ptr(blkg->stats_cpu);
+
+	u64_stats_update_begin(&stats_cpu->syncp);
+	stats_cpu->sectors += bytes >> 9;
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
+			1, direction, sync);
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
+			bytes, direction, sync);
+	u64_stats_update_end(&stats_cpu->syncp);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
 
@@ -426,18 +441,44 @@
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 
+/*  Merged stats are per cpu.  */
 void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 					bool sync)
 {
+	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkg->stats_lock, flags);
-	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
-			sync);
-	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+	/*
+	 * Disabling interrupts to provide mutual exclusion between two
+	 * writes on same cpu. It probably is not needed for 64bit. Not
+	 * optimizing that case yet.
+	 */
+	local_irq_save(flags);
+
+	stats_cpu = this_cpu_ptr(blkg->stats_cpu);
+
+	u64_stats_update_begin(&stats_cpu->syncp);
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
+				direction, sync);
+	u64_stats_update_end(&stats_cpu->syncp);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
+/*
+ * This function allocates the per cpu stats for blkio_group. Should be called
+ * from sleepable context as alloc_per_cpu() requires that.
+ */
+int blkio_alloc_blkg_stats(struct blkio_group *blkg)
+{
+	/* Allocate memory for per cpu stats */
+	blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+	if (!blkg->stats_cpu)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
+
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, void *key, dev_t dev,
 		enum blkio_policy_id plid)
@@ -508,6 +549,30 @@
 }
 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
 
+static void blkio_reset_stats_cpu(struct blkio_group *blkg)
+{
+	struct blkio_group_stats_cpu *stats_cpu;
+	int i, j, k;
+	/*
+	 * Note: On 64 bit arch this should not be an issue. This has the
+	 * possibility of returning some inconsistent value on 32bit arch
+	 * as 64bit update on 32bit is non atomic. Taking care of this
+	 * corner case makes code very complicated, like sending IPIs to
+	 * cpus, taking care of stats of offline cpus etc.
+	 *
+	 * reset stats is anyway more of a debug feature and this sounds a
+	 * corner case. So I am not complicating the code yet until and
+	 * unless this becomes a real issue.
+	 */
+	for_each_possible_cpu(i) {
+		stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
+		stats_cpu->sectors = 0;
+		for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
+			for (k = 0; k < BLKIO_STAT_TOTAL; k++)
+				stats_cpu->stat_arr_cpu[j][k] = 0;
+	}
+}
+
 static int
 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
@@ -552,7 +617,11 @@
 		}
 #endif
 		spin_unlock(&blkg->stats_lock);
+
+		/* Reset Per cpu stats which don't take blkg->stats_lock */
+		blkio_reset_stats_cpu(blkg);
 	}
+
 	spin_unlock_irq(&blkcg->lock);
 	return 0;
 }
@@ -598,6 +667,59 @@
 	return val;
 }
 
+
+static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
+			enum stat_type_cpu type, enum stat_sub_type sub_type)
+{
+	int cpu;
+	struct blkio_group_stats_cpu *stats_cpu;
+	u64 val = 0, tval;
+
+	for_each_possible_cpu(cpu) {
+		unsigned int start;
+		stats_cpu  = per_cpu_ptr(blkg->stats_cpu, cpu);
+
+		do {
+			start = u64_stats_fetch_begin(&stats_cpu->syncp);
+			if (type == BLKIO_STAT_CPU_SECTORS)
+				tval = stats_cpu->sectors;
+			else
+				tval = stats_cpu->stat_arr_cpu[type][sub_type];
+		} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
+
+		val += tval;
+	}
+
+	return val;
+}
+
+static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
+		struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
+{
+	uint64_t disk_total, val;
+	char key_str[MAX_KEY_LEN];
+	enum stat_sub_type sub_type;
+
+	if (type == BLKIO_STAT_CPU_SECTORS) {
+		val = blkio_read_stat_cpu(blkg, type, 0);
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
+	}
+
+	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+			sub_type++) {
+		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
+		val = blkio_read_stat_cpu(blkg, type, sub_type);
+		cb->fill(cb, key_str, val);
+	}
+
+	disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
+			blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);
+
+	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
+	cb->fill(cb, key_str, disk_total);
+	return disk_total;
+}
+
 /* This should be called with blkg->stats_lock held */
 static uint64_t blkio_get_stat(struct blkio_group *blkg,
 		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
@@ -609,9 +731,6 @@
 	if (type == BLKIO_STAT_TIME)
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 					blkg->stats.time, cb, dev);
-	if (type == BLKIO_STAT_SECTORS)
-		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
-					blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
@@ -1075,8 +1194,8 @@
 }
 
 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
-		struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
-		bool show_total)
+		struct cftype *cft, struct cgroup_map_cb *cb,
+		enum stat_type type, bool show_total, bool pcpu)
 {
 	struct blkio_group *blkg;
 	struct hlist_node *n;
@@ -1087,10 +1206,15 @@
 		if (blkg->dev) {
 			if (!cftype_blkg_same_policy(cft, blkg))
 				continue;
-			spin_lock_irq(&blkg->stats_lock);
-			cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
-						type);
-			spin_unlock_irq(&blkg->stats_lock);
+			if (pcpu)
+				cgroup_total += blkio_get_stat_cpu(blkg, cb,
+						blkg->dev, type);
+			else {
+				spin_lock_irq(&blkg->stats_lock);
+				cgroup_total += blkio_get_stat(blkg, cb,
+						blkg->dev, type);
+				spin_unlock_irq(&blkg->stats_lock);
+			}
 		}
 	}
 	if (show_total)
@@ -1114,47 +1238,47 @@
 		switch(name) {
 		case BLKIO_PROP_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_TIME, 0);
+						BLKIO_STAT_TIME, 0, 0);
 		case BLKIO_PROP_sectors:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SECTORS, 0);
+						BLKIO_STAT_CPU_SECTORS, 0, 1);
 		case BLKIO_PROP_io_service_bytes:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_BYTES, 1);
+					BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
 		case BLKIO_PROP_io_serviced:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICED, 1);
+						BLKIO_STAT_CPU_SERVICED, 1, 1);
 		case BLKIO_PROP_io_service_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_TIME, 1);
+						BLKIO_STAT_SERVICE_TIME, 1, 0);
 		case BLKIO_PROP_io_wait_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_WAIT_TIME, 1);
+						BLKIO_STAT_WAIT_TIME, 1, 0);
 		case BLKIO_PROP_io_merged:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_MERGED, 1);
+						BLKIO_STAT_CPU_MERGED, 1, 1);
 		case BLKIO_PROP_io_queued:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_QUEUED, 1);
+						BLKIO_STAT_QUEUED, 1, 0);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 		case BLKIO_PROP_unaccounted_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_UNACCOUNTED_TIME, 0);
+					BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
 		case BLKIO_PROP_dequeue:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_DEQUEUE, 0);
+						BLKIO_STAT_DEQUEUE, 0, 0);
 		case BLKIO_PROP_avg_queue_size:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+					BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
 		case BLKIO_PROP_group_wait_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_GROUP_WAIT_TIME, 0);
+					BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
 		case BLKIO_PROP_idle_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_IDLE_TIME, 0);
+						BLKIO_STAT_IDLE_TIME, 0, 0);
 		case BLKIO_PROP_empty_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_EMPTY_TIME, 0);
+						BLKIO_STAT_EMPTY_TIME, 0, 0);
 #endif
 		default:
 			BUG();
@@ -1164,10 +1288,10 @@
 		switch(name){
 		case BLKIO_THROTL_io_service_bytes:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_BYTES, 1);
+						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
 		case BLKIO_THROTL_io_serviced:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICED, 1);
+						BLKIO_STAT_CPU_SERVICED, 1, 1);
 		default:
 			BUG();
 		}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index c774930..a71d290 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -14,6 +14,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/u64_stats_sync.h>
 
 enum blkio_policy_id {
 	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
@@ -36,22 +37,15 @@
 	 * request completion for IOs doen by this cgroup. This may not be
 	 * accurate when NCQ is turned on. */
 	BLKIO_STAT_SERVICE_TIME = 0,
-	/* Total bytes transferred */
-	BLKIO_STAT_SERVICE_BYTES,
-	/* Total IOs serviced, post merge */
-	BLKIO_STAT_SERVICED,
 	/* Total time spent waiting in scheduler queue in ns */
 	BLKIO_STAT_WAIT_TIME,
-	/* Number of IOs merged */
-	BLKIO_STAT_MERGED,
 	/* Number of IOs queued up */
 	BLKIO_STAT_QUEUED,
 	/* All the single valued stats go below this */
 	BLKIO_STAT_TIME,
-	BLKIO_STAT_SECTORS,
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* Time not charged to this cgroup */
 	BLKIO_STAT_UNACCOUNTED_TIME,
-#ifdef CONFIG_DEBUG_BLK_CGROUP
 	BLKIO_STAT_AVG_QUEUE_SIZE,
 	BLKIO_STAT_IDLE_TIME,
 	BLKIO_STAT_EMPTY_TIME,
@@ -60,6 +54,18 @@
 #endif
 };
 
+/* Per cpu stats */
+enum stat_type_cpu {
+	BLKIO_STAT_CPU_SECTORS,
+	/* Total bytes transferred */
+	BLKIO_STAT_CPU_SERVICE_BYTES,
+	/* Total IOs serviced, post merge */
+	BLKIO_STAT_CPU_SERVICED,
+	/* Number of IOs merged */
+	BLKIO_STAT_CPU_MERGED,
+	BLKIO_STAT_CPU_NR
+};
+
 enum stat_sub_type {
 	BLKIO_STAT_READ = 0,
 	BLKIO_STAT_WRITE,
@@ -116,11 +122,11 @@
 struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
-	uint64_t sectors;
-	/* Time not charged to this cgroup */
-	uint64_t unaccounted_time;
 	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+	/* Time not charged to this cgroup */
+	uint64_t unaccounted_time;
+
 	/* Sum of number of IOs queued across all samples */
 	uint64_t avg_queue_size_sum;
 	/* Count of samples taken for average */
@@ -145,6 +151,13 @@
 #endif
 };
 
+/* Per cpu blkio group stats */
+struct blkio_group_stats_cpu {
+	uint64_t sectors;
+	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
+	struct u64_stats_sync syncp;
+};
+
 struct blkio_group {
 	/* An rcu protected unique identifier for the group */
 	void *key;
@@ -160,6 +173,8 @@
 	/* Need to serialize the stats in the case of reset/update */
 	spinlock_t stats_lock;
 	struct blkio_group_stats stats;
+	/* Per cpu stats pointer */
+	struct blkio_group_stats_cpu __percpu *stats_cpu;
 };
 
 struct blkio_policy_node {
@@ -295,6 +310,7 @@
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 	struct blkio_group *blkg, void *key, dev_t dev,
 	enum blkio_policy_id plid);
+extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
@@ -322,6 +338,8 @@
 		struct blkio_group *blkg, void *key, dev_t dev,
 		enum blkio_policy_id plid) {}
 
+static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
+
 static inline int
 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 3fe00a1..c8303e9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -569,8 +569,6 @@
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
@@ -1110,14 +1108,6 @@
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "back");
-		return false;
-	}
-
 	if (!ll_back_merge_fn(q, req, bio))
 		return false;
 
@@ -1132,6 +1122,7 @@
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
+	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1141,14 +1132,6 @@
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	sector_t sector;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "front");
-		return false;
-	}
-
 	if (!ll_front_merge_fn(q, req, bio))
 		return false;
 
@@ -1173,6 +1156,7 @@
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
+	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1258,14 +1242,12 @@
 
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_back_merge(q, req, bio)) {
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_front_merge(q, req, bio)) {
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
@@ -1320,10 +1302,6 @@
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
-		/*
-		 * Debug flag, kill later
-		 */
-		req->cmd_flags |= REQ_ON_PLUG;
 		list_add_tail(&req->queuelist, &plug->list);
 		drive_stat_acct(req, 1);
 	} else {
@@ -1550,7 +1528,8 @@
 			goto end_io;
 		}
 
-		blk_throtl_bio(q, &bio);
+		if (blk_throtl_bio(q, &bio))
+			goto end_io;
 
 		/*
 		 * If bio = NULL, bio has been throttled and will be submitted
@@ -2748,7 +2727,6 @@
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
-		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
 			/*
@@ -2760,8 +2738,6 @@
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
-		rq->cmd_flags &= ~REQ_ON_PLUG;
-
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 81e3181..8a0e7ec 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,7 +56,7 @@
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
 	__blk_run_queue(q);
-	/* the queue is stopped so it won't be plugged+unplugged */
+	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
 	spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6c9b5e1..bb21e4c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -212,13 +212,19 @@
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue.  Kick the queue in those cases.  This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver.  Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver.  Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index b791022..c898049 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -96,6 +96,9 @@
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+		ret->cgroup_changed = 0;
+#endif
 	}
 
 	return ret;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 25de73e..78e627e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,17 +9,20 @@
 
 #include "blk.h"
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
+struct bio_batch {
+	atomic_t		done;
+	unsigned long		flags;
+	struct completion	*wait;
+};
+
+static void bio_batch_end_io(struct bio *bio, int err)
 {
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
+	struct bio_batch *bb = bio->bi_private;
 
-	if (bio->bi_private)
-		complete(bio->bi_private);
-
+	if (err && (err != -EOPNOTSUPP))
+		clear_bit(BIO_UPTODATE, &bb->flags);
+	if (atomic_dec_and_test(&bb->done))
+		complete(bb->wait);
 	bio_put(bio);
 }
 
@@ -41,6 +44,7 @@
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
+	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
 
@@ -67,7 +71,11 @@
 		type |= REQ_SECURE;
 	}
 
-	while (nr_sects && !ret) {
+	atomic_set(&bb.done, 1);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+
+	while (nr_sects) {
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
 			ret = -ENOMEM;
@@ -75,9 +83,9 @@
 		}
 
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &wait;
+		bio->bi_private = &bb;
 
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
@@ -88,45 +96,21 @@
 			nr_sects = 0;
 		}
 
-		bio_get(bio);
+		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+	}
 
+	/* Wait for bios in-flight */
+	if (!atomic_dec_and_test(&bb.done))
 		wait_for_completion(&wait);
 
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		ret = -EIO;
 
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-struct bio_batch
-{
-	atomic_t 		done;
-	unsigned long 		flags;
-	struct completion 	*wait;
-};
-
-static void bio_batch_end_io(struct bio *bio, int err)
-{
-	struct bio_batch *bb = bio->bi_private;
-
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bb->flags);
-		else
-			clear_bit(BIO_UPTODATE, &bb->flags);
-	}
-	if (bb)
-		if (atomic_dec_and_test(&bb->done))
-			complete(bb->wait);
-	bio_put(bio);
-}
-
 /**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -151,7 +135,6 @@
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
-submit:
 	ret = 0;
 	while (nr_sects != 0) {
 		bio = bio_alloc(gfp_mask,
@@ -168,9 +151,6 @@
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			if (sz == 0)
-				/* bio has maximum size possible */
-				break;
 			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
 			nr_sects -= ret >> 9;
 			sector += ret >> 9;
@@ -190,16 +170,6 @@
 		/* One of bios in the batch was completed with error.*/
 		ret = -EIO;
 
-	if (ret)
-		goto out;
-
-	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-	if (nr_sects != 0)
-		goto submit;
-out:
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa7692..fa1eb04 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -120,7 +120,7 @@
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = -1;
+	lim->discard_zeroes_data = 1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -166,6 +166,7 @@
 
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
@@ -790,6 +791,12 @@
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index bd23631..d935bd8 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -152,7 +152,8 @@
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 252a81a..a62be8d0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -78,6 +78,8 @@
 
 	/* Some throttle limits got updated for the group */
 	int limits_changed;
+
+	struct rcu_head rcu_head;
 };
 
 struct throtl_data
@@ -88,7 +90,7 @@
 	/* service tree for active throtl groups */
 	struct throtl_rb_root tg_service_tree;
 
-	struct throtl_grp root_tg;
+	struct throtl_grp *root_tg;
 	struct request_queue *queue;
 
 	/* Total Number of queued bios on READ and WRITE lists */
@@ -151,56 +153,44 @@
 	return tg;
 }
 
+static void throtl_free_tg(struct rcu_head *head)
+{
+	struct throtl_grp *tg;
+
+	tg = container_of(head, struct throtl_grp, rcu_head);
+	free_percpu(tg->blkg.stats_cpu);
+	kfree(tg);
+}
+
 static void throtl_put_tg(struct throtl_grp *tg)
 {
 	BUG_ON(atomic_read(&tg->ref) <= 0);
 	if (!atomic_dec_and_test(&tg->ref))
 		return;
-	kfree(tg);
+
+	/*
+	 * A group is freed in rcu manner. But having an rcu lock does not
+	 * mean that one can access all the fields of blkg and assume these
+	 * are valid. For example, don't try to follow throtl_data and
+	 * request queue links.
+	 *
+	 * Having a reference to blkg under an rcu allows acess to only
+	 * values local to groups like group stats and group rate limits
+	 */
+	call_rcu(&tg->rcu_head, throtl_free_tg);
 }
 
-static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-			struct blkio_cgroup *blkcg)
+static void throtl_init_group(struct throtl_grp *tg)
 {
-	struct throtl_grp *tg = NULL;
-	void *key = td;
-	struct backing_dev_info *bdi = &td->queue->backing_dev_info;
-	unsigned int major, minor;
-
-	/*
-	 * TODO: Speed up blkiocg_lookup_group() by maintaining a radix
-	 * tree of blkg (instead of traversing through hash list all
-	 * the time.
-	 */
-
-	/*
-	 * This is the common case when there are no blkio cgroups.
- 	 * Avoid lookup in this case
- 	 */
-	if (blkcg == &blkio_root_cgroup)
-		tg = &td->root_tg;
-	else
-		tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
-
-	/* Fill in device details for root group */
-	if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		tg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-
-	if (tg)
-		goto done;
-
-	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-	if (!tg)
-		goto done;
-
 	INIT_HLIST_NODE(&tg->tg_node);
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
-	td->limits_changed = false;
+	tg->limits_changed = false;
+
+	/* Practically unlimited BW */
+	tg->bps[0] = tg->bps[1] = -1;
+	tg->iops[0] = tg->iops[1] = -1;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -209,33 +199,181 @@
 	 * exit or cgroup deletion path depending on who is exiting first.
 	 */
 	atomic_set(&tg->ref, 1);
+}
+
+/* Should be called with rcu read lock held (needed for blkcg) */
+static void
+throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
+{
+	hlist_add_head(&tg->tg_node, &td->tg_list);
+	td->nr_undestroyed_grps++;
+}
+
+static void
+__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+	struct backing_dev_info *bdi = &td->queue->backing_dev_info;
+	unsigned int major, minor;
+
+	if (!tg || tg->blkg.dev)
+		return;
+
+	/*
+	 * Fill in device details for a group which might not have been
+	 * filled at group creation time as queue was being instantiated
+	 * and driver had not attached a device yet
+	 */
+	if (bdi->dev && dev_name(bdi->dev)) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		tg->blkg.dev = MKDEV(major, minor);
+	}
+}
+
+/*
+ * Should be called with without queue lock held. Here queue lock will be
+ * taken rarely. It will be taken only once during life time of a group
+ * if need be
+ */
+static void
+throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+	if (!tg || tg->blkg.dev)
+		return;
+
+	spin_lock_irq(td->queue->queue_lock);
+	__throtl_tg_fill_dev_details(td, tg);
+	spin_unlock_irq(td->queue->queue_lock);
+}
+
+static void throtl_init_add_tg_lists(struct throtl_data *td,
+			struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+{
+	__throtl_tg_fill_dev_details(td, tg);
 
 	/* Add group onto cgroup list */
-	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
 	blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
-				MKDEV(major, minor), BLKIO_POLICY_THROTL);
+				tg->blkg.dev, BLKIO_POLICY_THROTL);
 
 	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
 	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
 	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
 	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
-	hlist_add_head(&tg->tg_node, &td->tg_list);
-	td->nr_undestroyed_grps++;
-done:
+	throtl_add_group_to_td_list(td, tg);
+}
+
+/* Should be called without queue lock and outside of rcu period */
+static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
+{
+	struct throtl_grp *tg = NULL;
+	int ret;
+
+	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
+	if (!tg)
+		return NULL;
+
+	ret = blkio_alloc_blkg_stats(&tg->blkg);
+
+	if (ret) {
+		kfree(tg);
+		return NULL;
+	}
+
+	throtl_init_group(tg);
 	return tg;
 }
 
-static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+static struct
+throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
 	struct throtl_grp *tg = NULL;
+	void *key = td;
+
+	/*
+	 * This is the common case when there are no blkio cgroups.
+ 	 * Avoid lookup in this case
+ 	 */
+	if (blkcg == &blkio_root_cgroup)
+		tg = td->root_tg;
+	else
+		tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
+
+	__throtl_tg_fill_dev_details(td, tg);
+	return tg;
+}
+
+/*
+ * This function returns with queue lock unlocked in case of error, like
+ * request queue is no more
+ */
+static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+{
+	struct throtl_grp *tg = NULL, *__tg = NULL;
 	struct blkio_cgroup *blkcg;
+	struct request_queue *q = td->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	tg = throtl_find_alloc_tg(td, blkcg);
-	if (!tg)
-		tg = &td->root_tg;
+	tg = throtl_find_tg(td, blkcg);
+	if (tg) {
+		rcu_read_unlock();
+		return tg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc
+	 *
+	 * Take the request queue reference to make sure queue does not
+	 * go away once we return from allocation.
+	 */
+	blk_get_queue(q);
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	tg = throtl_alloc_tg(td);
+	/*
+	 * We might have slept in group allocation. Make sure queue is not
+	 * dead
+	 */
+	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+		blk_put_queue(q);
+		if (tg)
+			kfree(tg);
+
+		return ERR_PTR(-ENODEV);
+	}
+	blk_put_queue(q);
+
+	/* Group allocated and queue is still alive. take the lock */
+	spin_lock_irq(q->queue_lock);
+
+	/*
+	 * Initialize the new group. After sleeping, read the blkcg again.
+	 */
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__tg = throtl_find_tg(td, blkcg);
+
+	if (__tg) {
+		kfree(tg);
+		rcu_read_unlock();
+		return __tg;
+	}
+
+	/* Group allocation failed. Account the IO to root group */
+	if (!tg) {
+		tg = td->root_tg;
+		return tg;
+	}
+
+	throtl_init_add_tg_lists(td, tg, blkcg);
 	rcu_read_unlock();
 	return tg;
 }
@@ -544,6 +682,12 @@
 	return 0;
 }
 
+static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) {
+	if (tg->bps[rw] == -1 && tg->iops[rw] == -1)
+		return 1;
+	return 0;
+}
+
 /*
  * Returns whether one can dispatch a bio or not. Also returns approx number
  * of jiffies to wait before this bio is with-in IO rate and can be dispatched
@@ -608,10 +752,6 @@
 	tg->bytes_disp[rw] += bio->bi_size;
 	tg->io_disp[rw]++;
 
-	/*
-	 * TODO: This will take blkg->stats_lock. Figure out a way
-	 * to avoid this cost.
-	 */
 	blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync);
 }
 
@@ -989,15 +1129,51 @@
 	struct throtl_grp *tg;
 	struct bio *bio = *biop;
 	bool rw = bio_data_dir(bio), update_disptime = true;
+	struct blkio_cgroup *blkcg;
 
 	if (bio->bi_rw & REQ_THROTTLED) {
 		bio->bi_rw &= ~REQ_THROTTLED;
 		return 0;
 	}
 
+	/*
+	 * A throtl_grp pointer retrieved under rcu can be used to access
+	 * basic fields like stats and io rates. If a group has no rules,
+	 * just update the dispatch stats in lockless manner and return.
+	 */
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+	tg = throtl_find_tg(td, blkcg);
+	if (tg) {
+		throtl_tg_fill_dev_details(td, tg);
+
+		if (tg_no_rule_group(tg, rw)) {
+			blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
+					rw, bio->bi_rw & REQ_SYNC);
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Either group has not been allocated yet or it is not an unlimited
+	 * IO group
+	 */
+
 	spin_lock_irq(q->queue_lock);
 	tg = throtl_get_tg(td);
 
+	if (IS_ERR(tg)) {
+		if (PTR_ERR(tg)	== -ENODEV) {
+			/*
+			 * Queue is gone. No queue lock held here.
+			 */
+			return -ENODEV;
+		}
+	}
+
 	if (tg->nr_queued[rw]) {
 		/*
 		 * There is already another bio queued in same dir. No
@@ -1060,39 +1236,24 @@
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
 	td->limits_changed = false;
-
-	/* Init root group */
-	tg = &td->root_tg;
-	INIT_HLIST_NODE(&tg->tg_node);
-	RB_CLEAR_NODE(&tg->rb_node);
-	bio_list_init(&tg->bio_lists[0]);
-	bio_list_init(&tg->bio_lists[1]);
-
-	/* Practically unlimited BW */
-	tg->bps[0] = tg->bps[1] = -1;
-	tg->iops[0] = tg->iops[1] = -1;
-	td->limits_changed = false;
-
-	/*
-	 * Set root group reference to 2. One reference will be dropped when
-	 * all groups on tg_list are being deleted during queue exit. Other
-	 * reference will remain there as we don't want to delete this group
-	 * as it is statically allocated and gets destroyed when throtl_data
-	 * goes away.
-	 */
-	atomic_set(&tg->ref, 2);
-	hlist_add_head(&tg->tg_node, &td->tg_list);
-	td->nr_undestroyed_grps++;
-
 	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
+	/* alloc and Init root group. */
+	td->queue = q;
+	tg = throtl_alloc_tg(td);
+
+	if (!tg) {
+		kfree(td);
+		return -ENOMEM;
+	}
+
+	td->root_tg = tg;
+
 	rcu_read_lock();
-	blkiocg_add_blkio_group(&blkio_root_cgroup, &tg->blkg, (void *)td,
-					0, BLKIO_POLICY_THROTL);
+	throtl_init_add_tg_lists(td, tg, &blkio_root_cgroup);
 	rcu_read_unlock();
 
 	/* Attach throtl data to request queue */
-	td->queue = q;
 	q->td = td;
 	return 0;
 }
diff --git a/block/blk.h b/block/blk.h
index 6126346..d658628 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,28 @@
 			return rq;
 		}
 
-		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (q->flush_pending_idx != q->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			q->flush_queue_delayed = 1;
+			return NULL;
+		}
+		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ab7a9e6..7c52d68 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -665,15 +667,11 @@
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
@@ -1014,28 +1012,47 @@
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-		struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1049,43 +1066,94 @@
 	 */
 	cfqg->ref = 1;
 
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
+
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initialized yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	if (bdi->dev) {
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
-
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-
-done:
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	struct blkio_cgroup *blkcg;
-	struct cfq_group *cfqg = NULL;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-	if (!cfqg && create)
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
@@ -1118,6 +1186,7 @@
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
 		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
@@ -1176,7 +1245,7 @@
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1210,7 +1279,6 @@
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1281,7 +1349,7 @@
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
 	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
@@ -2029,7 +2097,7 @@
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
@@ -2911,7 +2979,7 @@
 	struct cfq_group *cfqg;
 
 retry:
-	cfqg = cfq_get_cfqg(cfqd, 1);
+	cfqg = cfq_get_cfqg(cfqd);
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3883,11 @@
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3906,13 @@
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3922,25 @@
 	ida_remove(&cic_index_ida, cfqd->cic_index);
 	spin_unlock(&cic_index_lock);
 
-	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	call_rcu(&cfqd->rcu, cfq_cfqd_free);
+	/*
+	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
+	 * Do this wait only if there are other unlinked groups out
+	 * there. This can happen if cgroup deletion path claimed the
+	 * responsibility of cleaning up a group before queue cleanup code
+	 * get to the group.
+	 *
+	 * Do not call synchronize_rcu() unconditionally as there are drivers
+	 * which create/delete request queue hundreds of times during scan/boot
+	 * and synchronize_rcu() can take significant time and slow down boot.
+	 */
+	if (wait)
+		synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	/* Free up per cpu stats for root group */
+	free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+	kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
@@ -3886,8 +3973,12 @@
 		return NULL;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	if (!cfqd) {
+		spin_lock(&cic_index_lock);
+		ida_remove(&cic_index_ida, i);
+		spin_unlock(&cic_index_lock);
 		return NULL;
+	}
 
 	/*
 	 * Don't need take queue_lock in the routine, since we are
@@ -3909,14 +4000,29 @@
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
 	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
+
+	if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+		kfree(cfqg);
+		kfree(cfqd);
+		return NULL;
+	}
+
 	rcu_read_lock();
+
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
diff --git a/block/elevator.c b/block/elevator.c
index 45ca1e3..b0b38ce 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -155,13 +155,8 @@
 
 	e = elevator_find(name);
 	if (!e) {
-		char elv[ELV_NAME_MAX + strlen("-iosched")];
-
 		spin_unlock(&elv_list_lock);
-
-		snprintf(elv, sizeof(elv), "%s-iosched", name);
-
-		request_module("%s", elv);
+		request_module("%s-iosched", name);
 		spin_lock(&elv_list_lock);
 		e = elevator_find(name);
 	}
@@ -421,8 +416,6 @@
 	struct list_head *entry;
 	int stop_flags;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -661,8 +654,6 @@
 
 	rq->q = q;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 61631ed..3bb154d 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -54,6 +54,8 @@
 
 source "drivers/pps/Kconfig"
 
+source "drivers/ptp/Kconfig"
+
 source "drivers/gpio/Kconfig"
 
 source "drivers/w1/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 145aead..6b17f58 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -75,6 +75,7 @@
 obj-$(CONFIG_RTC_LIB)		+= rtc/
 obj-y				+= i2c/ media/
 obj-$(CONFIG_PPS)		+= pps/
+obj-$(CONFIG_PTP_1588_CLOCK)	+= ptp/
 obj-$(CONFIG_W1)		+= w1/
 obj-$(CONFIG_POWER_SUPPLY)	+= power/
 obj-$(CONFIG_HWMON)		+= hwmon/
@@ -94,7 +95,7 @@
 obj-$(CONFIG_DMA_ENGINE)	+= dma/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
-obj-$(CONFIG_NEW_LEDS)		+= leds/
+obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
 obj-$(CONFIG_SGI_SN)		+= sn/
 obj-y				+= firmware/
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 096aebf..f74b2ea 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -101,6 +101,14 @@
 
 static struct einj_parameter *einj_param;
 
+#ifndef writeq
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+	writel(val, addr);
+	writel(val >> 32, addr+4);
+}
+#endif
+
 static void einj_exec_ctx_init(struct apei_exec_context *ctx)
 {
 	apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
index 542e539..7489b89c3 100644
--- a/drivers/acpi/atomicio.c
+++ b/drivers/acpi/atomicio.c
@@ -280,9 +280,11 @@
 	case 32:
 		*val = readl(addr);
 		break;
+#ifdef readq
 	case 64:
 		*val = readq(addr);
 		break;
+#endif
 	default:
 		return -EINVAL;
 	}
@@ -307,9 +309,11 @@
 	case 32:
 		writel(val, addr);
 		break;
+#ifdef writeq
 	case 64:
 		writeq(val, addr);
 		break;
+#endif
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 30ea95f..d51f979 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1089,21 +1089,21 @@
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	struct request_queue *q = sdev->request_queue;
+
 	if (!ata_id_has_unload(dev->id))
 		dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
 	/* configure max sectors */
-	blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
+	blk_queue_max_hw_sectors(q, dev->max_sectors);
 
 	if (dev->class == ATA_DEV_ATAPI) {
-		struct request_queue *q = sdev->request_queue;
 		void *buf;
 
 		sdev->sector_size = ATA_SECT_SIZE;
 
 		/* set DMA padding */
-		blk_queue_update_dma_pad(sdev->request_queue,
-					 ATA_DMA_PAD_SZ - 1);
+		blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
 
 		/* configure draining */
 		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
@@ -1131,8 +1131,7 @@
 			"sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
 			sdev->sector_size);
 
-	blk_queue_update_dma_alignment(sdev->request_queue,
-				       sdev->sector_size - 1);
+	blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
 
 	if (dev->flags & ATA_DFLAG_AN)
 		set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -1145,6 +1144,8 @@
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
 
+	blk_queue_flush_queueable(q, false);
+
 	dev->sdev = sdev;
 	return 0;
 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index b3b72d6..793f796 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/memory.h>
+#include <linux/vmstat.h>
 #include <linux/node.h>
 #include <linux/hugetlb.h>
 #include <linux/compaction.h>
@@ -179,11 +180,14 @@
 				struct sysdev_attribute *attr, char *buf)
 {
 	int nid = dev->id;
-	return sprintf(buf,
-		"nr_written %lu\n"
-		"nr_dirtied %lu\n",
-		node_page_state(nid, NR_WRITTEN),
-		node_page_state(nid, NR_DIRTIED));
+	int i;
+	int n = 0;
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+			     node_page_state(nid, i));
+
+	return n;
 }
 static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
 
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 83c32cb..717d6e4 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -470,6 +470,27 @@
 	  block device driver.  It communicates with a back-end driver
 	  in another domain which drives the actual block device.
 
+config XEN_BLKDEV_BACKEND
+	tristate "Block-device backend driver"
+	depends on XEN_BACKEND
+	help
+	  The block-device backend driver allows the kernel to export its
+	  block devices to other guests via a high-performance shared-memory
+	  interface.
+
+	  The corresponding Linux frontend driver is enabled by the
+	  CONFIG_XEN_BLKDEV_FRONTEND configuration option.
+
+	  The backend driver attaches itself to a any block device specified
+	  in the XenBus configuration. There are no limits to what the block
+	  device as long as it has a major and minor.
+
+	  If you are compiling a kernel to run in a Xen block backend driver
+	  domain (often this is domain 0) you should say Y here. To
+	  compile this driver as a module, chose M here: the module
+	  will be called xen-blkback.
+
+
 config VIRTIO_BLK
 	tristate "Virtio block driver (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && VIRTIO
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 40528ba..76646e9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -36,6 +36,7 @@
 obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
 obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9bf1398..8f4ef65 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -64,6 +64,10 @@
 MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
 MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
+static int cciss_tape_cmds = 6;
+module_param(cciss_tape_cmds, int, 0644);
+MODULE_PARM_DESC(cciss_tape_cmds,
+	"number of commands to allocate for tape devices (default: 6)");
 
 static DEFINE_MUTEX(cciss_mutex);
 static struct proc_dir_entry *proc_cciss;
@@ -194,6 +198,8 @@
 static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
 	unsigned long *memory_bar);
 static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
+static __devinit int write_driver_ver_to_cfgtable(
+	CfgTable_struct __iomem *cfgtable);
 
 /* performant mode helper functions */
 static void  calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -556,7 +562,7 @@
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
-/* List of controllers which cannot be reset on kexec with reset_devices */
+/* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
 	0x324a103C, /* Smart Array P712m */
 	0x324b103C, /* SmartArray P711m */
@@ -574,23 +580,45 @@
 	0x409D0E11, /* Smart Array 6400 EM */
 };
 
-static int ctlr_is_resettable(struct ctlr_info *h)
+/* List of controllers which cannot even be soft reset */
+static u32 soft_unresettable_controller[] = {
+	0x409C0E11, /* Smart Array 6400 */
+	0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_hard_resettable(u32 board_id)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-		if (unresettable_controller[i] == h->board_id)
+		if (unresettable_controller[i] == board_id)
 			return 0;
 	return 1;
 }
 
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
+		if (soft_unresettable_controller[i] == board_id)
+			return 0;
+	return 1;
+}
+
+static int ctlr_is_resettable(u32 board_id)
+{
+	return ctlr_is_hard_resettable(board_id) ||
+		ctlr_is_soft_resettable(board_id);
+}
+
 static ssize_t host_show_resettable(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
 {
 	struct ctlr_info *h = to_hba(dev);
 
-	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
 }
 static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
 
@@ -2567,7 +2595,7 @@
 		}
 	} else if (cmd_type == TYPE_MSG) {
 		switch (cmd) {
-		case 0:	/* ABORT message */
+		case CCISS_ABORT_MSG:
 			c->Request.CDBLen = 12;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_WRITE;
@@ -2577,16 +2605,16 @@
 			/* buff contains the tag of the command to abort */
 			memcpy(&c->Request.CDB[4], buff, 8);
 			break;
-		case 1:	/* RESET message */
+		case CCISS_RESET_MSG:
 			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_NONE;
 			c->Request.Timeout = 0;
 			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
 			c->Request.CDB[0] = cmd;	/* reset */
-			c->Request.CDB[1] = 0x03;	/* reset a target */
+			c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET;
 			break;
-		case 3:	/* No-Op message */
+		case CCISS_NOOP_MSG:
 			c->Request.CDBLen = 1;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_WRITE;
@@ -2615,6 +2643,31 @@
 	return status;
 }
 
+static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
+	u8 reset_type)
+{
+	CommandList_struct *c;
+	int return_status;
+
+	c = cmd_alloc(h);
+	if (!c)
+		return -ENOMEM;
+	return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
+		CTLR_LUNID, TYPE_MSG);
+	c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
+	if (return_status != IO_OK) {
+		cmd_special_free(h, c);
+		return return_status;
+	}
+	c->waiting = NULL;
+	enqueue_cmd_and_start_io(h, c);
+	/* Don't wait for completion, the reset won't complete.  Don't free
+	 * the command either.  This is the last command we will send before
+	 * re-initializing everything, so it doesn't matter and won't leak.
+	 */
+	return 0;
+}
+
 static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
 {
 	switch (c->err_info->ScsiStatus) {
@@ -3461,6 +3514,63 @@
 	return next_command(h);
 }
 
+/* Some controllers, like p400, will give us one interrupt
+ * after a soft reset, even if we turned interrupts off.
+ * Only need to check for this in the cciss_xxx_discard_completions
+ * functions.
+ */
+static int ignore_bogus_interrupt(ctlr_info_t *h)
+{
+	if (likely(!reset_devices))
+		return 0;
+
+	if (likely(h->interrupts_enabled))
+		return 0;
+
+	dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
+		"(known firmware bug.)  Ignoring.\n");
+
+	return 1;
+}
+
+static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
+{
+	ctlr_info_t *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	if (interrupt_not_for_us(h))
+		return IRQ_NONE;
+	spin_lock_irqsave(&h->lock, flags);
+	while (interrupt_pending(h)) {
+		raw_tag = get_next_completion(h);
+		while (raw_tag != FIFO_EMPTY)
+			raw_tag = next_command(h);
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
+{
+	ctlr_info_t *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&h->lock, flags);
+	raw_tag = get_next_completion(h);
+	while (raw_tag != FIFO_EMPTY)
+		raw_tag = next_command(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t do_cciss_intx(int irq, void *dev_id)
 {
 	ctlr_info_t *h = dev_id;
@@ -4078,6 +4188,9 @@
 		cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
 	if (!h->cfgtable)
 		return -ENOMEM;
+	rc = write_driver_ver_to_cfgtable(h->cfgtable);
+	if (rc)
+		return rc;
 	/* Find performant mode table. */
 	trans_offset = readl(&h->cfgtable->TransMethodOffset);
 	h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
@@ -4112,7 +4225,7 @@
 static void __devinit cciss_find_board_params(ctlr_info_t *h)
 {
 	cciss_get_max_perf_mode_cmds(h);
-	h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+	h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
 	h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
 	/*
 	 * Limit in-command s/g elements to 32 save dma'able memory.
@@ -4348,7 +4461,7 @@
 		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
 		if ((tag & ~3) == paddr32)
 			break;
-		schedule_timeout_uninterruptible(HZ);
+		msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS);
 	}
 
 	iounmap(vaddr);
@@ -4375,11 +4488,10 @@
 	return 0;
 }
 
-#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
 #define cciss_noop(p) cciss_message(p, 3, 0)
 
 static int cciss_controller_hard_reset(struct pci_dev *pdev,
-	void * __iomem vaddr, bool use_doorbell)
+	void * __iomem vaddr, u32 use_doorbell)
 {
 	u16 pmcsr;
 	int pos;
@@ -4390,8 +4502,7 @@
 		 * other way using the doorbell register.
 		 */
 		dev_info(&pdev->dev, "using doorbell to reset controller\n");
-		writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
-		msleep(1000);
+		writel(use_doorbell, vaddr + SA5_DOORBELL);
 	} else { /* Try to do it the PCI power state way */
 
 		/* Quoting from the Open CISS Specification: "The Power
@@ -4422,12 +4533,64 @@
 		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
 		pmcsr |= PCI_D0;
 		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
-		msleep(500);
 	}
 	return 0;
 }
 
+static __devinit void init_driver_version(char *driver_version, int len)
+{
+	memset(driver_version, 0, len);
+	strncpy(driver_version, "cciss " DRIVER_NAME, len - 1);
+}
+
+static __devinit int write_driver_ver_to_cfgtable(
+	CfgTable_struct __iomem *cfgtable)
+{
+	char *driver_version;
+	int i, size = sizeof(cfgtable->driver_version);
+
+	driver_version = kmalloc(size, GFP_KERNEL);
+	if (!driver_version)
+		return -ENOMEM;
+
+	init_driver_version(driver_version, size);
+	for (i = 0; i < size; i++)
+		writeb(driver_version[i], &cfgtable->driver_version[i]);
+	kfree(driver_version);
+	return 0;
+}
+
+static __devinit void read_driver_ver_from_cfgtable(
+	CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver)
+{
+	int i;
+
+	for (i = 0; i < sizeof(cfgtable->driver_version); i++)
+		driver_ver[i] = readb(&cfgtable->driver_version[i]);
+}
+
+static __devinit int controller_reset_failed(
+	CfgTable_struct __iomem *cfgtable)
+{
+
+	char *driver_ver, *old_driver_ver;
+	int rc, size = sizeof(cfgtable->driver_version);
+
+	old_driver_ver = kmalloc(2 * size, GFP_KERNEL);
+	if (!old_driver_ver)
+		return -ENOMEM;
+	driver_ver = old_driver_ver + size;
+
+	/* After a reset, the 32 bytes of "driver version" in the cfgtable
+	 * should have been changed, otherwise we know the reset failed.
+	 */
+	init_driver_version(old_driver_ver, size);
+	read_driver_ver_from_cfgtable(cfgtable, driver_ver);
+	rc = !memcmp(driver_ver, old_driver_ver, size);
+	kfree(old_driver_ver);
+	return rc;
+}
+
 /* This does a hard reset of the controller using PCI power management
  * states or using the doorbell register. */
 static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
@@ -4437,10 +4600,10 @@
 	u64 cfg_base_addr_index;
 	void __iomem *vaddr;
 	unsigned long paddr;
-	u32 misc_fw_support, active_transport;
+	u32 misc_fw_support;
 	int rc;
 	CfgTable_struct __iomem *cfgtable;
-	bool use_doorbell;
+	u32 use_doorbell;
 	u32 board_id;
 	u16 command_register;
 
@@ -4464,12 +4627,16 @@
 	 * likely not be happy.  Just forbid resetting this conjoined mess.
 	 */
 	cciss_lookup_board_id(pdev, &board_id);
-	if (board_id == 0x409C0E11 || board_id == 0x409D0E11) {
+	if (!ctlr_is_resettable(board_id)) {
 		dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
 				"due to shared cache module.");
 		return -ENODEV;
 	}
 
+	/* if controller is soft- but not hard resettable... */
+	if (!ctlr_is_hard_resettable(board_id))
+		return -ENOTSUPP; /* try soft reset later. */
+
 	/* Save the PCI command register */
 	pci_read_config_word(pdev, 4, &command_register);
 	/* Turn the board off.  This is so that later pci_restore_state()
@@ -4497,16 +4664,28 @@
 		rc = -ENOMEM;
 		goto unmap_vaddr;
 	}
+	rc = write_driver_ver_to_cfgtable(cfgtable);
+	if (rc)
+		goto unmap_vaddr;
 
-	/* If reset via doorbell register is supported, use that. */
-	misc_fw_support = readl(&cfgtable->misc_fw_support);
-	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
-
-	/* The doorbell reset seems to cause lockups on some Smart
-	 * Arrays (e.g. P410, P410i, maybe others).  Until this is
-	 * fixed or at least isolated, avoid the doorbell reset.
+	/* If reset via doorbell register is supported, use that.
+	 * There are two such methods.  Favor the newest method.
 	 */
-	use_doorbell = 0;
+	misc_fw_support = readl(&cfgtable->misc_fw_support);
+	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
+	if (use_doorbell) {
+		use_doorbell = DOORBELL_CTLR_RESET2;
+	} else {
+		use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
+		if (use_doorbell) {
+			dev_warn(&pdev->dev, "Controller claims that "
+				"'Bit 2 doorbell reset' is "
+				"supported, but not 'bit 5 doorbell reset'.  "
+				"Firmware update is recommended.\n");
+			rc = -ENOTSUPP; /* use the soft reset */
+			goto unmap_cfgtable;
+		}
+	}
 
 	rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
 	if (rc)
@@ -4524,30 +4703,31 @@
 	msleep(CCISS_POST_RESET_PAUSE_MSECS);
 
 	/* Wait for board to become not ready, then ready. */
-	dev_info(&pdev->dev, "Waiting for board to become ready.\n");
+	dev_info(&pdev->dev, "Waiting for board to reset.\n");
 	rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
-	if (rc) /* Don't bail, might be E500, etc. which can't be reset */
-		dev_warn(&pdev->dev,
-			"failed waiting for board to become not ready\n");
+	if (rc) {
+		dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
+				"  Will try soft reset.\n");
+		rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+		goto unmap_cfgtable;
+	}
 	rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
 	if (rc) {
 		dev_warn(&pdev->dev,
-			"failed waiting for board to become ready\n");
+			"failed waiting for board to become ready "
+			"after hard reset\n");
 		goto unmap_cfgtable;
 	}
-	dev_info(&pdev->dev, "board ready.\n");
 
-	/* Controller should be in simple mode at this point.  If it's not,
-	 * It means we're on one of those controllers which doesn't support
-	 * the doorbell reset method and on which the PCI power management reset
-	 * method doesn't work (P800, for example.)
-	 * In those cases, don't try to proceed, as it generally doesn't work.
-	 */
-	active_transport = readl(&cfgtable->TransportActive);
-	if (active_transport & PERFORMANT_MODE) {
-		dev_warn(&pdev->dev, "Unable to successfully reset controller,"
-			" Ignoring controller.\n");
-		rc = -ENODEV;
+	rc = controller_reset_failed(vaddr);
+	if (rc < 0)
+		goto unmap_cfgtable;
+	if (rc) {
+		dev_warn(&pdev->dev, "Unable to successfully hard reset "
+			"controller. Will try soft reset.\n");
+		rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+	} else {
+		dev_info(&pdev->dev, "Board ready after hard reset.\n");
 	}
 
 unmap_cfgtable:
@@ -4574,11 +4754,12 @@
 	 * due to concerns about shared bbwc between 6402/6404 pair.
 	 */
 	if (rc == -ENOTSUPP)
-		return 0; /* just try to do the kdump anyhow. */
+		return rc; /* just try to do the kdump anyhow. */
 	if (rc)
 		return -ENODEV;
 
 	/* Now try to get the controller to respond to a no-op */
+	dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
 	for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
 		if (cciss_noop(pdev) == 0)
 			break;
@@ -4591,6 +4772,148 @@
 	return 0;
 }
 
+static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
+{
+	h->cmd_pool_bits = kmalloc(
+		DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+		sizeof(unsigned long), GFP_KERNEL);
+	h->cmd_pool = pci_alloc_consistent(h->pdev,
+		h->nr_cmds * sizeof(CommandList_struct),
+		&(h->cmd_pool_dhandle));
+	h->errinfo_pool = pci_alloc_consistent(h->pdev,
+		h->nr_cmds * sizeof(ErrorInfo_struct),
+		&(h->errinfo_pool_dhandle));
+	if ((h->cmd_pool_bits == NULL)
+		|| (h->cmd_pool == NULL)
+		|| (h->errinfo_pool == NULL)) {
+		dev_err(&h->pdev->dev, "out of memory");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h)
+{
+	int i;
+
+	/* zero it, so that on free we need not know how many were alloc'ed */
+	h->scatter_list = kzalloc(h->max_commands *
+				sizeof(struct scatterlist *), GFP_KERNEL);
+	if (!h->scatter_list)
+		return -ENOMEM;
+
+	for (i = 0; i < h->nr_cmds; i++) {
+		h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) *
+						h->maxsgentries, GFP_KERNEL);
+		if (h->scatter_list[i] == NULL) {
+			dev_err(&h->pdev->dev, "could not allocate "
+				"s/g lists\n");
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static void cciss_free_scatterlists(ctlr_info_t *h)
+{
+	int i;
+
+	if (h->scatter_list) {
+		for (i = 0; i < h->nr_cmds; i++)
+			kfree(h->scatter_list[i]);
+		kfree(h->scatter_list);
+	}
+}
+
+static void cciss_free_cmd_pool(ctlr_info_t *h)
+{
+	kfree(h->cmd_pool_bits);
+	if (h->cmd_pool)
+		pci_free_consistent(h->pdev,
+			h->nr_cmds * sizeof(CommandList_struct),
+			h->cmd_pool, h->cmd_pool_dhandle);
+	if (h->errinfo_pool)
+		pci_free_consistent(h->pdev,
+			h->nr_cmds * sizeof(ErrorInfo_struct),
+			h->errinfo_pool, h->errinfo_pool_dhandle);
+}
+
+static int cciss_request_irq(ctlr_info_t *h,
+	irqreturn_t (*msixhandler)(int, void *),
+	irqreturn_t (*intxhandler)(int, void *))
+{
+	if (h->msix_vector || h->msi_vector) {
+		if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+				IRQF_DISABLED, h->devname, h))
+			return 0;
+		dev_err(&h->pdev->dev, "Unable to get msi irq %d"
+			" for %s\n", h->intr[PERF_MODE_INT],
+			h->devname);
+		return -1;
+	}
+
+	if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+			IRQF_DISABLED, h->devname, h))
+		return 0;
+	dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+		h->intr[PERF_MODE_INT], h->devname);
+	return -1;
+}
+
+static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h)
+{
+	if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
+		dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
+		return -EIO;
+	}
+
+	dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
+	if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
+		dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
+		return -1;
+	}
+
+	dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
+	if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
+		dev_warn(&h->pdev->dev, "Board failed to become ready "
+			"after soft reset.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
+{
+	int ctlr = h->ctlr;
+
+	free_irq(h->intr[PERF_MODE_INT], h);
+#ifdef CONFIG_PCI_MSI
+	if (h->msix_vector)
+		pci_disable_msix(h->pdev);
+	else if (h->msi_vector)
+		pci_disable_msi(h->pdev);
+#endif /* CONFIG_PCI_MSI */
+	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+	cciss_free_scatterlists(h);
+	cciss_free_cmd_pool(h);
+	kfree(h->blockFetchTable);
+	if (h->reply_pool)
+		pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+				h->reply_pool, h->reply_pool_dhandle);
+	if (h->transtable)
+		iounmap(h->transtable);
+	if (h->cfgtable)
+		iounmap(h->cfgtable);
+	if (h->vaddr)
+		iounmap(h->vaddr);
+	unregister_blkdev(h->major, h->devname);
+	cciss_destroy_hba_sysfs_entry(h);
+	pci_release_regions(h->pdev);
+	kfree(h);
+	hba[ctlr] = NULL;
+}
+
 /*
  *  This is it.  Find all the controllers and register them.  I really hate
  *  stealing all these major device numbers.
@@ -4601,15 +4924,28 @@
 {
 	int i;
 	int j = 0;
-	int k = 0;
 	int rc;
+	int try_soft_reset = 0;
 	int dac, return_code;
 	InquiryData_struct *inq_buff;
 	ctlr_info_t *h;
+	unsigned long flags;
 
 	rc = cciss_init_reset_devices(pdev);
-	if (rc)
-		return rc;
+	if (rc) {
+		if (rc != -ENOTSUPP)
+			return rc;
+		/* If the reset fails in a particular way (it has no way to do
+		 * a proper hard reset, so returns -ENOTSUPP) we can try to do
+		 * a soft reset once we get the controller configured up to the
+		 * point that it can accept a command.
+		 */
+		try_soft_reset = 1;
+		rc = 0;
+	}
+
+reinit_after_soft_reset:
+
 	i = alloc_cciss_hba(pdev);
 	if (i < 0)
 		return -1;
@@ -4627,6 +4963,11 @@
 	sprintf(h->devname, "cciss%d", i);
 	h->ctlr = i;
 
+	if (cciss_tape_cmds < 2)
+		cciss_tape_cmds = 2;
+	if (cciss_tape_cmds > 16)
+		cciss_tape_cmds = 16;
+
 	init_completion(&h->scan_wait);
 
 	if (cciss_create_hba_sysfs_entry(h))
@@ -4662,62 +5003,20 @@
 
 	/* make sure the board interrupts are off */
 	h->access.set_intr_mask(h, CCISS_INTR_OFF);
-	if (h->msi_vector || h->msix_vector) {
-		if (request_irq(h->intr[PERF_MODE_INT],
-				do_cciss_msix_intr,
-				IRQF_DISABLED, h->devname, h)) {
-			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-			       h->intr[PERF_MODE_INT], h->devname);
-			goto clean2;
-		}
-	} else {
-		if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx,
-				IRQF_DISABLED, h->devname, h)) {
-			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-			       h->intr[PERF_MODE_INT], h->devname);
-			goto clean2;
-		}
-	}
+	rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx);
+	if (rc)
+		goto clean2;
 
 	dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
 	       h->devname, pdev->device, pci_name(pdev),
 	       h->intr[PERF_MODE_INT], dac ? "" : " not");
 
-	h->cmd_pool_bits =
-	    kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
-			* sizeof(unsigned long), GFP_KERNEL);
-	h->cmd_pool = (CommandList_struct *)
-	    pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(CommandList_struct),
-		    &(h->cmd_pool_dhandle));
-	h->errinfo_pool = (ErrorInfo_struct *)
-	    pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(ErrorInfo_struct),
-		    &(h->errinfo_pool_dhandle));
-	if ((h->cmd_pool_bits == NULL)
-	    || (h->cmd_pool == NULL)
-	    || (h->errinfo_pool == NULL)) {
-		dev_err(&h->pdev->dev, "out of memory");
-		goto clean4;
-	}
-
-	/* Need space for temp scatter list */
-	h->scatter_list = kmalloc(h->max_commands *
-						sizeof(struct scatterlist *),
-						GFP_KERNEL);
-	if (!h->scatter_list)
+	if (cciss_allocate_cmd_pool(h))
 		goto clean4;
 
-	for (k = 0; k < h->nr_cmds; k++) {
-		h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
-							h->maxsgentries,
-							GFP_KERNEL);
-		if (h->scatter_list[k] == NULL) {
-			dev_err(&h->pdev->dev,
-				"could not allocate s/g lists\n");
-			goto clean4;
-		}
-	}
+	if (cciss_allocate_scatterlists(h))
+		goto clean4;
+
 	h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
 		h->chainsize, h->nr_cmds);
 	if (!h->cmd_sg_list && h->chainsize > 0)
@@ -4741,6 +5040,62 @@
 		h->gendisk[j] = NULL;
 	}
 
+	/* At this point, the controller is ready to take commands.
+	 * Now, if reset_devices and the hard reset didn't work, try
+	 * the soft reset and see if that works.
+	 */
+	if (try_soft_reset) {
+
+		/* This is kind of gross.  We may or may not get a completion
+		 * from the soft reset command, and if we do, then the value
+		 * from the fifo may or may not be valid.  So, we wait 10 secs
+		 * after the reset throwing away any completions we get during
+		 * that time.  Unregister the interrupt handler and register
+		 * fake ones to scoop up any residual completions.
+		 */
+		spin_lock_irqsave(&h->lock, flags);
+		h->access.set_intr_mask(h, CCISS_INTR_OFF);
+		spin_unlock_irqrestore(&h->lock, flags);
+		free_irq(h->intr[PERF_MODE_INT], h);
+		rc = cciss_request_irq(h, cciss_msix_discard_completions,
+					cciss_intx_discard_completions);
+		if (rc) {
+			dev_warn(&h->pdev->dev, "Failed to request_irq after "
+				"soft reset.\n");
+			goto clean4;
+		}
+
+		rc = cciss_kdump_soft_reset(h);
+		if (rc) {
+			dev_warn(&h->pdev->dev, "Soft reset failed.\n");
+			goto clean4;
+		}
+
+		dev_info(&h->pdev->dev, "Board READY.\n");
+		dev_info(&h->pdev->dev,
+			"Waiting for stale completions to drain.\n");
+		h->access.set_intr_mask(h, CCISS_INTR_ON);
+		msleep(10000);
+		h->access.set_intr_mask(h, CCISS_INTR_OFF);
+
+		rc = controller_reset_failed(h->cfgtable);
+		if (rc)
+			dev_info(&h->pdev->dev,
+				"Soft reset appears to have failed.\n");
+
+		/* since the controller's reset, we have to go back and re-init
+		 * everything.  Easiest to just forget what we've done and do it
+		 * all over again.
+		 */
+		cciss_undo_allocations_after_kdump_soft_reset(h);
+		try_soft_reset = 0;
+		if (rc)
+			/* don't go to clean4, we already unallocated */
+			return -ENODEV;
+
+		goto reinit_after_soft_reset;
+	}
+
 	cciss_scsi_setup(h);
 
 	/* Turn the interrupts on so we can service requests */
@@ -4775,21 +5130,9 @@
 	return 1;
 
 clean4:
-	kfree(h->cmd_pool_bits);
-	/* Free up sg elements */
-	for (k-- ; k >= 0; k--)
-		kfree(h->scatter_list[k]);
-	kfree(h->scatter_list);
+	cciss_free_cmd_pool(h);
+	cciss_free_scatterlists(h);
 	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
-	if (h->cmd_pool)
-		pci_free_consistent(h->pdev,
-				    h->nr_cmds * sizeof(CommandList_struct),
-				    h->cmd_pool, h->cmd_pool_dhandle);
-	if (h->errinfo_pool)
-		pci_free_consistent(h->pdev,
-				    h->nr_cmds * sizeof(ErrorInfo_struct),
-				    h->errinfo_pool,
-				    h->errinfo_pool_dhandle);
 	free_irq(h->intr[PERF_MODE_INT], h);
 clean2:
 	unregister_blkdev(h->major, h->devname);
@@ -4887,16 +5230,16 @@
 	iounmap(h->cfgtable);
 	iounmap(h->vaddr);
 
-	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct),
-			    h->cmd_pool, h->cmd_pool_dhandle);
-	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct),
-			    h->errinfo_pool, h->errinfo_pool_dhandle);
-	kfree(h->cmd_pool_bits);
+	cciss_free_cmd_pool(h);
 	/* Free up sg elements */
 	for (j = 0; j < h->nr_cmds; j++)
 		kfree(h->scatter_list[j]);
 	kfree(h->scatter_list);
 	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+	kfree(h->blockFetchTable);
+	if (h->reply_pool)
+		pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+				h->reply_pool, h->reply_pool_dhandle);
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 554bbd9..16b4d58 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -200,7 +200,7 @@
  * the above.
  */
 #define CCISS_BOARD_READY_WAIT_SECS (120)
-#define CCISS_BOARD_NOT_READY_WAIT_SECS (10)
+#define CCISS_BOARD_NOT_READY_WAIT_SECS (100)
 #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
 #define CCISS_BOARD_READY_ITERATIONS \
 	((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
@@ -209,8 +209,9 @@
 	((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
 		CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
 #define CCISS_POST_RESET_PAUSE_MSECS (3000)
-#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
+#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000)
 #define CCISS_POST_RESET_NOOP_RETRIES (12)
+#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000)
 
 /* 
 	Send the command to the hardware 
@@ -239,11 +240,13 @@
 	{ /* Turn interrupts on */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else /* Turn them off */
 	{
 		h->interrupts_enabled = 0;
         	writel( SA5_INTR_OFF, 
 			h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 /*
@@ -257,11 +260,13 @@
         { /* Turn interrupts on */
 		h->interrupts_enabled = 1;
                 writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         } else /* Turn them off */
         {
 		h->interrupts_enabled = 0;
                 writel( SA5B_INTR_OFF,
                         h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         }
 }
 
@@ -271,10 +276,12 @@
 	if (val) { /* turn on interrupts */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else {
 		h->interrupts_enabled = 0;
 		writel(SA5_PERF_INTR_OFF,
 				h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index cd441be..d9be6b4 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -53,6 +53,7 @@
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
 #define DOORBELL_CTLR_RESET     0x00000004l
+#define DOORBELL_CTLR_RESET2    0x00000020l
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
@@ -142,6 +143,14 @@
 #define BMIC_CACHE_FLUSH 0xc2
 #define CCISS_CACHE_FLUSH 0x01	/* C2 was already being used by CCISS */
 
+#define CCISS_ABORT_MSG 0x00
+#define CCISS_RESET_MSG 0x01
+#define CCISS_RESET_TYPE_CONTROLLER 0x00
+#define CCISS_RESET_TYPE_BUS 0x01
+#define CCISS_RESET_TYPE_TARGET 0x03
+#define CCISS_RESET_TYPE_LUN 0x04
+#define CCISS_NOOP_MSG 0x03
+
 /* Command List Structure */
 #define CTLR_LUNID "\0\0\0\0\0\0\0\0"
 
@@ -235,6 +244,8 @@
   u8		   reserved[0x78 - 0x58];
   u32		   misc_fw_support; /* offset 0x78 */
 #define MISC_FW_DOORBELL_RESET (0x02)
+#define MISC_FW_DOORBELL_RESET2 (0x10)
+	u8	   driver_version[32];
 } CfgTable_struct;
 
 struct TransTable_struct {
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index df79380..6961002 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@
 	.proc_name		= "cciss",
 	.proc_info		= cciss_scsi_proc_info,
 	.queuecommand		= cciss_scsi_queue_command,
-	.can_queue		= SCSI_CCISS_CAN_QUEUE,
 	.this_id		= 7,
 	.cmd_per_lun		= 1,
 	.use_clustering		= DISABLE_CLUSTERING,
@@ -108,16 +107,13 @@
 
 #pragma pack()
 
-#define CMD_STACK_SIZE (SCSI_CCISS_CAN_QUEUE * \
-		CCISS_MAX_SCSI_DEVS_PER_HBA + 2)
-			// plus two for init time usage
-
 #pragma pack(1)
 struct cciss_scsi_cmd_stack_t {
 	struct cciss_scsi_cmd_stack_elem_t *pool;
-	struct cciss_scsi_cmd_stack_elem_t *elem[CMD_STACK_SIZE];
+	struct cciss_scsi_cmd_stack_elem_t **elem;
 	dma_addr_t cmd_pool_handle;
 	int top;
+	int nelems;
 };
 #pragma pack()
 
@@ -191,7 +187,7 @@
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
 	stk->top++;
-	if (stk->top >= CMD_STACK_SIZE) {
+	if (stk->top >= stk->nelems) {
 		dev_err(&h->pdev->dev,
 			"scsi_cmd_free called too many times.\n");
 		BUG();
@@ -206,13 +202,14 @@
 	struct cciss_scsi_cmd_stack_t *stk;
 	size_t size;
 
+	stk = &sa->cmd_stack;
+	stk->nelems = cciss_tape_cmds + 2;
 	sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
-		h->chainsize, CMD_STACK_SIZE);
+		h->chainsize, stk->nelems);
 	if (!sa->cmd_sg_list && h->chainsize > 0)
 		return -ENOMEM;
 
-	stk = &sa->cmd_stack; 
-	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
+	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
 
 	/* Check alignment, see cciss_cmd.h near CommandList_struct def. */
 	BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
@@ -221,18 +218,23 @@
 		pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
 
 	if (stk->pool == NULL) {
-		cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
+		cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
 		sa->cmd_sg_list = NULL;
 		return -ENOMEM;
 	}
-
-	for (i=0; i<CMD_STACK_SIZE; i++) {
+	stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL);
+	if (!stk->elem) {
+		pci_free_consistent(h->pdev, size, stk->pool,
+		stk->cmd_pool_handle);
+		return -1;
+	}
+	for (i = 0; i < stk->nelems; i++) {
 		stk->elem[i] = &stk->pool[i];
 		stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + 
 			(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
 		stk->elem[i]->cmdindex = i;
 	}
-	stk->top = CMD_STACK_SIZE-1;
+	stk->top = stk->nelems-1;
 	return 0;
 }
 
@@ -245,16 +247,18 @@
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
-	if (stk->top != CMD_STACK_SIZE-1) {
+	if (stk->top != stk->nelems-1) {
 		dev_warn(&h->pdev->dev,
 			"bug: %d scsi commands are still outstanding.\n",
-			CMD_STACK_SIZE - stk->top);
+			stk->nelems - stk->top);
 	}
-	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
+	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
 
 	pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
 	stk->pool = NULL;
-	cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
+	cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
+	kfree(stk->elem);
+	stk->elem = NULL;
 }
 
 #if 0
@@ -859,6 +863,7 @@
 	sh->io_port = 0;	// good enough?  FIXME, 
 	sh->n_io_port = 0;	// I don't think we use these two...
 	sh->this_id = SELF_SCSI_ID;  
+	sh->can_queue = cciss_tape_cmds;
 	sh->sg_tablesize = h->maxsgentries;
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
 
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index 6d5822f..e71d986 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h
@@ -36,13 +36,9 @@
 		   addressible natively, and may in fact turn
 		   out to be not scsi at all. */
 
-#define SCSI_CCISS_CAN_QUEUE 2
 
 /* 
 
-Note, cmd_per_lun could give us some trouble, so I'm setting it very low.
-Likewise, SCSI_CCISS_CAN_QUEUE is set very conservatively.
-
 If the upper scsi layer tries to track how many commands we have 
 outstanding, it will be operating under the misapprehension that it is
 the only one sending us requests.  We also have the block interface,
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index c6828b68..09ef9a8 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -28,7 +28,7 @@
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial check sum in our on disk activity log.
+/* We maintain a trivial checksum in our on disk activity log.
  * With that we can ensure correct operation even when the storage
  * device might do a partial (last) sector write while losing power.
  */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 76210ba..f440a02 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -74,7 +74,7 @@
  *	as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
  *	seems excessive.
  *
- *	We plan to reduce the amount of in-core bitmap pages by pageing them in
+ *	We plan to reduce the amount of in-core bitmap pages by paging them in
  *	and out against their on-disk location as necessary, but need to make
  *	sure we don't cause too much meta data IO, and must not deadlock in
  *	tight memory situations. This needs some more work.
@@ -200,7 +200,7 @@
  * we if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT	28
 
-/* store_page_idx uses non-atomic assingment. It is only used directly after
+/* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
  * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
  * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
@@ -318,7 +318,7 @@
 /* word offset from start of bitmap to word number _in_page_
  * modulo longs per page
 #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
- hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
  so do it explicitly:
  */
 #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d871b14..ef2ceed 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -700,7 +700,7 @@
 	 * see drbd_endio_pri(). */
 	struct bio *private_bio;
 
-	struct hlist_node colision;
+	struct hlist_node collision;
 	sector_t sector;
 	unsigned int size;
 	unsigned int epoch; /* barrier_nr */
@@ -766,7 +766,7 @@
 
 struct drbd_epoch_entry {
 	struct drbd_work w;
-	struct hlist_node colision;
+	struct hlist_node collision;
 	struct drbd_epoch *epoch; /* for writes */
 	struct drbd_conf *mdev;
 	struct page *pages;
@@ -1129,6 +1129,8 @@
 	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
 	int rs_planed;    /* resync sectors already planned */
 	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
+	int peer_max_bio_size;
+	int local_max_bio_size;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1218,8 +1220,6 @@
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 		       unsigned int set_size);
 extern void tl_clear(struct drbd_conf *mdev);
-enum drbd_req_event;
-extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1434,6 +1434,7 @@
  * hash table. */
 #define HT_SHIFT 8
 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12)       /* Works always = 4k */
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
 
@@ -1518,9 +1519,9 @@
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
-extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
+extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 					enum drbd_role new_role,
 					int force);
@@ -1828,6 +1829,8 @@
 		if (!forcedetach) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
+			if (mdev->state.disk > D_INCONSISTENT)
+				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 			break;
 		}
 		/* NOTE fall through to detach case if forcedetach set */
@@ -2153,6 +2156,10 @@
 static inline void put_ldev(struct drbd_conf *mdev)
 {
 	int i = atomic_dec_return(&mdev->local_cnt);
+
+	/* This may be called from some endio handler,
+	 * so we must not sleep here. */
+
 	__release(local);
 	D_ASSERT(i >= 0);
 	if (i == 0) {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5b525c1..0358e55 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -745,6 +745,9 @@
 		  mdev->agreed_pro_version < 88)
 		rv = SS_NOT_SUPPORTED;
 
+	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+		rv = SS_CONNECTED_OUTDATES;
+
 	return rv;
 }
 
@@ -1565,6 +1568,10 @@
 		put_ldev(mdev);
 	}
 
+	/* Notify peer that I had a local IO error, and did not detached.. */
+	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
+		drbd_send_state(mdev);
+
 	/* Disks got bigger while they were detached */
 	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
 	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
@@ -2064,7 +2071,7 @@
 {
 	struct p_sizes p;
 	sector_t d_size, u_size;
-	int q_order_type;
+	int q_order_type, max_bio_size;
 	int ok;
 
 	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -2072,17 +2079,20 @@
 		d_size = drbd_get_max_capacity(mdev->ldev);
 		u_size = mdev->ldev->dc.disk_size;
 		q_order_type = drbd_queue_order_type(mdev);
+		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
 		put_ldev(mdev);
 	} else {
 		d_size = 0;
 		u_size = 0;
 		q_order_type = QUEUE_ORDERED_NONE;
+		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
 	}
 
 	p.d_size = cpu_to_be64(d_size);
 	p.u_size = cpu_to_be64(u_size);
 	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-	p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
+	p.max_bio_size = cpu_to_be32(max_bio_size);
 	p.queue_order_type = cpu_to_be16(q_order_type);
 	p.dds_flags = cpu_to_be16(flags);
 
@@ -2722,7 +2732,7 @@
 
 		/* double check digest, sometimes buffers have been modified in flight. */
 		if (dgs > 0 && dgs <= 64) {
-			/* 64 byte, 512 bit, is the larges digest size
+			/* 64 byte, 512 bit, is the largest digest size
 			 * currently supported in kernel crypto. */
 			unsigned char digest[64];
 			drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
@@ -3041,6 +3051,8 @@
 	mdev->agreed_pro_version = PRO_VERSION_MAX;
 	mdev->write_ordering = WO_bdev_flush;
 	mdev->resync_wenr = LC_FREE;
+	mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
+	mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
 }
 
 void drbd_mdev_cleanup(struct drbd_conf *mdev)
@@ -3275,7 +3287,7 @@
 
 	drbd_release_ee_lists(mdev);
 
-	/* should be free'd on disconnect? */
+	/* should be freed on disconnect? */
 	kfree(mdev->ee_hash);
 	/*
 	mdev->ee_hash_s = 0;
@@ -3415,7 +3427,9 @@
 	q->backing_dev_info.congested_data = mdev;
 
 	blk_queue_make_request(q, drbd_make_request);
-	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
+	/* Setting the max_hw_sectors to an odd value of 8kibyte here
+	   This triggers a max_bio_size message upon first attach or connect */
+	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
 	q->queue_lock = &mdev->req_lock;
@@ -3627,7 +3641,8 @@
 	      /* `-- act_log->nr_elements <-- sync_conf.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
-	u32 reserved_u32[4];
+	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
+	u32 reserved_u32[3];
 
 } __packed;
 
@@ -3668,6 +3683,7 @@
 	buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
 
 	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
 
 	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
 	sector = mdev->ldev->md.md_offset;
@@ -3751,6 +3767,15 @@
 	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->state.conn < C_CONNECTED) {
+		int peer;
+		peer = be32_to_cpu(buffer->la_peer_max_bio_size);
+		peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+		mdev->peer_max_bio_size = peer;
+	}
+	spin_unlock_irq(&mdev->req_lock);
+
 	if (mdev->sync_conf.al_extents < 7)
 		mdev->sync_conf.al_extents = 127;
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 03b29f7..515bcd9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -272,9 +272,28 @@
 {
 	struct drbd_conf *mdev = (struct drbd_conf *)data;
 	enum drbd_disk_state nps;
+	union drbd_state ns;
 
 	nps = drbd_try_outdate_peer(mdev);
-	drbd_request_state(mdev, NS(pdsk, nps));
+
+	/* Not using
+	   drbd_request_state(mdev, NS(pdsk, nps));
+	   here, because we might were able to re-establish the connection
+	   in the meantime. This can only partially be solved in the state's
+	   engine is_valid_state() and is_valid_state_transition()
+	   functions.
+
+	   nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+	   pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+	   therefore we have to have the pre state change check here.
+	*/
+	spin_lock_irq(&mdev->req_lock);
+	ns = mdev->state;
+	if (ns.conn < C_WF_REPORT_PARAMS) {
+		ns.pdsk = nps;
+		_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+	}
+	spin_unlock_irq(&mdev->req_lock);
 
 	return 0;
 }
@@ -577,7 +596,7 @@
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size;
@@ -773,30 +792,78 @@
 	return 0;
 }
 
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
 {
 	struct request_queue * const q = mdev->rq_queue;
-	struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
-	int max_segments = mdev->ldev->dc.max_bio_bvecs;
-	int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+	int max_hw_sectors = max_bio_size >> 9;
+	int max_segments = 0;
+
+	if (get_ldev_if_state(mdev, D_ATTACHING)) {
+		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+		max_segments = mdev->ldev->dc.max_bio_bvecs;
+		put_ldev(mdev);
+	}
 
 	blk_queue_logical_block_size(q, 512);
 	blk_queue_max_hw_sectors(q, max_hw_sectors);
 	/* This is the workaround for "bio would need to, but cannot, be split" */
 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
-	blk_queue_stack_limits(q, b);
 
-	dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
+	if (get_ldev_if_state(mdev, D_ATTACHING)) {
+		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
 
-	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
-		dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
-		     q->backing_dev_info.ra_pages,
-		     b->backing_dev_info.ra_pages);
-		q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+		blk_queue_stack_limits(q, b);
+
+		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+			dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+				 q->backing_dev_info.ra_pages,
+				 b->backing_dev_info.ra_pages);
+			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+		}
+		put_ldev(mdev);
 	}
 }
 
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+	int now, new, local, peer;
+
+	now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+	local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
+	peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
+
+	if (get_ldev_if_state(mdev, D_ATTACHING)) {
+		local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+		mdev->local_max_bio_size = local;
+		put_ldev(mdev);
+	}
+
+	/* We may ignore peer limits if the peer is modern enough.
+	   Because new from 8.3.8 onwards the peer can use multiple
+	   BIOs for a single peer_request */
+	if (mdev->state.conn >= C_CONNECTED) {
+		if (mdev->agreed_pro_version < 94)
+			peer = mdev->peer_max_bio_size;
+		else if (mdev->agreed_pro_version == 94)
+			peer = DRBD_MAX_SIZE_H80_PACKET;
+		else /* drbd 8.3.8 onwards */
+			peer = DRBD_MAX_BIO_SIZE;
+	}
+
+	new = min_t(int, local, peer);
+
+	if (mdev->state.role == R_PRIMARY && new < now)
+		dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+	if (new != now)
+		dev_info(DEV, "max BIO size = %u\n", new);
+
+	drbd_setup_queue_param(mdev, new);
+}
+
 /* serialize deconfig (worker exiting, doing cleanup)
  * and reconfig (drbdsetup disk, drbdsetup net)
  *
@@ -865,7 +932,6 @@
 	struct block_device *bdev;
 	struct lru_cache *resync_lru = NULL;
 	union drbd_state ns, os;
-	unsigned int max_bio_size;
 	enum drbd_state_rv rv;
 	int cp_discovered = 0;
 	int logical_block_size;
@@ -1117,20 +1183,7 @@
 	mdev->read_cnt = 0;
 	mdev->writ_cnt = 0;
 
-	max_bio_size = DRBD_MAX_BIO_SIZE;
-	if (mdev->state.conn == C_CONNECTED) {
-		/* We are Primary, Connected, and now attach a new local
-		 * backing store. We must not increase the user visible maximum
-		 * bio size on this device to something the peer may not be
-		 * able to handle. */
-		if (mdev->agreed_pro_version < 94)
-			max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
-		else if (mdev->agreed_pro_version == 94)
-			max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
-		/* else: drbd 8.3.9 and later, stay with default */
-	}
-
-	drbd_setup_queue_param(mdev, max_bio_size);
+	drbd_reconsider_max_bio_size(mdev);
 
 	/* If I am currently not R_PRIMARY,
 	 * but meta data primary indicator is set,
@@ -1152,7 +1205,7 @@
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determin_dev_size(mdev, 0);
+	dd = drbd_determine_dev_size(mdev, 0);
 	if (dd == dev_size_error) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
@@ -1281,11 +1334,19 @@
 static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			  struct drbd_nl_cfg_reply *reply)
 {
+	enum drbd_ret_code retcode;
+	int ret;
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-	if (mdev->state.disk == D_DISKLESS)
-		wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+	/* D_FAILED will transition to DISKLESS. */
+	ret = wait_event_interruptible(mdev->misc_wait,
+			mdev->state.disk != D_FAILED);
 	drbd_resume_io(mdev);
+	if ((int)retcode == (int)SS_IS_DISKLESS)
+		retcode = SS_NOTHING_TO_DO;
+	if (ret)
+		retcode = ERR_INTR;
+	reply->ret_code = retcode;
 	return 0;
 }
 
@@ -1658,7 +1719,7 @@
 
 	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determin_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fd26666..25d32c5 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -333,7 +333,7 @@
 	if (!page)
 		goto fail;
 
-	INIT_HLIST_NODE(&e->colision);
+	INIT_HLIST_NODE(&e->collision);
 	e->epoch = NULL;
 	e->mdev = mdev;
 	e->pages = page;
@@ -356,7 +356,7 @@
 		kfree(e->digest);
 	drbd_pp_free(mdev, e->pages, is_net);
 	D_ASSERT(atomic_read(&e->pending_bios) == 0);
-	D_ASSERT(hlist_unhashed(&e->colision));
+	D_ASSERT(hlist_unhashed(&e->collision));
 	mempool_free(e, drbd_ee_mempool);
 }
 
@@ -787,7 +787,7 @@
 		}
 
 		if (sock && msock) {
-			schedule_timeout_interruptible(HZ / 10);
+			schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
 			ok = drbd_socket_okay(mdev, &sock);
 			ok = drbd_socket_okay(mdev, &msock) && ok;
 			if (ok)
@@ -899,11 +899,6 @@
 
 	drbd_thread_start(&mdev->asender);
 
-	if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
-		drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
-		put_ldev(mdev);
-	}
-
 	if (drbd_send_protocol(mdev) == -1)
 		return -1;
 	drbd_send_sync_param(mdev, &mdev->sync_conf);
@@ -1418,7 +1413,7 @@
 	sector_t sector = e->sector;
 	int ok;
 
-	D_ASSERT(hlist_unhashed(&e->colision));
+	D_ASSERT(hlist_unhashed(&e->collision));
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		drbd_set_in_sync(mdev, sector, e->size);
@@ -1487,7 +1482,7 @@
 		return false;
 	}
 
-	/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
 	 * special casing it there for the various failure cases.
 	 * still no race with drbd_fail_pending_reads */
 	ok = recv_dless_read(mdev, req, sector, data_size);
@@ -1558,11 +1553,11 @@
 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
 	if (mdev->net_conf->two_primaries) {
 		spin_lock_irq(&mdev->req_lock);
-		D_ASSERT(!hlist_unhashed(&e->colision));
-		hlist_del_init(&e->colision);
+		D_ASSERT(!hlist_unhashed(&e->collision));
+		hlist_del_init(&e->collision);
 		spin_unlock_irq(&mdev->req_lock);
 	} else {
-		D_ASSERT(hlist_unhashed(&e->colision));
+		D_ASSERT(hlist_unhashed(&e->collision));
 	}
 
 	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1579,8 +1574,8 @@
 	ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
 
 	spin_lock_irq(&mdev->req_lock);
-	D_ASSERT(!hlist_unhashed(&e->colision));
-	hlist_del_init(&e->colision);
+	D_ASSERT(!hlist_unhashed(&e->collision));
+	hlist_del_init(&e->collision);
 	spin_unlock_irq(&mdev->req_lock);
 
 	dec_unacked(mdev);
@@ -1755,7 +1750,7 @@
 
 		spin_lock_irq(&mdev->req_lock);
 
-		hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+		hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 		slot = tl_hash_slot(mdev, sector);
@@ -1765,7 +1760,7 @@
 			int have_conflict = 0;
 			prepare_to_wait(&mdev->misc_wait, &wait,
 				TASK_INTERRUPTIBLE);
-			hlist_for_each_entry(i, n, slot, colision) {
+			hlist_for_each_entry(i, n, slot, collision) {
 				if (OVERLAPS) {
 					/* only ALERT on first iteration,
 					 * we may be woken up early... */
@@ -1804,7 +1799,7 @@
 			}
 
 			if (signal_pending(current)) {
-				hlist_del_init(&e->colision);
+				hlist_del_init(&e->collision);
 
 				spin_unlock_irq(&mdev->req_lock);
 
@@ -1862,7 +1857,7 @@
 	dev_err(DEV, "submit failed, triggering re-connect\n");
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
-	hlist_del_init(&e->colision);
+	hlist_del_init(&e->collision);
 	spin_unlock_irq(&mdev->req_lock);
 	if (e->flags & EE_CALL_AL_COMPLETE_IO)
 		drbd_al_complete_io(mdev, e->sector);
@@ -2916,12 +2911,6 @@
 	return false;
 }
 
-static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
-{
-	/* sorry, we currently have no working implementation
-	 * of distributed TCQ */
-}
-
 /* warn if the arguments differ by more than 12.5% */
 static void warn_if_differ_considerably(struct drbd_conf *mdev,
 	const char *s, sector_t a, sector_t b)
@@ -2939,7 +2928,6 @@
 {
 	struct p_sizes *p = &mdev->data.rbuf.sizes;
 	enum determine_dev_size dd = unchanged;
-	unsigned int max_bio_size;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -2994,7 +2982,7 @@
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determin_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
 			return false;
@@ -3004,23 +2992,15 @@
 		drbd_set_my_capacity(mdev, p_size);
 	}
 
+	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+	drbd_reconsider_max_bio_size(mdev);
+
 	if (get_ldev(mdev)) {
 		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
 			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 			ldsc = 1;
 		}
 
-		if (mdev->agreed_pro_version < 94)
-			max_bio_size = be32_to_cpu(p->max_bio_size);
-		else if (mdev->agreed_pro_version == 94)
-			max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
-		else /* drbd 8.3.8 onwards */
-			max_bio_size = DRBD_MAX_BIO_SIZE;
-
-		if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
-			drbd_setup_queue_param(mdev, max_bio_size);
-
-		drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
 		put_ldev(mdev);
 	}
 
@@ -4275,7 +4255,7 @@
 	struct hlist_node *n;
 	struct drbd_request *req;
 
-	hlist_for_each_entry(req, n, slot, colision) {
+	hlist_for_each_entry(req, n, slot, collision) {
 		if ((unsigned long)req == (unsigned long)id) {
 			if (req->sector != sector) {
 				dev_err(DEV, "_ack_id_to_req: found req %p but it has "
@@ -4554,6 +4534,7 @@
 	int received = 0;
 	int expect   = sizeof(struct p_header80);
 	int empty;
+	int ping_timeout_active = 0;
 
 	sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
 
@@ -4566,6 +4547,7 @@
 			ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
 			mdev->meta.socket->sk->sk_rcvtimeo =
 				mdev->net_conf->ping_timeo*HZ/10;
+			ping_timeout_active = 1;
 		}
 
 		/* conditionally cork;
@@ -4620,8 +4602,7 @@
 			dev_err(DEV, "meta connection shut down by peer.\n");
 			goto reconnect;
 		} else if (rv == -EAGAIN) {
-			if (mdev->meta.socket->sk->sk_rcvtimeo ==
-			    mdev->net_conf->ping_timeo*HZ/10) {
+			if (ping_timeout_active) {
 				dev_err(DEV, "PingAck did not arrive in time.\n");
 				goto reconnect;
 			}
@@ -4660,6 +4641,11 @@
 			if (!cmd->process(mdev, h))
 				goto reconnect;
 
+			/* the idle_timeout (ping-int)
+			 * has been restored in got_PingAck() */
+			if (cmd == get_asender_cmd(P_PING_ACK))
+				ping_timeout_active = 0;
+
 			buf	 = h;
 			received = 0;
 			expect	 = sizeof(struct p_header80);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5c0c8be..3424d67 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -163,7 +163,7 @@
 		 * they must have been failed on the spot */
 #define OVERLAPS overlaps(sector, size, i->sector, i->size)
 		slot = tl_hash_slot(mdev, sector);
-		hlist_for_each_entry(i, n, slot, colision) {
+		hlist_for_each_entry(i, n, slot, collision) {
 			if (OVERLAPS) {
 				dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
 				      "other: %p %llus +%u\n",
@@ -187,7 +187,7 @@
 #undef OVERLAPS
 #define OVERLAPS overlaps(sector, size, e->sector, e->size)
 		slot = ee_hash_slot(mdev, req->sector);
-		hlist_for_each_entry(e, n, slot, colision) {
+		hlist_for_each_entry(e, n, slot, collision) {
 			if (OVERLAPS) {
 				wake_up(&mdev->misc_wait);
 				break;
@@ -260,8 +260,8 @@
 
 		/* remove the request from the conflict detection
 		 * respective block_id verification hash */
-		if (!hlist_unhashed(&req->colision))
-			hlist_del(&req->colision);
+		if (!hlist_unhashed(&req->collision))
+			hlist_del(&req->collision);
 		else
 			D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
 
@@ -329,7 +329,7 @@
 	struct hlist_node *n;
 	struct hlist_head *slot;
 
-	D_ASSERT(hlist_unhashed(&req->colision));
+	D_ASSERT(hlist_unhashed(&req->collision));
 
 	if (!get_net_conf(mdev))
 		return 0;
@@ -341,7 +341,7 @@
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 	slot = tl_hash_slot(mdev, sector);
-	hlist_for_each_entry(i, n, slot, colision) {
+	hlist_for_each_entry(i, n, slot, collision) {
 		if (OVERLAPS) {
 			dev_alert(DEV, "%s[%u] Concurrent local write detected! "
 			      "[DISCARD L] new: %llus +%u; "
@@ -359,7 +359,7 @@
 #undef OVERLAPS
 #define OVERLAPS overlaps(e->sector, e->size, sector, size)
 		slot = ee_hash_slot(mdev, sector);
-		hlist_for_each_entry(e, n, slot, colision) {
+		hlist_for_each_entry(e, n, slot, collision) {
 			if (OVERLAPS) {
 				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
 				      " [DISCARD L] new: %llus +%u; "
@@ -491,7 +491,7 @@
 
 		/* so we can verify the handle in the answer packet
 		 * corresponding hlist_del is in _req_may_be_done() */
-		hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+		hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
 
 		set_bit(UNPLUG_REMOTE, &mdev->flags);
 
@@ -507,7 +507,7 @@
 		/* assert something? */
 		/* from drbd_make_request_common only */
 
-		hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+		hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
 		/* corresponding hlist_del is in _req_may_be_done() */
 
 		/* NOTE
@@ -1033,7 +1033,7 @@
 	err = 0;
 
 fail_free_complete:
-	if (rw == WRITE && local)
+	if (req->rq_state & RQ_IN_ACT_LOG)
 		drbd_al_complete_io(mdev, sector);
 fail_and_free_req:
 	if (local) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 32e2c3e..68a234a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -256,7 +256,7 @@
 	struct hlist_node *n;
 	struct drbd_request *req;
 
-	hlist_for_each_entry(req, n, slot, colision) {
+	hlist_for_each_entry(req, n, slot, collision) {
 		if ((unsigned long)req == (unsigned long)id) {
 			D_ASSERT(req->sector == sector);
 			return req;
@@ -291,7 +291,7 @@
 		req->epoch       = 0;
 		req->sector      = bio_src->bi_sector;
 		req->size        = bio_src->bi_size;
-		INIT_HLIST_NODE(&req->colision);
+		INIT_HLIST_NODE(&req->collision);
 		INIT_LIST_HEAD(&req->tl_requests);
 		INIT_LIST_HEAD(&req->w.list);
 	}
@@ -323,6 +323,7 @@
 extern void complete_master_bio(struct drbd_conf *mdev,
 		struct bio_and_error *m);
 extern void request_timer_fn(unsigned long data);
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f7e6c92..4d76b06 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -126,7 +126,7 @@
 	list_del(&e->w.list); /* has been on active_ee or sync_ee */
 	list_add_tail(&e->w.list, &mdev->done_ee);
 
-	/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+	/* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
 	 * neither did we wake possibly waiting conflicting requests.
 	 * done from "drbd_process_done_ee" within the appropriate w.cb
 	 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
@@ -297,42 +297,48 @@
 	crypto_hash_final(&desc, digest);
 }
 
-static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+/* TODO merge common code with w_e_end_ov_req */
+int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
 	int digest_size;
 	void *digest;
-	int ok;
+	int ok = 1;
 
 	D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
 
-	if (unlikely(cancel)) {
+	if (unlikely(cancel))
+		goto out;
+
+	if (likely((e->flags & EE_WAS_ERROR) != 0))
+		goto out;
+
+	digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+	digest = kmalloc(digest_size, GFP_NOIO);
+	if (digest) {
+		sector_t sector = e->sector;
+		unsigned int size = e->size;
+		drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+		/* Free e and pages before send.
+		 * In case we block on congestion, we could otherwise run into
+		 * some distributed deadlock, if the other side blocks on
+		 * congestion as well, because our receiver blocks in
+		 * drbd_pp_alloc due to pp_in_use > max_buffers. */
 		drbd_free_ee(mdev, e);
-		return 1;
+		e = NULL;
+		inc_rs_pending(mdev);
+		ok = drbd_send_drequest_csum(mdev, sector, size,
+					     digest, digest_size,
+					     P_CSUM_RS_REQUEST);
+		kfree(digest);
+	} else {
+		dev_err(DEV, "kmalloc() of digest failed.\n");
+		ok = 0;
 	}
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
-		digest_size = crypto_hash_digestsize(mdev->csums_tfm);
-		digest = kmalloc(digest_size, GFP_NOIO);
-		if (digest) {
-			drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
-
-			inc_rs_pending(mdev);
-			ok = drbd_send_drequest_csum(mdev,
-						     e->sector,
-						     e->size,
-						     digest,
-						     digest_size,
-						     P_CSUM_RS_REQUEST);
-			kfree(digest);
-		} else {
-			dev_err(DEV, "kmalloc() of digest failed.\n");
-			ok = 0;
-		}
-	} else
-		ok = 1;
-
-	drbd_free_ee(mdev, e);
+out:
+	if (e)
+		drbd_free_ee(mdev, e);
 
 	if (unlikely(!ok))
 		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
@@ -834,7 +840,7 @@
 			const int ratio =
 				(t == 0)     ? 0 :
 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
-			dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
 			     "transferred %luK total %luK\n",
 			     ratio,
 			     Bit2KB(mdev->rs_same_csum),
@@ -1071,9 +1077,12 @@
 	return ok;
 }
 
+/* TODO merge common code with w_e_send_csum */
 int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	sector_t sector = e->sector;
+	unsigned int size = e->size;
 	int digest_size;
 	void *digest;
 	int ok = 1;
@@ -1093,17 +1102,25 @@
 	else
 		memset(digest, 0, digest_size);
 
+	/* Free e and pages before send.
+	 * In case we block on congestion, we could otherwise run into
+	 * some distributed deadlock, if the other side blocks on
+	 * congestion as well, because our receiver blocks in
+	 * drbd_pp_alloc due to pp_in_use > max_buffers. */
+	drbd_free_ee(mdev, e);
+	e = NULL;
 	inc_rs_pending(mdev);
-	ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
-				     digest, digest_size, P_OV_REPLY);
+	ok = drbd_send_drequest_csum(mdev, sector, size,
+				     digest, digest_size,
+				     P_OV_REPLY);
 	if (!ok)
 		dec_rs_pending(mdev);
 	kfree(digest);
 
 out:
-	drbd_free_ee(mdev, e);
+	if (e)
+		drbd_free_ee(mdev, e);
 	dec_unacked(mdev);
-
 	return ok;
 }
 
@@ -1122,8 +1139,10 @@
 {
 	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
 	struct digest_info *di;
-	int digest_size;
 	void *digest;
+	sector_t sector = e->sector;
+	unsigned int size = e->size;
+	int digest_size;
 	int ok, eq = 0;
 
 	if (unlikely(cancel)) {
@@ -1153,16 +1172,21 @@
 		}
 	}
 
-	dec_unacked(mdev);
+		/* Free e and pages before send.
+		 * In case we block on congestion, we could otherwise run into
+		 * some distributed deadlock, if the other side blocks on
+		 * congestion as well, because our receiver blocks in
+		 * drbd_pp_alloc due to pp_in_use > max_buffers. */
+	drbd_free_ee(mdev, e);
 	if (!eq)
-		drbd_ov_oos_found(mdev, e->sector, e->size);
+		drbd_ov_oos_found(mdev, sector, size);
 	else
 		ov_oos_print(mdev);
 
-	ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
+	ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
 			      eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
 
-	drbd_free_ee(mdev, e);
+	dec_unacked(mdev);
 
 	--mdev->ov_left;
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a076a14..c59a672 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1658,7 +1658,7 @@
 	struct kobject *kobj;
 
 	mutex_lock(&loop_devices_mutex);
-	lo = loop_init_one(dev & MINORMASK);
+	lo = loop_init_one(MINOR(dev) >> part_shift);
 	kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
 	mutex_unlock(&loop_devices_mutex);
 
@@ -1691,15 +1691,18 @@
 	if (max_part > 0)
 		part_shift = fls(max_part);
 
+	if ((1UL << part_shift) > DISK_MAX_PARTS)
+		return -EINVAL;
+
 	if (max_loop > 1UL << (MINORBITS - part_shift))
 		return -EINVAL;
 
 	if (max_loop) {
 		nr = max_loop;
-		range = max_loop;
+		range = max_loop << part_shift;
 	} else {
 		nr = 8;
-		range = 1UL << (MINORBITS - part_shift);
+		range = 1UL << MINORBITS;
 	}
 
 	if (register_blkdev(LOOP_MAJOR, "loop"))
@@ -1738,7 +1741,7 @@
 	unsigned long range;
 	struct loop_device *lo, *next;
 
-	range = max_loop ? max_loop :  1UL << (MINORBITS - part_shift);
+	range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
 
 	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
 		loop_del_one(lo);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 8690e31..a0aabd9 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -320,6 +320,8 @@
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
+		disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
 
diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile
new file mode 100644
index 0000000..e491c1b
--- /dev/null
+++ b/drivers/block/xen-blkback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o
+
+xen-blkback-y	:= blkback.o xenbus.o
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
new file mode 100644
index 0000000..c73910c
--- /dev/null
+++ b/drivers/block/xen-blkback/blkback.c
@@ -0,0 +1,824 @@
+/******************************************************************************
+ *
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ *  drivers/block/xen-blkfront.c
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+
+#include <xen/events.h>
+#include <xen/page.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ *
+ * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+static int xen_blkif_reqs = 64;
+module_param_named(reqs, xen_blkif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats;
+module_param(log_stats, int, 0644);
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+	struct xen_blkif	*blkif;
+	u64			id;
+	int			nr_pages;
+	atomic_t		pendcnt;
+	unsigned short		operation;
+	int			status;
+	struct list_head	free_list;
+};
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+struct xen_blkbk {
+	struct pending_req	*pending_reqs;
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
+	/* The list of all pages that are available. */
+	struct page		**pending_pages;
+	/* And the grant handles that are available. */
+	grant_handle_t		*pending_grant_handles;
+};
+
+static struct xen_blkbk *blkbk;
+
+/*
+ * Little helpful macro to figure out the index and virtual address of the
+ * pending_pages[..]. For each 'pending_req' we have have up to
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
+ * 10 and would index in the pending_pages[..].
+ */
+static inline int vaddr_pagenr(struct pending_req *req, int seg)
+{
+	return (req - blkbk->pending_reqs) *
+		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
+
+static inline unsigned long vaddr(struct pending_req *req, int seg)
+{
+	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
+	return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#define pending_handle(_req, _seg) \
+	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
+
+static int do_block_io_op(struct xen_blkif *blkif);
+static int dispatch_rw_block_io(struct xen_blkif *blkif,
+				struct blkif_request *req,
+				struct pending_req *pending_req);
+static void make_response(struct xen_blkif *blkif, u64 id,
+			  unsigned short op, int st);
+
+/*
+ * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
+ */
+static struct pending_req *alloc_req(void)
+{
+	struct pending_req *req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
+	if (!list_empty(&blkbk->pending_free)) {
+		req = list_entry(blkbk->pending_free.next, struct pending_req,
+				 free_list);
+		list_del(&req->free_list);
+	}
+	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	return req;
+}
+
+/*
+ * Return the 'pending_req' structure back to the freepool. We also
+ * wake up the thread if it was waiting for a free page.
+ */
+static void free_req(struct pending_req *req)
+{
+	unsigned long flags;
+	int was_empty;
+
+	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
+	was_empty = list_empty(&blkbk->pending_free);
+	list_add(&req->free_list, &blkbk->pending_free);
+	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	if (was_empty)
+		wake_up(&blkbk->pending_free_wq);
+}
+
+/*
+ * Routines for managing virtual block devices (vbds).
+ */
+static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
+			     int operation)
+{
+	struct xen_vbd *vbd = &blkif->vbd;
+	int rc = -EACCES;
+
+	if ((operation != READ) && vbd->readonly)
+		goto out;
+
+	if (likely(req->nr_sects)) {
+		blkif_sector_t end = req->sector_number + req->nr_sects;
+
+		if (unlikely(end < req->sector_number))
+			goto out;
+		if (unlikely(end > vbd_sz(vbd)))
+			goto out;
+	}
+
+	req->dev  = vbd->pdevice;
+	req->bdev = vbd->bdev;
+	rc = 0;
+
+ out:
+	return rc;
+}
+
+static void xen_vbd_resize(struct xen_blkif *blkif)
+{
+	struct xen_vbd *vbd = &blkif->vbd;
+	struct xenbus_transaction xbt;
+	int err;
+	struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
+	unsigned long long new_size = vbd_sz(vbd);
+
+	pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
+		blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
+	pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
+	vbd->size = new_size;
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		pr_warn(DRV_PFX "Error starting transaction");
+		return;
+	}
+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+			    (unsigned long long)vbd_sz(vbd));
+	if (err) {
+		pr_warn(DRV_PFX "Error writing new size");
+		goto abort;
+	}
+	/*
+	 * Write the current state; we will use this to synchronize
+	 * the front-end. If the current state is "connected" the
+	 * front-end will get the new size information online.
+	 */
+	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
+	if (err) {
+		pr_warn(DRV_PFX "Error writing the state");
+		goto abort;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err)
+		pr_warn(DRV_PFX "Error ending transaction");
+	return;
+abort:
+	xenbus_transaction_end(xbt, 1);
+}
+
+/*
+ * Notification from the guest OS.
+ */
+static void blkif_notify_work(struct xen_blkif *blkif)
+{
+	blkif->waiting_reqs = 1;
+	wake_up(&blkif->wq);
+}
+
+irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
+{
+	blkif_notify_work(dev_id);
+	return IRQ_HANDLED;
+}
+
+/*
+ * SCHEDULER FUNCTIONS
+ */
+
+static void print_stats(struct xen_blkif *blkif)
+{
+	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+		 current->comm, blkif->st_oo_req,
+		 blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+	blkif->st_rd_req = 0;
+	blkif->st_wr_req = 0;
+	blkif->st_oo_req = 0;
+}
+
+int xen_blkif_schedule(void *arg)
+{
+	struct xen_blkif *blkif = arg;
+	struct xen_vbd *vbd = &blkif->vbd;
+
+	xen_blkif_get(blkif);
+
+	while (!kthread_should_stop()) {
+		if (try_to_freeze())
+			continue;
+		if (unlikely(vbd->size != vbd_sz(vbd)))
+			xen_vbd_resize(blkif);
+
+		wait_event_interruptible(
+			blkif->wq,
+			blkif->waiting_reqs || kthread_should_stop());
+		wait_event_interruptible(
+			blkbk->pending_free_wq,
+			!list_empty(&blkbk->pending_free) ||
+			kthread_should_stop());
+
+		blkif->waiting_reqs = 0;
+		smp_mb(); /* clear flag *before* checking for work */
+
+		if (do_block_io_op(blkif))
+			blkif->waiting_reqs = 1;
+
+		if (log_stats && time_after(jiffies, blkif->st_print))
+			print_stats(blkif);
+	}
+
+	if (log_stats)
+		print_stats(blkif);
+
+	blkif->xenblkd = NULL;
+	xen_blkif_put(blkif);
+
+	return 0;
+}
+
+struct seg_buf {
+	unsigned long buf;
+	unsigned int nsec;
+};
+/*
+ * Unmap the grant references, and also remove the M2P over-rides
+ * used in the 'pending_req'.
+ */
+static void xen_blkbk_unmap(struct pending_req *req)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int i, invcount = 0;
+	grant_handle_t handle;
+	int ret;
+
+	for (i = 0; i < req->nr_pages; i++) {
+		handle = pending_handle(req, i);
+		if (handle == BLKBACK_INVALID_HANDLE)
+			continue;
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
+				    GNTMAP_host_map, handle);
+		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+		invcount++;
+	}
+
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_unmap_grant_ref, unmap, invcount);
+	BUG_ON(ret);
+	/*
+	 * Note, we use invcount, so nr->pages, so we can't index
+	 * using vaddr(req, i).
+	 */
+	for (i = 0; i < invcount; i++) {
+		ret = m2p_remove_override(
+			virt_to_page(unmap[i].host_addr), false);
+		if (ret) {
+			pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
+				 (unsigned long)unmap[i].host_addr);
+			continue;
+		}
+	}
+}
+
+static int xen_blkbk_map(struct blkif_request *req,
+			 struct pending_req *pending_req,
+			 struct seg_buf seg[])
+{
+	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	int i;
+	int nseg = req->nr_segments;
+	int ret = 0;
+
+	/*
+	 * Fill out preq.nr_sects with proper amount of sectors, and setup
+	 * assign map[..] with the PFN of the page in our domain with the
+	 * corresponding grant reference for each page.
+	 */
+	for (i = 0; i < nseg; i++) {
+		uint32_t flags;
+
+		flags = GNTMAP_host_map;
+		if (pending_req->operation != BLKIF_OP_READ)
+			flags |= GNTMAP_readonly;
+		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+				  req->u.rw.seg[i].gref,
+				  pending_req->blkif->domid);
+	}
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+	BUG_ON(ret);
+
+	/*
+	 * Now swizzle the MFN in our domain with the MFN from the other domain
+	 * so that when we access vaddr(pending_req,i) it has the contents of
+	 * the page from the other domain.
+	 */
+	for (i = 0; i < nseg; i++) {
+		if (unlikely(map[i].status != 0)) {
+			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+			map[i].handle = BLKBACK_INVALID_HANDLE;
+			ret |= 1;
+		}
+
+		pending_handle(pending_req, i) = map[i].handle;
+
+		if (ret)
+			continue;
+
+		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
+			blkbk->pending_page(pending_req, i), false);
+		if (ret) {
+			pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
+				 (unsigned long)map[i].dev_bus_addr, ret);
+			/* We could switch over to GNTTABOP_copy */
+			continue;
+		}
+
+		seg[i].buf  = map[i].dev_bus_addr |
+			(req->u.rw.seg[i].first_sect << 9);
+	}
+	return ret;
+}
+
+/*
+ * Completion callback on the bio's. Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(struct pending_req *pending_req, int error)
+{
+	/* An error fails the entire request. */
+	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+	    (error == -EOPNOTSUPP)) {
+		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
+		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
+		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+	} else if (error) {
+		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
+			 " error=%d\n", error);
+		pending_req->status = BLKIF_RSP_ERROR;
+	}
+
+	/*
+	 * If all of the bio's have completed it is time to unmap
+	 * the grant references associated with 'request' and provide
+	 * the proper response on the ring.
+	 */
+	if (atomic_dec_and_test(&pending_req->pendcnt)) {
+		xen_blkbk_unmap(pending_req);
+		make_response(pending_req->blkif, pending_req->id,
+			      pending_req->operation, pending_req->status);
+		xen_blkif_put(pending_req->blkif);
+		free_req(pending_req);
+	}
+}
+
+/*
+ * bio callback.
+ */
+static void end_block_io_op(struct bio *bio, int error)
+{
+	__end_block_io_op(bio->bi_private, error);
+	bio_put(bio);
+}
+
+
+
+/*
+ * Function to copy the from the ring buffer the 'struct blkif_request'
+ * (which has the sectors we want, number of them, grant references, etc),
+ * and transmute  it to the block API to hand it over to the proper block disk.
+ */
+static int do_block_io_op(struct xen_blkif *blkif)
+{
+	union blkif_back_rings *blk_rings = &blkif->blk_rings;
+	struct blkif_request req;
+	struct pending_req *pending_req;
+	RING_IDX rc, rp;
+	int more_to_do = 0;
+
+	rc = blk_rings->common.req_cons;
+	rp = blk_rings->common.sring->req_prod;
+	rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+	while (rc != rp) {
+
+		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+			break;
+
+		if (kthread_should_stop()) {
+			more_to_do = 1;
+			break;
+		}
+
+		pending_req = alloc_req();
+		if (NULL == pending_req) {
+			blkif->st_oo_req++;
+			more_to_do = 1;
+			break;
+		}
+
+		switch (blkif->blk_protocol) {
+		case BLKIF_PROTOCOL_NATIVE:
+			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
+			break;
+		case BLKIF_PROTOCOL_X86_32:
+			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
+			break;
+		case BLKIF_PROTOCOL_X86_64:
+			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
+			break;
+		default:
+			BUG();
+		}
+		blk_rings->common.req_cons = ++rc; /* before make_response() */
+
+		/* Apply all sanity checks to /private copy/ of request. */
+		barrier();
+
+		if (dispatch_rw_block_io(blkif, &req, pending_req))
+			break;
+
+		/* Yield point for this unbounded loop. */
+		cond_resched();
+	}
+
+	return more_to_do;
+}
+
+/*
+ * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
+ * and call the 'submit_bio' to pass it to the underlying storage.
+ */
+static int dispatch_rw_block_io(struct xen_blkif *blkif,
+				struct blkif_request *req,
+				struct pending_req *pending_req)
+{
+	struct phys_req preq;
+	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int nseg;
+	struct bio *bio = NULL;
+	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	int i, nbio = 0;
+	int operation;
+	struct blk_plug plug;
+
+	switch (req->operation) {
+	case BLKIF_OP_READ:
+		blkif->st_rd_req++;
+		operation = READ;
+		break;
+	case BLKIF_OP_WRITE:
+		blkif->st_wr_req++;
+		operation = WRITE_ODIRECT;
+		break;
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		blkif->st_f_req++;
+		operation = WRITE_FLUSH;
+		break;
+	case BLKIF_OP_WRITE_BARRIER:
+	default:
+		operation = 0; /* make gcc happy */
+		goto fail_response;
+		break;
+	}
+
+	/* Check that the number of segments is sane. */
+	nseg = req->nr_segments;
+	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
+			 nseg);
+		/* Haven't submitted any bio's yet. */
+		goto fail_response;
+	}
+
+	preq.dev           = req->handle;
+	preq.sector_number = req->u.rw.sector_number;
+	preq.nr_sects      = 0;
+
+	pending_req->blkif     = blkif;
+	pending_req->id        = req->id;
+	pending_req->operation = req->operation;
+	pending_req->status    = BLKIF_RSP_OKAY;
+	pending_req->nr_pages  = nseg;
+
+	for (i = 0; i < nseg; i++) {
+		seg[i].nsec = req->u.rw.seg[i].last_sect -
+			req->u.rw.seg[i].first_sect + 1;
+		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+			goto fail_response;
+		preq.nr_sects += seg[i].nsec;
+
+	}
+
+	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
+		pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
+			 operation == READ ? "read" : "write",
+			 preq.sector_number,
+			 preq.sector_number + preq.nr_sects, preq.dev);
+		goto fail_response;
+	}
+
+	/*
+	 * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
+	 * is set there.
+	 */
+	for (i = 0; i < nseg; i++) {
+		if (((int)preq.sector_number|(int)seg[i].nsec) &
+		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
+			pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
+				 blkif->domid);
+			goto fail_response;
+		}
+	}
+
+	/*
+	 * If we have failed at this point, we need to undo the M2P override,
+	 * set gnttab_set_unmap_op on all of the grant references and perform
+	 * the hypercall to unmap the grants - that is all done in
+	 * xen_blkbk_unmap.
+	 */
+	if (xen_blkbk_map(req, pending_req, seg))
+		goto fail_flush;
+
+	/* This corresponding xen_blkif_put is done in __end_block_io_op */
+	xen_blkif_get(blkif);
+
+	for (i = 0; i < nseg; i++) {
+		while ((bio == NULL) ||
+		       (bio_add_page(bio,
+				     blkbk->pending_page(pending_req, i),
+				     seg[i].nsec << 9,
+				     seg[i].buf & ~PAGE_MASK) == 0)) {
+
+			bio = bio_alloc(GFP_KERNEL, nseg-i);
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
+
+			biolist[nbio++] = bio;
+			bio->bi_bdev    = preq.bdev;
+			bio->bi_private = pending_req;
+			bio->bi_end_io  = end_block_io_op;
+			bio->bi_sector  = preq.sector_number;
+		}
+
+		preq.sector_number += seg[i].nsec;
+	}
+
+	/* This will be hit if the operation was a flush. */
+	if (!bio) {
+		BUG_ON(operation != WRITE_FLUSH);
+
+		bio = bio_alloc(GFP_KERNEL, 0);
+		if (unlikely(bio == NULL))
+			goto fail_put_bio;
+
+		biolist[nbio++] = bio;
+		bio->bi_bdev    = preq.bdev;
+		bio->bi_private = pending_req;
+		bio->bi_end_io  = end_block_io_op;
+	}
+
+	/*
+	 * We set it one so that the last submit_bio does not have to call
+	 * atomic_inc.
+	 */
+	atomic_set(&pending_req->pendcnt, nbio);
+
+	/* Get a reference count for the disk queue and start sending I/O */
+	blk_start_plug(&plug);
+
+	for (i = 0; i < nbio; i++)
+		submit_bio(operation, biolist[i]);
+
+	/* Let the I/Os go.. */
+	blk_finish_plug(&plug);
+
+	if (operation == READ)
+		blkif->st_rd_sect += preq.nr_sects;
+	else if (operation == WRITE || operation == WRITE_FLUSH)
+		blkif->st_wr_sect += preq.nr_sects;
+
+	return 0;
+
+ fail_flush:
+	xen_blkbk_unmap(pending_req);
+ fail_response:
+	/* Haven't submitted any bio's yet. */
+	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+	free_req(pending_req);
+	msleep(1); /* back off a bit */
+	return -EIO;
+
+ fail_put_bio:
+	for (i = 0; i < nbio; i++)
+		bio_put(biolist[i]);
+	__end_block_io_op(pending_req, -EINVAL);
+	msleep(1); /* back off a bit */
+	return -EIO;
+}
+
+
+
+/*
+ * Put a response on the ring on how the operation fared.
+ */
+static void make_response(struct xen_blkif *blkif, u64 id,
+			  unsigned short op, int st)
+{
+	struct blkif_response  resp;
+	unsigned long     flags;
+	union blkif_back_rings *blk_rings = &blkif->blk_rings;
+	int more_to_do = 0;
+	int notify;
+
+	resp.id        = id;
+	resp.operation = op;
+	resp.status    = st;
+
+	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+	/* Place on the response ring for the relevant domain. */
+	switch (blkif->blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	case BLKIF_PROTOCOL_X86_32:
+		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	case BLKIF_PROTOCOL_X86_64:
+		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	default:
+		BUG();
+	}
+	blk_rings->common.rsp_prod_pvt++;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
+	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
+		/*
+		 * Tail check for pending requests. Allows frontend to avoid
+		 * notifications if requests are already in flight (lower
+		 * overheads and promotes batching).
+		 */
+		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
+
+	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
+		more_to_do = 1;
+	}
+
+	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+	if (more_to_do)
+		blkif_notify_work(blkif);
+	if (notify)
+		notify_remote_via_irq(blkif->irq);
+}
+
+static int __init xen_blkif_init(void)
+{
+	int i, mmap_pages;
+	int rc = 0;
+
+	if (!xen_pv_domain())
+		return -ENODEV;
+
+	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
+	if (!blkbk) {
+		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+					xen_blkif_reqs, GFP_KERNEL);
+	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+					mmap_pages, GFP_KERNEL);
+	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
+					mmap_pages, GFP_KERNEL);
+
+	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
+	    !blkbk->pending_pages) {
+		rc = -ENOMEM;
+		goto out_of_memory;
+	}
+
+	for (i = 0; i < mmap_pages; i++) {
+		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
+		if (blkbk->pending_pages[i] == NULL) {
+			rc = -ENOMEM;
+			goto out_of_memory;
+		}
+	}
+	rc = xen_blkif_interface_init();
+	if (rc)
+		goto failed_init;
+
+	memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
+
+	INIT_LIST_HEAD(&blkbk->pending_free);
+	spin_lock_init(&blkbk->pending_free_lock);
+	init_waitqueue_head(&blkbk->pending_free_wq);
+
+	for (i = 0; i < xen_blkif_reqs; i++)
+		list_add_tail(&blkbk->pending_reqs[i].free_list,
+			      &blkbk->pending_free);
+
+	rc = xen_blkif_xenbus_init();
+	if (rc)
+		goto failed_init;
+
+	return 0;
+
+ out_of_memory:
+	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+ failed_init:
+	kfree(blkbk->pending_reqs);
+	kfree(blkbk->pending_grant_handles);
+	for (i = 0; i < mmap_pages; i++) {
+		if (blkbk->pending_pages[i])
+			__free_page(blkbk->pending_pages[i]);
+	}
+	kfree(blkbk->pending_pages);
+	kfree(blkbk);
+	blkbk = NULL;
+	return rc;
+}
+
+module_init(xen_blkif_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
new file mode 100644
index 0000000..9e40b28
--- /dev/null
+++ b/drivers/block/xen-blkback/common.h
@@ -0,0 +1,233 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
+#define __XEN_BLKIF__BACKEND__COMMON_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm/hypervisor.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
+
+#define DRV_PFX "xen-blkback:"
+#define DPRINTK(fmt, args...)				\
+	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",	\
+		 __func__, __LINE__, ##args)
+
+
+/* Not a real protocol.  Used to generate ring structs which contain
+ * the elements common to all protocols only.  This way we get a
+ * compiler-checkable way to use common struct elements, so we can
+ * avoid using switch(protocol) in a number of places.  */
+struct blkif_common_request {
+	char dummy;
+};
+struct blkif_common_response {
+	char dummy;
+};
+
+/* i386 protocol version */
+#pragma pack(push, 4)
+struct blkif_x86_32_request {
+	uint8_t        operation;    /* BLKIF_OP_???                         */
+	uint8_t        nr_segments;  /* number of segments                   */
+	blkif_vdev_t   handle;       /* only for read/write requests         */
+	uint64_t       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_32_response {
+	uint64_t        id;              /* copied from request */
+	uint8_t         operation;       /* copied from request */
+	int16_t         status;          /* BLKIF_RSP_???       */
+};
+#pragma pack(pop)
+
+/* x86_64 protocol version */
+struct blkif_x86_64_request {
+	uint8_t        operation;    /* BLKIF_OP_???                         */
+	uint8_t        nr_segments;  /* number of segments                   */
+	blkif_vdev_t   handle;       /* only for read/write requests         */
+	uint64_t       __attribute__((__aligned__(8))) id;
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_64_response {
+	uint64_t       __attribute__((__aligned__(8))) id;
+	uint8_t         operation;       /* copied from request */
+	int16_t         status;          /* BLKIF_RSP_???       */
+};
+
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
+		  struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
+		  struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
+		  struct blkif_x86_64_response);
+
+union blkif_back_rings {
+	struct blkif_back_ring        native;
+	struct blkif_common_back_ring common;
+	struct blkif_x86_32_back_ring x86_32;
+	struct blkif_x86_64_back_ring x86_64;
+};
+
+enum blkif_protocol {
+	BLKIF_PROTOCOL_NATIVE = 1,
+	BLKIF_PROTOCOL_X86_32 = 2,
+	BLKIF_PROTOCOL_X86_64 = 3,
+};
+
+struct xen_vbd {
+	/* What the domain refers to this vbd as. */
+	blkif_vdev_t		handle;
+	/* Non-zero -> read-only */
+	unsigned char		readonly;
+	/* VDISK_xxx */
+	unsigned char		type;
+	/* phys device that this vbd maps to. */
+	u32			pdevice;
+	struct block_device	*bdev;
+	/* Cached size parameter. */
+	sector_t		size;
+	bool			flush_support;
+};
+
+struct backend_info;
+
+struct xen_blkif {
+	/* Unique identifier for this interface. */
+	domid_t			domid;
+	unsigned int		handle;
+	/* Physical parameters of the comms window. */
+	unsigned int		irq;
+	/* Comms information. */
+	enum blkif_protocol	blk_protocol;
+	union blkif_back_rings	blk_rings;
+	struct vm_struct	*blk_ring_area;
+	/* The VBD attached to this interface. */
+	struct xen_vbd		vbd;
+	/* Back pointer to the backend_info. */
+	struct backend_info	*be;
+	/* Private fields. */
+	spinlock_t		blk_ring_lock;
+	atomic_t		refcnt;
+
+	wait_queue_head_t	wq;
+	/* One thread per one blkif. */
+	struct task_struct	*xenblkd;
+	unsigned int		waiting_reqs;
+
+	/* statistics */
+	unsigned long		st_print;
+	int			st_rd_req;
+	int			st_wr_req;
+	int			st_oo_req;
+	int			st_f_req;
+	int			st_rd_sect;
+	int			st_wr_sect;
+
+	wait_queue_head_t	waiting_to_free;
+
+	grant_handle_t		shmem_handle;
+	grant_ref_t		shmem_ref;
+};
+
+
+#define vbd_sz(_v)	((_v)->bdev->bd_part ? \
+			 (_v)->bdev->bd_part->nr_sects : \
+			  get_capacity((_v)->bdev->bd_disk))
+
+#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define xen_blkif_put(_b)				\
+	do {						\
+		if (atomic_dec_and_test(&(_b)->refcnt))	\
+			wake_up(&(_b)->waiting_to_free);\
+	} while (0)
+
+struct phys_req {
+	unsigned short		dev;
+	unsigned short		nr_sects;
+	struct block_device	*bdev;
+	blkif_sector_t		sector_number;
+};
+int xen_blkif_interface_init(void);
+
+int xen_blkif_xenbus_init(void);
+
+irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
+int xen_blkif_schedule(void *arg);
+
+int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
+			      struct backend_info *be, int state);
+
+struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
+
+static inline void blkif_get_x86_32_req(struct blkif_request *dst,
+					struct blkif_x86_32_request *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->u.rw.sector_number = src->sector_number;
+	barrier();
+	if (n > dst->nr_segments)
+		n = dst->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->u.rw.seg[i] = src->seg[i];
+}
+
+static inline void blkif_get_x86_64_req(struct blkif_request *dst,
+					struct blkif_x86_64_request *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->u.rw.sector_number = src->sector_number;
+	barrier();
+	if (n > dst->nr_segments)
+		n = dst->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->u.rw.seg[i] = src->seg[i];
+}
+
+#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
new file mode 100644
index 0000000..3457082
--- /dev/null
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -0,0 +1,768 @@
+/*  Xenbus code for blkif backend
+    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+    Copyright (C) 2005 XenSource Ltd
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+*/
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include "common.h"
+
+struct backend_info {
+	struct xenbus_device	*dev;
+	struct xen_blkif	*blkif;
+	struct xenbus_watch	backend_watch;
+	unsigned		major;
+	unsigned		minor;
+	char			*mode;
+};
+
+static struct kmem_cache *xen_blkif_cachep;
+static void connect(struct backend_info *);
+static int connect_ring(struct backend_info *);
+static void backend_changed(struct xenbus_watch *, const char **,
+			    unsigned int);
+
+struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
+{
+	return be->dev;
+}
+
+static int blkback_name(struct xen_blkif *blkif, char *buf)
+{
+	char *devpath, *devname;
+	struct xenbus_device *dev = blkif->be->dev;
+
+	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
+	if (IS_ERR(devpath))
+		return PTR_ERR(devpath);
+
+	devname = strstr(devpath, "/dev/");
+	if (devname != NULL)
+		devname += strlen("/dev/");
+	else
+		devname  = devpath;
+
+	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
+	kfree(devpath);
+
+	return 0;
+}
+
+static void xen_update_blkif_status(struct xen_blkif *blkif)
+{
+	int err;
+	char name[TASK_COMM_LEN];
+
+	/* Not ready to connect? */
+	if (!blkif->irq || !blkif->vbd.bdev)
+		return;
+
+	/* Already connected? */
+	if (blkif->be->dev->state == XenbusStateConnected)
+		return;
+
+	/* Attempt to connect: exit if we fail to. */
+	connect(blkif->be);
+	if (blkif->be->dev->state != XenbusStateConnected)
+		return;
+
+	err = blkback_name(blkif, name);
+	if (err) {
+		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
+		return;
+	}
+
+	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
+	if (err) {
+		xenbus_dev_error(blkif->be->dev, err, "block flush");
+		return;
+	}
+	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+
+	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name);
+	if (IS_ERR(blkif->xenblkd)) {
+		err = PTR_ERR(blkif->xenblkd);
+		blkif->xenblkd = NULL;
+		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+	}
+}
+
+static struct xen_blkif *xen_blkif_alloc(domid_t domid)
+{
+	struct xen_blkif *blkif;
+
+	blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
+	if (!blkif)
+		return ERR_PTR(-ENOMEM);
+
+	memset(blkif, 0, sizeof(*blkif));
+	blkif->domid = domid;
+	spin_lock_init(&blkif->blk_ring_lock);
+	atomic_set(&blkif->refcnt, 1);
+	init_waitqueue_head(&blkif->wq);
+	blkif->st_print = jiffies;
+	init_waitqueue_head(&blkif->waiting_to_free);
+
+	return blkif;
+}
+
+static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
+{
+	struct gnttab_map_grant_ref op;
+
+	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+			  GNTMAP_host_map, shared_page, blkif->domid);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+		BUG();
+
+	if (op.status) {
+		DPRINTK("Grant table operation failure !\n");
+		return op.status;
+	}
+
+	blkif->shmem_ref = shared_page;
+	blkif->shmem_handle = op.handle;
+
+	return 0;
+}
+
+static void unmap_frontend_page(struct xen_blkif *blkif)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+			    GNTMAP_host_map, blkif->shmem_handle);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+		BUG();
+}
+
+static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
+			 unsigned int evtchn)
+{
+	int err;
+
+	/* Already connected through? */
+	if (blkif->irq)
+		return 0;
+
+	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
+	if (!blkif->blk_ring_area)
+		return -ENOMEM;
+
+	err = map_frontend_page(blkif, shared_page);
+	if (err) {
+		free_vm_area(blkif->blk_ring_area);
+		return err;
+	}
+
+	switch (blkif->blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+	{
+		struct blkif_sring *sring;
+		sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_32:
+	{
+		struct blkif_x86_32_sring *sring_x86_32;
+		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_64:
+	{
+		struct blkif_x86_64_sring *sring_x86_64;
+		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+		break;
+	}
+	default:
+		BUG();
+	}
+
+	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
+						    xen_blkif_be_int, 0,
+						    "blkif-backend", blkif);
+	if (err < 0) {
+		unmap_frontend_page(blkif);
+		free_vm_area(blkif->blk_ring_area);
+		blkif->blk_rings.common.sring = NULL;
+		return err;
+	}
+	blkif->irq = err;
+
+	return 0;
+}
+
+static void xen_blkif_disconnect(struct xen_blkif *blkif)
+{
+	if (blkif->xenblkd) {
+		kthread_stop(blkif->xenblkd);
+		blkif->xenblkd = NULL;
+	}
+
+	atomic_dec(&blkif->refcnt);
+	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+	atomic_inc(&blkif->refcnt);
+
+	if (blkif->irq) {
+		unbind_from_irqhandler(blkif->irq, blkif);
+		blkif->irq = 0;
+	}
+
+	if (blkif->blk_rings.common.sring) {
+		unmap_frontend_page(blkif);
+		free_vm_area(blkif->blk_ring_area);
+		blkif->blk_rings.common.sring = NULL;
+	}
+}
+
+void xen_blkif_free(struct xen_blkif *blkif)
+{
+	if (!atomic_dec_and_test(&blkif->refcnt))
+		BUG();
+	kmem_cache_free(xen_blkif_cachep, blkif);
+}
+
+int __init xen_blkif_interface_init(void)
+{
+	xen_blkif_cachep = kmem_cache_create("blkif_cache",
+					     sizeof(struct xen_blkif),
+					     0, 0, NULL);
+	if (!xen_blkif_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ *  sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...)					\
+	static ssize_t show_##name(struct device *_dev,			\
+				   struct device_attribute *attr,	\
+				   char *buf)				\
+	{								\
+		struct xenbus_device *dev = to_xenbus_device(_dev);	\
+		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
+									\
+		return sprintf(buf, format, ##args);			\
+	}								\
+	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
+
+static struct attribute *xen_vbdstat_attrs[] = {
+	&dev_attr_oo_req.attr,
+	&dev_attr_rd_req.attr,
+	&dev_attr_wr_req.attr,
+	&dev_attr_f_req.attr,
+	&dev_attr_rd_sect.attr,
+	&dev_attr_wr_sect.attr,
+	NULL
+};
+
+static struct attribute_group xen_vbdstat_group = {
+	.name = "statistics",
+	.attrs = xen_vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+	int error;
+
+	error = device_create_file(&dev->dev, &dev_attr_physical_device);
+	if (error)
+		goto fail1;
+
+	error = device_create_file(&dev->dev, &dev_attr_mode);
+	if (error)
+		goto fail2;
+
+	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
+	if (error)
+		goto fail3;
+
+	return 0;
+
+fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
+fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
+fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
+	return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
+	device_remove_file(&dev->dev, &dev_attr_mode);
+	device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
+
+
+static void xen_vbd_free(struct xen_vbd *vbd)
+{
+	if (vbd->bdev)
+		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
+	vbd->bdev = NULL;
+}
+
+static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
+			  unsigned major, unsigned minor, int readonly,
+			  int cdrom)
+{
+	struct xen_vbd *vbd;
+	struct block_device *bdev;
+	struct request_queue *q;
+
+	vbd = &blkif->vbd;
+	vbd->handle   = handle;
+	vbd->readonly = readonly;
+	vbd->type     = 0;
+
+	vbd->pdevice  = MKDEV(major, minor);
+
+	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
+				 FMODE_READ : FMODE_WRITE, NULL);
+
+	if (IS_ERR(bdev)) {
+		DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
+			vbd->pdevice);
+		return -ENOENT;
+	}
+
+	vbd->bdev = bdev;
+	vbd->size = vbd_sz(vbd);
+
+	if (vbd->bdev->bd_disk == NULL) {
+		DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
+			vbd->pdevice);
+		xen_vbd_free(vbd);
+		return -ENOENT;
+	}
+
+	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
+		vbd->type |= VDISK_CDROM;
+	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+		vbd->type |= VDISK_REMOVABLE;
+
+	q = bdev_get_queue(bdev);
+	if (q && q->flush_flags)
+		vbd->flush_support = true;
+
+	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+		handle, blkif->domid);
+	return 0;
+}
+static int xen_blkbk_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	DPRINTK("");
+
+	if (be->major || be->minor)
+		xenvbd_sysfs_delif(dev);
+
+	if (be->backend_watch.node) {
+		unregister_xenbus_watch(&be->backend_watch);
+		kfree(be->backend_watch.node);
+		be->backend_watch.node = NULL;
+	}
+
+	if (be->blkif) {
+		xen_blkif_disconnect(be->blkif);
+		xen_vbd_free(&be->blkif->vbd);
+		xen_blkif_free(be->blkif);
+		be->blkif = NULL;
+	}
+
+	kfree(be);
+	dev_set_drvdata(&dev->dev, NULL);
+	return 0;
+}
+
+int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
+			      struct backend_info *be, int state)
+{
+	struct xenbus_device *dev = be->dev;
+	int err;
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
+			    "%d", state);
+	if (err)
+		xenbus_dev_fatal(dev, err, "writing feature-flush-cache");
+
+	return err;
+}
+
+/*
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures, and watch the store waiting for the hotplug scripts to tell us
+ * the device's physical major and minor numbers.  Switch to InitWait.
+ */
+static int xen_blkbk_probe(struct xenbus_device *dev,
+			   const struct xenbus_device_id *id)
+{
+	int err;
+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
+					  GFP_KERNEL);
+	if (!be) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+	be->dev = dev;
+	dev_set_drvdata(&dev->dev, be);
+
+	be->blkif = xen_blkif_alloc(dev->otherend_id);
+	if (IS_ERR(be->blkif)) {
+		err = PTR_ERR(be->blkif);
+		be->blkif = NULL;
+		xenbus_dev_fatal(dev, err, "creating block interface");
+		goto fail;
+	}
+
+	/* setup back pointer */
+	be->blkif->be = be;
+
+	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
+				   "%s/%s", dev->nodename, "physical-device");
+	if (err)
+		goto fail;
+
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	DPRINTK("failed");
+	xen_blkbk_remove(dev);
+	return err;
+}
+
+
+/*
+ * Callback received when the hotplug scripts have placed the physical-device
+ * node.  Read it and the mode node, and create a vbd.  If the frontend is
+ * ready, connect.
+ */
+static void backend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	int err;
+	unsigned major;
+	unsigned minor;
+	struct backend_info *be
+		= container_of(watch, struct backend_info, backend_watch);
+	struct xenbus_device *dev = be->dev;
+	int cdrom = 0;
+	char *device_type;
+
+	DPRINTK("");
+
+	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
+			   &major, &minor);
+	if (XENBUS_EXIST_ERR(err)) {
+		/*
+		 * Since this watch will fire once immediately after it is
+		 * registered, we expect this.  Ignore it, and wait for the
+		 * hotplug scripts.
+		 */
+		return;
+	}
+	if (err != 2) {
+		xenbus_dev_fatal(dev, err, "reading physical-device");
+		return;
+	}
+
+	if ((be->major || be->minor) &&
+	    ((be->major != major) || (be->minor != minor))) {
+		pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+			be->major, be->minor, major, minor);
+		return;
+	}
+
+	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
+	if (IS_ERR(be->mode)) {
+		err = PTR_ERR(be->mode);
+		be->mode = NULL;
+		xenbus_dev_fatal(dev, err, "reading mode");
+		return;
+	}
+
+	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
+	if (!IS_ERR(device_type)) {
+		cdrom = strcmp(device_type, "cdrom") == 0;
+		kfree(device_type);
+	}
+
+	if (be->major == 0 && be->minor == 0) {
+		/* Front end dir is a number, which is used as the handle. */
+
+		char *p = strrchr(dev->otherend, '/') + 1;
+		long handle;
+		err = strict_strtoul(p, 0, &handle);
+		if (err)
+			return;
+
+		be->major = major;
+		be->minor = minor;
+
+		err = xen_vbd_create(be->blkif, handle, major, minor,
+				 (NULL == strchr(be->mode, 'w')), cdrom);
+		if (err) {
+			be->major = 0;
+			be->minor = 0;
+			xenbus_dev_fatal(dev, err, "creating vbd structure");
+			return;
+		}
+
+		err = xenvbd_sysfs_addif(dev);
+		if (err) {
+			xen_vbd_free(&be->blkif->vbd);
+			be->major = 0;
+			be->minor = 0;
+			xenbus_dev_fatal(dev, err, "creating sysfs entries");
+			return;
+		}
+
+		/* We're potentially connected now */
+		xen_update_blkif_status(be->blkif);
+	}
+}
+
+
+/*
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+			     enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+	int err;
+
+	DPRINTK("%s", xenbus_strstate(frontend_state));
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		if (dev->state == XenbusStateClosed) {
+			pr_info(DRV_PFX "%s: prepare for reconnect\n",
+				dev->nodename);
+			xenbus_switch_state(dev, XenbusStateInitWait);
+		}
+		break;
+
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		/*
+		 * Ensure we connect even when two watches fire in
+		 * close successsion and we miss the intermediate value
+		 * of frontend_state.
+		 */
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		/*
+		 * Enforce precondition before potential leak point.
+		 * blkif_disconnect() is idempotent.
+		 */
+		xen_blkif_disconnect(be->blkif);
+
+		err = connect_ring(be);
+		if (err)
+			break;
+		xen_update_blkif_status(be->blkif);
+		break;
+
+	case XenbusStateClosing:
+		xen_blkif_disconnect(be->blkif);
+		xenbus_switch_state(dev, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		xenbus_switch_state(dev, XenbusStateClosed);
+		if (xenbus_dev_is_online(dev))
+			break;
+		/* fall through if not online */
+	case XenbusStateUnknown:
+		/* implies blkif_disconnect() via blkback_remove() */
+		device_unregister(&dev->dev);
+		break;
+
+	default:
+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+				 frontend_state);
+		break;
+	}
+}
+
+
+/* ** Connection ** */
+
+
+/*
+ * Write the physical details regarding the block device to the store, and
+ * switch to Connected state.
+ */
+static void connect(struct backend_info *be)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	struct xenbus_device *dev = be->dev;
+
+	DPRINTK("%s", dev->otherend);
+
+	/* Supply the information about the device the frontend needs */
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "starting transaction");
+		return;
+	}
+
+	err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
+	if (err)
+		goto abort;
+
+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+			    (unsigned long long)vbd_sz(&be->blkif->vbd));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/sectors",
+				 dev->nodename);
+		goto abort;
+	}
+
+	/* FIXME: use a typename instead */
+	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
+			    be->blkif->vbd.type |
+			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/info",
+				 dev->nodename);
+		goto abort;
+	}
+	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
+			    (unsigned long)
+			    bdev_logical_block_size(be->blkif->vbd.bdev));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
+				 dev->nodename);
+		goto abort;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err)
+		xenbus_dev_fatal(dev, err, "ending transaction");
+
+	err = xenbus_switch_state(dev, XenbusStateConnected);
+	if (err)
+		xenbus_dev_fatal(dev, err, "switching to Connected state",
+				 dev->nodename);
+
+	return;
+ abort:
+	xenbus_transaction_end(xbt, 1);
+}
+
+
+static int connect_ring(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	unsigned long ring_ref;
+	unsigned int evtchn;
+	char protocol[64] = "";
+	int err;
+
+	DPRINTK("%s", dev->otherend);
+
+	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
+			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+			    "%63s", protocol, NULL);
+	if (err)
+		strcpy(protocol, "unspecified, assuming native");
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+	else {
+		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+		return -1;
+	}
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+
+	/* Map the shared frame, irq etc. */
+	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+				 ring_ref, evtchn);
+		return err;
+	}
+
+	return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id xen_blkbk_ids[] = {
+	{ "vbd" },
+	{ "" }
+};
+
+
+static struct xenbus_driver xen_blkbk = {
+	.name = "vbd",
+	.owner = THIS_MODULE,
+	.ids = xen_blkbk_ids,
+	.probe = xen_blkbk_probe,
+	.remove = xen_blkbk_remove,
+	.otherend_changed = frontend_changed
+};
+
+
+int xen_blkif_xenbus_init(void)
+{
+	return xenbus_register_backend(&xen_blkbk);
+}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9cb8668..b536a9c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -97,6 +97,7 @@
 	struct blk_shadow shadow[BLK_RING_SIZE];
 	unsigned long shadow_free;
 	unsigned int feature_flush;
+	unsigned int flush_op;
 	int is_ready;
 };
 
@@ -250,8 +251,7 @@
 
 /*
  * Generate a Xen blkfront IO request from a blk layer request.  Reads
- * and writes are handled as expected.  Since we lack a loose flush
- * request, we map flushes into a full ordered barrier.
+ * and writes are handled as expected.
  *
  * @req: a request struct
  */
@@ -293,14 +293,13 @@
 
 	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
 		/*
-		 * Ideally we could just do an unordered
-		 * flush-to-disk, but all we have is a full write
-		 * barrier at the moment.  However, a barrier write is
+		 * Ideally we can do an unordered flush-to-disk. In case the
+		 * backend onlysupports barriers, use that. A barrier request
 		 * a superset of FUA, so we can implement it the same
 		 * way.  (It's also a FLUSH+FUA, since it is
 		 * guaranteed ordered WRT previous writes.)
 		 */
-		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+		ring_req->operation = info->flush_op;
 	}
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -433,8 +432,11 @@
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: barriers %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s\n",
 	       info->gd->disk_name,
+	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
+		"flush diskcache" : "barrier or flush"),
 	       info->feature_flush ? "enabled" : "disabled");
 }
 
@@ -720,15 +722,20 @@
 
 		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
+		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
-				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
+				printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
+				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+				       "barrier" :  "flush disk cache",
 				       info->gd->disk_name);
 				error = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     info->shadow[id].req.nr_segments == 0)) {
-				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
+				printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
+				       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+				       "barrier" :  "flush disk cache",
 				       info->gd->disk_name);
 				error = -EOPNOTSUPP;
 			}
@@ -736,6 +743,7 @@
 				if (error == -EOPNOTSUPP)
 					error = 0;
 				info->feature_flush = 0;
+				info->flush_op = 0;
 				xlvbd_flush(info);
 			}
 			/* fall through */
@@ -1100,7 +1108,7 @@
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier;
+	int barrier, flush;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1140,8 +1148,11 @@
 		return;
 	}
 
+	info->feature_flush = 0;
+	info->flush_op = 0;
+
 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-barrier", "%lu", &barrier,
+			    "feature-barrier", "%d", &barrier,
 			    NULL);
 
 	/*
@@ -1151,11 +1162,23 @@
 	 *
 	 * If there are barriers, then we use flush.
 	 */
-	info->feature_flush = 0;
-
-	if (!err && barrier)
+	if (!err && barrier) {
 		info->feature_flush = REQ_FLUSH | REQ_FUA;
+		info->flush_op = BLKIF_OP_WRITE_BARRIER;
+	}
+	/*
+	 * And if there is "feature-flush-cache" use that above
+	 * barriers.
+	 */
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-flush-cache", "%d", &flush,
+			    NULL);
 
+	if (!err && flush) {
+		info->feature_flush = REQ_FLUSH;
+		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
+	}
+		
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index e427fbe..ae15a4d 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -625,7 +625,9 @@
 	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
-	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+	gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
+			 GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
 	d->viocd_disk = gendisk;
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index d41f900..aa08497 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -101,6 +101,19 @@
 
 static int nr_channels;
 
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl(p);
+	high = readl(p + 1);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
 static int how_many_channels(struct pci_dev *pdev)
 {
 	unsigned char capid0_8b; /* 8th byte of CAPID0 */
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index 0a775f7..1bc621a 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -15,6 +15,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  */
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/gpio.h>
 
@@ -138,6 +139,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
 /*
  * Save register configuration and disable interrupts.
  */
@@ -157,6 +159,7 @@
 	/* to store contents of PM register */
 	iowrite32(chip->ioh_gpio_reg.pm_reg, &chip->reg->regs[chip->ch].pm);
 }
+#endif
 
 static void ioh_gpio_setup(struct ioh_gpio *chip, int num_port)
 {
diff --git a/drivers/gpio/vx855_gpio.c b/drivers/gpio/vx855_gpio.c
index 8a98ee5..ef5aabd 100644
--- a/drivers/gpio/vx855_gpio.c
+++ b/drivers/gpio/vx855_gpio.c
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/gpio.h>
+#include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/pci.h>
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c628903..0b2e167 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -56,9 +56,7 @@
 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
 
 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
-				    int nr_to_scan,
-				    gfp_t gfp_mask);
-
+				    struct shrink_control *sc);
 
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
@@ -4092,9 +4090,7 @@
 }
 
 static int
-i915_gem_inactive_shrink(struct shrinker *shrinker,
-			 int nr_to_scan,
-			 gfp_t gfp_mask)
+i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker,
@@ -4102,6 +4098,7 @@
 			     mm.inactive_shrinker);
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_i915_gem_object *obj, *next;
+	int nr_to_scan = sc->nr_to_scan;
 	int cnt;
 
 	if (!mutex_trylock(&dev->struct_mutex))
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 9d9d929..d948575 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -395,12 +395,14 @@
 /**
  * Callback for mm to request pool to reduce number of page held.
  */
-static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask)
+static int ttm_pool_mm_shrink(struct shrinker *shrink,
+			      struct shrink_control *sc)
 {
 	static atomic_t start_pool = ATOMIC_INIT(0);
 	unsigned i;
 	unsigned pool_offset = atomic_add_return(1, &start_pool);
 	struct ttm_page_pool *pool;
+	int shrink_pages = sc->nr_to_scan;
 
 	pool_offset = pool_offset % NUM_POOLS;
 	/* select start pool in round robin fashion */
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a5ec5a7c..6e5123b 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1781,7 +1781,8 @@
 
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
-	g->flags |= GENHD_FL_REMOVABLE;
+	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 9bec869..1d027b47 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -14,6 +14,13 @@
 	  This option enables the led sysfs class in /sys/class/leds.  You'll
 	  need this to do anything useful with LEDs.  If unsure, say N.
 
+config LEDS_GPIO_REGISTER
+	bool
+	help
+	  This option provides the function gpio_led_register_device.
+	  As this function is used by arch code it must not be compiled as a
+	  module.
+
 if NEW_LEDS
 
 comment "LED drivers"
@@ -115,13 +122,6 @@
 	  This option enables support for the PCEngines ALIX.2 and ALIX.3 LEDs.
 	  You have to set leds-alix2.force=1 for boards with Award BIOS.
 
-config LEDS_H1940
-	tristate "LED Support for iPAQ H1940 device"
-	depends on LEDS_CLASS
-	depends on ARCH_H1940
-	help
-	  This option enables support for the LEDs on the h1940.
-
 config LEDS_COBALT_QUBE
 	tristate "LED Support for the Cobalt Qube series front LED"
 	depends on LEDS_CLASS
@@ -162,6 +162,16 @@
 	  LED controller. It is generally only useful
 	  as a platform driver
 
+config LEDS_PCA9532_GPIO
+	bool "Enable GPIO support for PCA9532"
+	depends on LEDS_PCA9532
+	depends on GPIOLIB
+	help
+	  Allow unused pins on PCA9532 to be used as gpio.
+
+	  To use a pin as gpio pca9532_type in pca9532_platform data needs to
+	  set to PCA9532_TYPE_GPIO.
+
 config LEDS_GPIO
 	tristate "LED Support for GPIO connected LEDs"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 39c80fc..bccb96c 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -17,11 +17,11 @@
 obj-$(CONFIG_LEDS_NET5501)		+= leds-net5501.o
 obj-$(CONFIG_LEDS_WRAP)			+= leds-wrap.o
 obj-$(CONFIG_LEDS_ALIX2)		+= leds-alix2.o
-obj-$(CONFIG_LEDS_H1940)		+= leds-h1940.o
 obj-$(CONFIG_LEDS_COBALT_QUBE)		+= leds-cobalt-qube.o
 obj-$(CONFIG_LEDS_COBALT_RAQ)		+= leds-cobalt-raq.o
 obj-$(CONFIG_LEDS_SUNFIRE)		+= leds-sunfire.o
 obj-$(CONFIG_LEDS_PCA9532)		+= leds-pca9532.o
+obj-$(CONFIG_LEDS_GPIO_REGISTER)	+= leds-gpio-register.o
 obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_LP3944)		+= leds-lp3944.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index d5a4ade..dc3d3d8 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -131,7 +131,8 @@
 	if (!led_cdev->blink_brightness)
 		led_cdev->blink_brightness = led_cdev->max_brightness;
 
-	if (delay_on == led_cdev->blink_delay_on &&
+	if (led_get_trigger_data(led_cdev) &&
+	    delay_on == led_cdev->blink_delay_on &&
 	    delay_off == led_cdev->blink_delay_off)
 		return;
 
diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c
new file mode 100644
index 0000000..1c4ed55
--- /dev/null
+++ b/drivers/leds/leds-gpio-register.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2011 Pengutronix
+ * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ */
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+
+/**
+ * gpio_led_register_device - register a gpio-led device
+ * @pdata: the platform data used for the new device
+ *
+ * Makes a copy of pdata and pdata->leds and registers a new leds-gpio device
+ * with the result. This allows to have pdata and pdata-leds in .init.rodata
+ * and so saves some bytes compared to a static struct platform_device with
+ * static platform data.
+ *
+ * Returns the registered device or an error pointer.
+ */
+struct platform_device *__init gpio_led_register_device(
+		int id, const struct gpio_led_platform_data *pdata)
+{
+	struct platform_device *ret;
+	struct gpio_led_platform_data _pdata = *pdata;
+
+	_pdata.leds = kmemdup(pdata->leds,
+			pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL);
+	if (!_pdata.leds)
+		return ERR_PTR(-ENOMEM);
+
+	ret = platform_device_register_resndata(NULL, "leds-gpio", id,
+			NULL, 0, &_pdata, sizeof(_pdata));
+	if (IS_ERR(ret))
+		kfree(_pdata.leds);
+
+	return ret;
+}
diff --git a/drivers/leds/leds-h1940.c b/drivers/leds/leds-h1940.c
deleted file mode 100644
index 173d104..0000000
--- a/drivers/leds/leds-h1940.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * drivers/leds/leds-h1940.c
- * Copyright (c) Arnaud Patard <arnaud.patard@rtp-net.org>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive for
- * more details.
- *
- * H1940 leds driver
- *
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/leds.h>
-#include <linux/gpio.h>
-
-#include <mach/regs-gpio.h>
-#include <mach/hardware.h>
-#include <mach/h1940-latch.h>
-
-/*
- * Green led.
- */
-static void h1940_greenled_set(struct led_classdev *led_dev,
-			       enum led_brightness value)
-{
-	switch (value) {
-	case LED_HALF:
-		h1940_latch_control(0, H1940_LATCH_LED_FLASH);
-		s3c2410_gpio_setpin(S3C2410_GPA7, 1);
-		break;
-	case LED_FULL:
-		h1940_latch_control(0, H1940_LATCH_LED_GREEN);
-		s3c2410_gpio_setpin(S3C2410_GPA7, 1);
-		break;
-	default:
-	case LED_OFF:
-		h1940_latch_control(H1940_LATCH_LED_FLASH, 0);
-		h1940_latch_control(H1940_LATCH_LED_GREEN, 0);
-		s3c2410_gpio_setpin(S3C2410_GPA7, 0);
-		break;
-	}
-}
-
-static struct led_classdev h1940_greenled = {
-	.name			= "h1940:green",
-	.brightness_set		= h1940_greenled_set,
-	.default_trigger	= "h1940-charger",
-};
-
-/*
- * Red led.
- */
-static void h1940_redled_set(struct led_classdev *led_dev,
-			     enum led_brightness value)
-{
-	switch (value) {
-	case LED_HALF:
-		h1940_latch_control(0, H1940_LATCH_LED_FLASH);
-		s3c2410_gpio_setpin(S3C2410_GPA1, 1);
-		break;
-	case LED_FULL:
-		h1940_latch_control(0, H1940_LATCH_LED_RED);
-		s3c2410_gpio_setpin(S3C2410_GPA1, 1);
-		break;
-	default:
-	case LED_OFF:
-		h1940_latch_control(H1940_LATCH_LED_FLASH, 0);
-		h1940_latch_control(H1940_LATCH_LED_RED, 0);
-		s3c2410_gpio_setpin(S3C2410_GPA1, 0);
-		break;
-	}
-}
-
-static struct led_classdev h1940_redled = {
-	.name			= "h1940:red",
-	.brightness_set		= h1940_redled_set,
-	.default_trigger	= "h1940-charger",
-};
-
-/*
- * Blue led.
- * (it can only be blue flashing led)
- */
-static void h1940_blueled_set(struct led_classdev *led_dev,
-			      enum led_brightness value)
-{
-	if (value) {
-		/* flashing Blue */
-		h1940_latch_control(0, H1940_LATCH_LED_FLASH);
-		s3c2410_gpio_setpin(S3C2410_GPA3, 1);
-	} else {
-		h1940_latch_control(H1940_LATCH_LED_FLASH, 0);
-		s3c2410_gpio_setpin(S3C2410_GPA3, 0);
-	}
-
-}
-
-static struct led_classdev h1940_blueled = {
-	.name			= "h1940:blue",
-	.brightness_set		= h1940_blueled_set,
-	.default_trigger	= "h1940-bluetooth",
-};
-
-static int __devinit h1940leds_probe(struct platform_device *pdev)
-{
-	int ret;
-
-	ret = led_classdev_register(&pdev->dev, &h1940_greenled);
-	if (ret)
-		goto err_green;
-
-	ret = led_classdev_register(&pdev->dev, &h1940_redled);
-	if (ret)
-		goto err_red;
-
-	ret = led_classdev_register(&pdev->dev, &h1940_blueled);
-	if (ret)
-		goto err_blue;
-
-	return 0;
-
-err_blue:
-	led_classdev_unregister(&h1940_redled);
-err_red:
-	led_classdev_unregister(&h1940_greenled);
-err_green:
-	return ret;
-}
-
-static int h1940leds_remove(struct platform_device *pdev)
-{
-	led_classdev_unregister(&h1940_greenled);
-	led_classdev_unregister(&h1940_redled);
-	led_classdev_unregister(&h1940_blueled);
-	return 0;
-}
-
-
-static struct platform_driver h1940leds_driver = {
-	.driver		= {
-		.name	= "h1940-leds",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= h1940leds_probe,
-	.remove		= h1940leds_remove,
-};
-
-
-static int __init h1940leds_init(void)
-{
-	return platform_driver_register(&h1940leds_driver);
-}
-
-static void __exit h1940leds_exit(void)
-{
-	platform_driver_unregister(&h1940leds_driver);
-}
-
-module_init(h1940leds_init);
-module_exit(h1940leds_exit);
-
-MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
-MODULE_DESCRIPTION("LED driver for the iPAQ H1940");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:h1940-leds");
diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c
index b37e618..4d7ce76 100644
--- a/drivers/leds/leds-lm3530.c
+++ b/drivers/leds/leds-lm3530.c
@@ -17,6 +17,7 @@
 #include <linux/input.h>
 #include <linux/led-lm3530.h>
 #include <linux/types.h>
+#include <linux/regulator/consumer.h>
 
 #define LM3530_LED_DEV "lcd-backlight"
 #define LM3530_NAME "lm3530-led"
@@ -96,12 +97,18 @@
  * @client: i2c client
  * @pdata: LM3530 platform data
  * @mode: mode of operation - manual, ALS, PWM
+ * @regulator: regulator
+ * @brighness: previous brightness value
+ * @enable: regulator is enabled
  */
 struct lm3530_data {
 	struct led_classdev led_dev;
 	struct i2c_client *client;
 	struct lm3530_platform_data *pdata;
 	enum lm3530_mode mode;
+	struct regulator *regulator;
+	enum led_brightness brightness;
+	bool enable;
 };
 
 static const u8 lm3530_reg[LM3530_REG_MAX] = {
@@ -172,7 +179,10 @@
 	brt_ramp = (pltfm->brt_ramp_fall << LM3530_BRT_RAMP_FALL_SHIFT) |
 			(pltfm->brt_ramp_rise << LM3530_BRT_RAMP_RISE_SHIFT);
 
-	brightness = pltfm->brt_val;
+	if (drvdata->brightness)
+		brightness = drvdata->brightness;
+	else
+		brightness = drvdata->brightness = pltfm->brt_val;
 
 	reg_val[0] = gen_config;	/* LM3530_GEN_CONFIG */
 	reg_val[1] = als_config;	/* LM3530_ALS_CONFIG */
@@ -190,6 +200,16 @@
 	reg_val[13] = LM3530_DEF_ZT_3;	/* LM3530_ALS_Z3T_REG */
 	reg_val[14] = LM3530_DEF_ZT_4;	/* LM3530_ALS_Z4T_REG */
 
+	if (!drvdata->enable) {
+		ret = regulator_enable(drvdata->regulator);
+		if (ret) {
+			dev_err(&drvdata->client->dev,
+					"Enable regulator failed\n");
+			return ret;
+		}
+		drvdata->enable = true;
+	}
+
 	for (i = 0; i < LM3530_REG_MAX; i++) {
 		ret = i2c_smbus_write_byte_data(client,
 				lm3530_reg[i], reg_val[i]);
@@ -210,12 +230,31 @@
 	switch (drvdata->mode) {
 	case LM3530_BL_MODE_MANUAL:
 
+		if (!drvdata->enable) {
+			err = lm3530_init_registers(drvdata);
+			if (err) {
+				dev_err(&drvdata->client->dev,
+					"Register Init failed: %d\n", err);
+				break;
+			}
+		}
+
 		/* set the brightness in brightness control register*/
 		err = i2c_smbus_write_byte_data(drvdata->client,
 				LM3530_BRT_CTRL_REG, brt_val / 2);
 		if (err)
 			dev_err(&drvdata->client->dev,
 				"Unable to set brightness: %d\n", err);
+		else
+			drvdata->brightness = brt_val / 2;
+
+		if (brt_val == 0) {
+			err = regulator_disable(drvdata->regulator);
+			if (err)
+				dev_err(&drvdata->client->dev,
+					"Disable regulator failed\n");
+			drvdata->enable = false;
+		}
 		break;
 	case LM3530_BL_MODE_ALS:
 		break;
@@ -297,20 +336,31 @@
 	drvdata->mode = pdata->mode;
 	drvdata->client = client;
 	drvdata->pdata = pdata;
+	drvdata->brightness = LED_OFF;
+	drvdata->enable = false;
 	drvdata->led_dev.name = LM3530_LED_DEV;
 	drvdata->led_dev.brightness_set = lm3530_brightness_set;
 
 	i2c_set_clientdata(client, drvdata);
 
-	err = lm3530_init_registers(drvdata);
-	if (err < 0) {
-		dev_err(&client->dev, "Register Init failed: %d\n", err);
-		err = -ENODEV;
-		goto err_reg_init;
+	drvdata->regulator = regulator_get(&client->dev, "vin");
+	if (IS_ERR(drvdata->regulator)) {
+		dev_err(&client->dev, "regulator get failed\n");
+		err = PTR_ERR(drvdata->regulator);
+		drvdata->regulator = NULL;
+		goto err_regulator_get;
 	}
 
-	err = led_classdev_register((struct device *)
-				      &client->dev, &drvdata->led_dev);
+	if (drvdata->pdata->brt_val) {
+		err = lm3530_init_registers(drvdata);
+		if (err < 0) {
+			dev_err(&client->dev,
+				"Register Init failed: %d\n", err);
+			err = -ENODEV;
+			goto err_reg_init;
+		}
+	}
+	err = led_classdev_register(&client->dev, &drvdata->led_dev);
 	if (err < 0) {
 		dev_err(&client->dev, "Register led class failed: %d\n", err);
 		err = -ENODEV;
@@ -330,6 +380,9 @@
 	led_classdev_unregister(&drvdata->led_dev);
 err_class_register:
 err_reg_init:
+	regulator_put(drvdata->regulator);
+err_regulator_get:
+	i2c_set_clientdata(client, NULL);
 	kfree(drvdata);
 err_out:
 	return err;
@@ -340,6 +393,10 @@
 	struct lm3530_data *drvdata = i2c_get_clientdata(client);
 
 	device_remove_file(drvdata->led_dev.dev, &dev_attr_mode);
+
+	if (drvdata->enable)
+		regulator_disable(drvdata->regulator);
+	regulator_put(drvdata->regulator);
 	led_classdev_unregister(&drvdata->led_dev);
 	kfree(drvdata);
 	return 0;
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
index 5bf63af..d8d3a1e 100644
--- a/drivers/leds/leds-pca9532.c
+++ b/drivers/leds/leds-pca9532.c
@@ -1,13 +1,14 @@
 /*
  * pca9532.c - 16-bit Led dimmer
  *
+ * Copyright (C) 2011 Jan Weitzel
  * Copyright (C) 2008 Riku Voipio
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; version 2 of the License.
  *
- * Datasheet: http://www.nxp.com/acrobat/datasheets/PCA9532_3.pdf
+ * Datasheet: http://www.nxp.com/documents/data_sheet/PCA9532.pdf
  *
  */
 
@@ -19,21 +20,32 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 #include <linux/leds-pca9532.h>
+#include <linux/gpio.h>
 
-#define PCA9532_REG_PSC(i) (0x2+(i)*2)
-#define PCA9532_REG_PWM(i) (0x3+(i)*2)
-#define PCA9532_REG_LS0  0x6
-#define LED_REG(led) ((led>>2)+PCA9532_REG_LS0)
-#define LED_NUM(led) (led & 0x3)
+/* m =  num_leds*/
+#define PCA9532_REG_INPUT(i)	((i) >> 3)
+#define PCA9532_REG_OFFSET(m)	((m) >> 4)
+#define PCA9532_REG_PSC(m, i)	(PCA9532_REG_OFFSET(m) + 0x1 + (i) * 2)
+#define PCA9532_REG_PWM(m, i)	(PCA9532_REG_OFFSET(m) + 0x2 + (i) * 2)
+#define LED_REG(m, led)		(PCA9532_REG_OFFSET(m) + 0x5 + (led >> 2))
+#define LED_NUM(led)		(led & 0x3)
 
 #define ldev_to_led(c)       container_of(c, struct pca9532_led, ldev)
 
+struct pca9532_chip_info {
+	u8	num_leds;
+};
+
 struct pca9532_data {
 	struct i2c_client *client;
 	struct pca9532_led leds[16];
 	struct mutex update_lock;
 	struct input_dev *idev;
 	struct work_struct work;
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	struct gpio_chip gpio;
+#endif
+	const struct pca9532_chip_info *chip_info;
 	u8 pwm[2];
 	u8 psc[2];
 };
@@ -42,16 +54,41 @@
 	const struct i2c_device_id *id);
 static int pca9532_remove(struct i2c_client *client);
 
+enum {
+	pca9530,
+	pca9531,
+	pca9532,
+	pca9533,
+};
+
 static const struct i2c_device_id pca9532_id[] = {
-	{ "pca9532", 0 },
+	{ "pca9530", pca9530 },
+	{ "pca9531", pca9531 },
+	{ "pca9532", pca9532 },
+	{ "pca9533", pca9533 },
 	{ }
 };
 
 MODULE_DEVICE_TABLE(i2c, pca9532_id);
 
+static const struct pca9532_chip_info pca9532_chip_info_tbl[] = {
+	[pca9530] = {
+		.num_leds = 2,
+	},
+	[pca9531] = {
+		.num_leds = 8,
+	},
+	[pca9532] = {
+		.num_leds = 16,
+	},
+	[pca9533] = {
+		.num_leds = 4,
+	},
+};
+
 static struct i2c_driver pca9532_driver = {
 	.driver = {
-		.name = "pca9532",
+		.name = "pca953x",
 	},
 	.probe = pca9532_probe,
 	.remove = pca9532_remove,
@@ -68,7 +105,7 @@
 {
 	int a = 0, b = 0, i = 0;
 	struct pca9532_data *data = i2c_get_clientdata(client);
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < data->chip_info->num_leds; i++) {
 		if (data->leds[i].type == PCA9532_TYPE_LED &&
 			data->leds[i].state == PCA9532_PWM0+pwm) {
 				a++;
@@ -92,10 +129,12 @@
 static int pca9532_setpwm(struct i2c_client *client, int pwm)
 {
 	struct pca9532_data *data = i2c_get_clientdata(client);
+	u8 maxleds = data->chip_info->num_leds;
+
 	mutex_lock(&data->update_lock);
-	i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(pwm),
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(maxleds, pwm),
 		data->pwm[pwm]);
-	i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(pwm),
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(maxleds, pwm),
 		data->psc[pwm]);
 	mutex_unlock(&data->update_lock);
 	return 0;
@@ -106,15 +145,16 @@
 {
 	struct i2c_client *client = led->client;
 	struct pca9532_data *data = i2c_get_clientdata(client);
+	u8 maxleds = data->chip_info->num_leds;
 	char reg;
 
 	mutex_lock(&data->update_lock);
-	reg = i2c_smbus_read_byte_data(client, LED_REG(led->id));
+	reg = i2c_smbus_read_byte_data(client, LED_REG(maxleds, led->id));
 	/* zero led bits */
 	reg = reg & ~(0x3<<LED_NUM(led->id)*2);
 	/* set the new value */
 	reg = reg | (led->state << LED_NUM(led->id)*2);
-	i2c_smbus_write_byte_data(client, LED_REG(led->id), reg);
+	i2c_smbus_write_byte_data(client, LED_REG(maxleds, led->id), reg);
 	mutex_unlock(&data->update_lock);
 }
 
@@ -183,10 +223,12 @@
 
 static void pca9532_input_work(struct work_struct *work)
 {
-	struct pca9532_data *data;
-	data = container_of(work, struct pca9532_data, work);
+	struct pca9532_data *data =
+		container_of(work, struct pca9532_data, work);
+	u8 maxleds = data->chip_info->num_leds;
+
 	mutex_lock(&data->update_lock);
-	i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(1),
+	i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(maxleds, 1),
 		data->pwm[1]);
 	mutex_unlock(&data->update_lock);
 }
@@ -200,16 +242,68 @@
 	pca9532_setled(led);
 }
 
-static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs)
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	struct pca9532_led *led = &data->leds[offset];
+
+	if (led->type == PCA9532_TYPE_GPIO)
+		return 0;
+
+	return -EBUSY;
+}
+
+static void pca9532_gpio_set_value(struct gpio_chip *gc, unsigned offset, int val)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	struct pca9532_led *led = &data->leds[offset];
+
+	if (val)
+		led->state = PCA9532_ON;
+	else
+		led->state = PCA9532_OFF;
+
+	pca9532_setled(led);
+}
+
+static int pca9532_gpio_get_value(struct gpio_chip *gc, unsigned offset)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	unsigned char reg;
+
+	reg = i2c_smbus_read_byte_data(data->client, PCA9532_REG_INPUT(offset));
+
+	return !!(reg & (1 << (offset % 8)));
+}
+
+static int pca9532_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
+{
+	/* To use as input ensure pin is not driven */
+	pca9532_gpio_set_value(gc, offset, 0);
+
+	return 0;
+}
+
+static int pca9532_gpio_direction_output(struct gpio_chip *gc, unsigned offset, int val)
+{
+	pca9532_gpio_set_value(gc, offset, val);
+
+	return 0;
+}
+#endif /* CONFIG_LEDS_PCA9532_GPIO */
+
+static int pca9532_destroy_devices(struct pca9532_data *data, int n_devs)
 {
 	int i = n_devs;
 
 	if (!data)
-		return;
+		return -EINVAL;
 
 	while (--i >= 0) {
 		switch (data->leds[i].type) {
 		case PCA9532_TYPE_NONE:
+		case PCA9532_TYPE_GPIO:
 			break;
 		case PCA9532_TYPE_LED:
 			led_classdev_unregister(&data->leds[i].ldev);
@@ -224,23 +318,38 @@
 			break;
 		}
 	}
+
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	if (data->gpio.dev) {
+		int err = gpiochip_remove(&data->gpio);
+		if (err) {
+			dev_err(&data->client->dev, "%s failed, %d\n",
+						"gpiochip_remove()", err);
+			return err;
+		}
+	}
+#endif
+
+	return 0;
 }
 
 static int pca9532_configure(struct i2c_client *client,
 	struct pca9532_data *data, struct pca9532_platform_data *pdata)
 {
 	int i, err = 0;
+	int gpios = 0;
+	u8 maxleds = data->chip_info->num_leds;
 
 	for (i = 0; i < 2; i++)	{
 		data->pwm[i] = pdata->pwm[i];
 		data->psc[i] = pdata->psc[i];
-		i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(i),
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(maxleds, i),
 			data->pwm[i]);
-		i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(i),
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(maxleds, i),
 			data->psc[i]);
 	}
 
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < data->chip_info->num_leds; i++) {
 		struct pca9532_led *led = &data->leds[i];
 		struct pca9532_led *pled = &pdata->leds[i];
 		led->client = client;
@@ -249,6 +358,9 @@
 		switch (led->type) {
 		case PCA9532_TYPE_NONE:
 			break;
+		case PCA9532_TYPE_GPIO:
+			gpios++;
+			break;
 		case PCA9532_TYPE_LED:
 			led->state = pled->state;
 			led->name = pled->name;
@@ -297,6 +409,34 @@
 			break;
 		}
 	}
+
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	if (gpios) {
+		data->gpio.label = "gpio-pca9532";
+		data->gpio.direction_input = pca9532_gpio_direction_input;
+		data->gpio.direction_output = pca9532_gpio_direction_output;
+		data->gpio.set = pca9532_gpio_set_value;
+		data->gpio.get = pca9532_gpio_get_value;
+		data->gpio.request = pca9532_gpio_request_pin;
+		data->gpio.can_sleep = 1;
+		data->gpio.base = pdata->gpio_base;
+		data->gpio.ngpio = data->chip_info->num_leds;
+		data->gpio.dev = &client->dev;
+		data->gpio.owner = THIS_MODULE;
+
+		err = gpiochip_add(&data->gpio);
+		if (err) {
+			/* Use data->gpio.dev as a flag for freeing gpiochip */
+			data->gpio.dev = NULL;
+			dev_warn(&client->dev, "could not add gpiochip\n");
+		} else {
+			dev_info(&client->dev, "gpios %i...%i\n",
+				data->gpio.base, data->gpio.base +
+				data->gpio.ngpio - 1);
+		}
+	}
+#endif
+
 	return 0;
 
 exit:
@@ -322,6 +462,8 @@
 	if (!data)
 		return -ENOMEM;
 
+	data->chip_info = &pca9532_chip_info_tbl[id->driver_data];
+
 	dev_info(&client->dev, "setting platform data\n");
 	i2c_set_clientdata(client, data);
 	data->client = client;
@@ -337,7 +479,12 @@
 static int pca9532_remove(struct i2c_client *client)
 {
 	struct pca9532_data *data = i2c_get_clientdata(client);
-	pca9532_destroy_devices(data, 16);
+	int err;
+
+	err = pca9532_destroy_devices(data, data->chip_info->num_leds);
+	if (err)
+		return err;
+
 	kfree(data);
 	return 0;
 }
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index 2dd8ecb..e77c7f8 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -40,10 +40,17 @@
 void led_trigger_set(struct led_classdev *led_cdev,
 			struct led_trigger *trigger);
 void led_trigger_remove(struct led_classdev *led_cdev);
+
+static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
+{
+	return led_cdev->trigger_data;
+}
+
 #else
 #define led_trigger_set_default(x) do {} while (0)
 #define led_trigger_set(x, y) do {} while (0)
 #define led_trigger_remove(x) do {} while (0)
+#define led_get_trigger_data(x) (NULL)
 #endif
 
 ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index b09bcbe..d87c9d0 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -91,6 +91,9 @@
 	if (rc)
 		goto err_out_delayon;
 
+	led_blink_set(led_cdev, &led_cdev->blink_delay_on,
+		      &led_cdev->blink_delay_off);
+
 	led_cdev->trigger_data = (void *)1;
 
 	return;
diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c
index d4fe7bc..4ada9be 100644
--- a/drivers/media/video/omap/omap_vout.c
+++ b/drivers/media/video/omap/omap_vout.c
@@ -47,7 +47,7 @@
 #include <plat/dma.h>
 #include <plat/vram.h>
 #include <plat/vrfb.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include "omap_voutlib.h"
 #include "omap_voutdef.h"
diff --git a/drivers/media/video/omap/omap_voutdef.h b/drivers/media/video/omap/omap_voutdef.h
index ea3a047..659497b 100644
--- a/drivers/media/video/omap/omap_voutdef.h
+++ b/drivers/media/video/omap/omap_voutdef.h
@@ -11,7 +11,7 @@
 #ifndef OMAP_VOUTDEF_H
 #define OMAP_VOUTDEF_H
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define YUYV_BPP        2
 #define RGB565_BPP      2
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 209fbb7..776a478 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -31,6 +31,7 @@
 obj-$(CONFIG_ATL1E) += atl1e/
 obj-$(CONFIG_ATL1C) += atl1c/
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
+obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o
 obj-$(CONFIG_TEHUTI) += tehuti.o
 obj-$(CONFIG_ENIC) += enic/
 obj-$(CONFIG_JME) += jme.o
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 9eb9b98..de51e84 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -30,9 +30,12 @@
 #include <linux/etherdevice.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/net_tstamp.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/ptp_classify.h>
 #include <linux/slab.h>
+#include <mach/ixp46x_ts.h>
 #include <mach/npe.h>
 #include <mach/qmgr.h>
 
@@ -67,6 +70,10 @@
 #define RXFREE_QUEUE(port_id)	(NPE_ID(port_id) + 26)
 #define TXDONE_QUEUE		31
 
+#define PTP_SLAVE_MODE		1
+#define PTP_MASTER_MODE		2
+#define PORT2CHANNEL(p)		NPE_ID(p->id)
+
 /* TX Control Registers */
 #define TX_CNTRL0_TX_EN		0x01
 #define TX_CNTRL0_HALFDUPLEX	0x02
@@ -171,6 +178,8 @@
 	int id;			/* logical port ID */
 	int speed, duplex;
 	u8 firmware[4];
+	int hwts_tx_en;
+	int hwts_rx_en;
 };
 
 /* NPE message structure */
@@ -246,6 +255,172 @@
 static struct port *npe_port_tab[MAX_NPES];
 static struct dma_pool *dma_pool;
 
+static struct sock_filter ptp_filter[] = {
+	PTP_FILTER
+};
+
+static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
+{
+	u8 *data = skb->data;
+	unsigned int offset;
+	u16 *hi, *id;
+	u32 lo;
+
+	if (sk_run_filter(skb, ptp_filter) != PTP_CLASS_V1_IPV4)
+		return 0;
+
+	offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
+
+	if (skb->len < offset + OFF_PTP_SEQUENCE_ID + sizeof(seqid))
+		return 0;
+
+	hi = (u16 *)(data + offset + OFF_PTP_SOURCE_UUID);
+	id = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+
+	memcpy(&lo, &hi[1], sizeof(lo));
+
+	return (uid_hi == ntohs(*hi) &&
+		uid_lo == ntohl(lo) &&
+		seqid  == ntohs(*id));
+}
+
+static void ixp_rx_timestamp(struct port *port, struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *shhwtstamps;
+	struct ixp46x_ts_regs *regs;
+	u64 ns;
+	u32 ch, hi, lo, val;
+	u16 uid, seq;
+
+	if (!port->hwts_rx_en)
+		return;
+
+	ch = PORT2CHANNEL(port);
+
+	regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+
+	val = __raw_readl(&regs->channel[ch].ch_event);
+
+	if (!(val & RX_SNAPSHOT_LOCKED))
+		return;
+
+	lo = __raw_readl(&regs->channel[ch].src_uuid_lo);
+	hi = __raw_readl(&regs->channel[ch].src_uuid_hi);
+
+	uid = hi & 0xffff;
+	seq = (hi >> 16) & 0xffff;
+
+	if (!ixp_ptp_match(skb, htons(uid), htonl(lo), htons(seq)))
+		goto out;
+
+	lo = __raw_readl(&regs->channel[ch].rx_snap_lo);
+	hi = __raw_readl(&regs->channel[ch].rx_snap_hi);
+	ns = ((u64) hi) << 32;
+	ns |= lo;
+	ns <<= TICKS_NS_SHIFT;
+
+	shhwtstamps = skb_hwtstamps(skb);
+	memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+	shhwtstamps->hwtstamp = ns_to_ktime(ns);
+out:
+	__raw_writel(RX_SNAPSHOT_LOCKED, &regs->channel[ch].ch_event);
+}
+
+static void ixp_tx_timestamp(struct port *port, struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct ixp46x_ts_regs *regs;
+	struct skb_shared_info *shtx;
+	u64 ns;
+	u32 ch, cnt, hi, lo, val;
+
+	shtx = skb_shinfo(skb);
+	if (unlikely(shtx->tx_flags & SKBTX_HW_TSTAMP && port->hwts_tx_en))
+		shtx->tx_flags |= SKBTX_IN_PROGRESS;
+	else
+		return;
+
+	ch = PORT2CHANNEL(port);
+
+	regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+
+	/*
+	 * This really stinks, but we have to poll for the Tx time stamp.
+	 * Usually, the time stamp is ready after 4 to 6 microseconds.
+	 */
+	for (cnt = 0; cnt < 100; cnt++) {
+		val = __raw_readl(&regs->channel[ch].ch_event);
+		if (val & TX_SNAPSHOT_LOCKED)
+			break;
+		udelay(1);
+	}
+	if (!(val & TX_SNAPSHOT_LOCKED)) {
+		shtx->tx_flags &= ~SKBTX_IN_PROGRESS;
+		return;
+	}
+
+	lo = __raw_readl(&regs->channel[ch].tx_snap_lo);
+	hi = __raw_readl(&regs->channel[ch].tx_snap_hi);
+	ns = ((u64) hi) << 32;
+	ns |= lo;
+	ns <<= TICKS_NS_SHIFT;
+
+	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+	shhwtstamps.hwtstamp = ns_to_ktime(ns);
+	skb_tstamp_tx(skb, &shhwtstamps);
+
+	__raw_writel(TX_SNAPSHOT_LOCKED, &regs->channel[ch].ch_event);
+}
+
+static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct hwtstamp_config cfg;
+	struct ixp46x_ts_regs *regs;
+	struct port *port = netdev_priv(netdev);
+	int ch;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags) /* reserved for future extensions */
+		return -EINVAL;
+
+	ch = PORT2CHANNEL(port);
+	regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+
+	switch (cfg.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		port->hwts_tx_en = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		port->hwts_tx_en = 1;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		port->hwts_rx_en = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		port->hwts_rx_en = PTP_SLAVE_MODE;
+		__raw_writel(0, &regs->channel[ch].ch_control);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		port->hwts_rx_en = PTP_MASTER_MODE;
+		__raw_writel(MASTER_MODE, &regs->channel[ch].ch_control);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* Clear out any old time stamps. */
+	__raw_writel(TX_SNAPSHOT_LOCKED | RX_SNAPSHOT_LOCKED,
+		     &regs->channel[ch].ch_event);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
 
 static int ixp4xx_mdio_cmd(struct mii_bus *bus, int phy_id, int location,
 			   int write, u16 cmd)
@@ -573,6 +748,7 @@
 
 		debug_pkt(dev, "eth_poll", skb->data, skb->len);
 
+		ixp_rx_timestamp(port, skb);
 		skb->protocol = eth_type_trans(skb, dev);
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += skb->len;
@@ -679,14 +855,12 @@
 		return NETDEV_TX_OK;
 	}
 	memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
-	dev_kfree_skb(skb);
 #endif
 
 	phys = dma_map_single(&dev->dev, mem, bytes, DMA_TO_DEVICE);
 	if (dma_mapping_error(&dev->dev, phys)) {
-#ifdef __ARMEB__
 		dev_kfree_skb(skb);
-#else
+#ifndef __ARMEB__
 		kfree(mem);
 #endif
 		dev->stats.tx_dropped++;
@@ -728,6 +902,13 @@
 #if DEBUG_TX
 	printk(KERN_DEBUG "%s: eth_xmit end\n", dev->name);
 #endif
+
+	ixp_tx_timestamp(port, skb);
+	skb_tx_timestamp(skb);
+
+#ifndef __ARMEB__
+	dev_kfree_skb(skb);
+#endif
 	return NETDEV_TX_OK;
 }
 
@@ -783,6 +964,9 @@
 	if (!netif_running(dev))
 		return -EINVAL;
 
+	if (cpu_is_ixp46x() && cmd == SIOCSHWTSTAMP)
+		return hwtstamp_ioctl(dev, req, cmd);
+
 	return phy_mii_ioctl(port->phydev, req, cmd);
 }
 
@@ -1171,6 +1355,11 @@
 	char phy_id[MII_BUS_ID_SIZE + 3];
 	int err;
 
+	if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) {
+		pr_err("ixp4xx_eth: bad ptp filter\n");
+		return -EINVAL;
+	}
+
 	if (!(dev = alloc_etherdev(sizeof(struct port))))
 		return -ENOMEM;
 
diff --git a/drivers/net/gianfar_ptp.c b/drivers/net/gianfar_ptp.c
new file mode 100644
index 0000000..d8e1753
--- /dev/null
+++ b/drivers/net/gianfar_ptp.c
@@ -0,0 +1,588 @@
+/*
+ * PTP 1588 clock using the eTSEC
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/device.h>
+#include <linux/hrtimer.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <linux/ptp_clock_kernel.h>
+
+#include "gianfar.h"
+
+/*
+ * gianfar ptp registers
+ * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
+ */
+struct gianfar_ptp_registers {
+	u32 tmr_ctrl;     /* Timer control register */
+	u32 tmr_tevent;   /* Timestamp event register */
+	u32 tmr_temask;   /* Timer event mask register */
+	u32 tmr_pevent;   /* Timestamp event register */
+	u32 tmr_pemask;   /* Timer event mask register */
+	u32 tmr_stat;     /* Timestamp status register */
+	u32 tmr_cnt_h;    /* Timer counter high register */
+	u32 tmr_cnt_l;    /* Timer counter low register */
+	u32 tmr_add;      /* Timer drift compensation addend register */
+	u32 tmr_acc;      /* Timer accumulator register */
+	u32 tmr_prsc;     /* Timer prescale */
+	u8  res1[4];
+	u32 tmroff_h;     /* Timer offset high */
+	u32 tmroff_l;     /* Timer offset low */
+	u8  res2[8];
+	u32 tmr_alarm1_h; /* Timer alarm 1 high register */
+	u32 tmr_alarm1_l; /* Timer alarm 1 high register */
+	u32 tmr_alarm2_h; /* Timer alarm 2 high register */
+	u32 tmr_alarm2_l; /* Timer alarm 2 high register */
+	u8  res3[48];
+	u32 tmr_fiper1;   /* Timer fixed period interval */
+	u32 tmr_fiper2;   /* Timer fixed period interval */
+	u32 tmr_fiper3;   /* Timer fixed period interval */
+	u8  res4[20];
+	u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
+	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
+};
+
+/* Bit definitions for the TMR_CTRL register */
+#define ALM1P                 (1<<31) /* Alarm1 output polarity */
+#define ALM2P                 (1<<30) /* Alarm2 output polarity */
+#define FS                    (1<<28) /* FIPER start indication */
+#define PP1L                  (1<<27) /* Fiper1 pulse loopback mode enabled. */
+#define PP2L                  (1<<26) /* Fiper2 pulse loopback mode enabled. */
+#define TCLK_PERIOD_SHIFT     (16) /* 1588 timer reference clock period. */
+#define TCLK_PERIOD_MASK      (0x3ff)
+#define RTPE                  (1<<15) /* Record Tx Timestamp to PAL Enable. */
+#define FRD                   (1<<14) /* FIPER Realignment Disable */
+#define ESFDP                 (1<<11) /* External Tx/Rx SFD Polarity. */
+#define ESFDE                 (1<<10) /* External Tx/Rx SFD Enable. */
+#define ETEP2                 (1<<9) /* External trigger 2 edge polarity */
+#define ETEP1                 (1<<8) /* External trigger 1 edge polarity */
+#define COPH                  (1<<7) /* Generated clock output phase. */
+#define CIPH                  (1<<6) /* External oscillator input clock phase */
+#define TMSR                  (1<<5) /* Timer soft reset. */
+#define BYP                   (1<<3) /* Bypass drift compensated clock */
+#define TE                    (1<<2) /* 1588 timer enable. */
+#define CKSEL_SHIFT           (0)    /* 1588 Timer reference clock source */
+#define CKSEL_MASK            (0x3)
+
+/* Bit definitions for the TMR_TEVENT register */
+#define ETS2                  (1<<25) /* External trigger 2 timestamp sampled */
+#define ETS1                  (1<<24) /* External trigger 1 timestamp sampled */
+#define ALM2                  (1<<17) /* Current time = alarm time register 2 */
+#define ALM1                  (1<<16) /* Current time = alarm time register 1 */
+#define PP1                   (1<<7)  /* periodic pulse generated on FIPER1 */
+#define PP2                   (1<<6)  /* periodic pulse generated on FIPER2 */
+#define PP3                   (1<<5)  /* periodic pulse generated on FIPER3 */
+
+/* Bit definitions for the TMR_TEMASK register */
+#define ETS2EN                (1<<25) /* External trigger 2 timestamp enable */
+#define ETS1EN                (1<<24) /* External trigger 1 timestamp enable */
+#define ALM2EN                (1<<17) /* Timer ALM2 event enable */
+#define ALM1EN                (1<<16) /* Timer ALM1 event enable */
+#define PP1EN                 (1<<7) /* Periodic pulse event 1 enable */
+#define PP2EN                 (1<<6) /* Periodic pulse event 2 enable */
+
+/* Bit definitions for the TMR_PEVENT register */
+#define TXP2                  (1<<9) /* PTP transmitted timestamp im TXTS2 */
+#define TXP1                  (1<<8) /* PTP transmitted timestamp in TXTS1 */
+#define RXP                   (1<<0) /* PTP frame has been received */
+
+/* Bit definitions for the TMR_PEMASK register */
+#define TXP2EN                (1<<9) /* Transmit PTP packet event 2 enable */
+#define TXP1EN                (1<<8) /* Transmit PTP packet event 1 enable */
+#define RXPEN                 (1<<0) /* Receive PTP packet event enable */
+
+/* Bit definitions for the TMR_STAT register */
+#define STAT_VEC_SHIFT        (0) /* Timer general purpose status vector */
+#define STAT_VEC_MASK         (0x3f)
+
+/* Bit definitions for the TMR_PRSC register */
+#define PRSC_OCK_SHIFT        (0) /* Output clock division/prescale factor. */
+#define PRSC_OCK_MASK         (0xffff)
+
+
+#define DRIVER		"gianfar_ptp"
+#define DEFAULT_CKSEL	1
+#define N_ALARM		1 /* first alarm is used internally to reset fipers */
+#define N_EXT_TS	2
+#define REG_SIZE	sizeof(struct gianfar_ptp_registers)
+
+struct etsects {
+	struct gianfar_ptp_registers *regs;
+	spinlock_t lock; /* protects regs */
+	struct ptp_clock *clock;
+	struct ptp_clock_info caps;
+	struct resource *rsrc;
+	int irq;
+	u64 alarm_interval; /* for periodic alarm */
+	u64 alarm_value;
+	u32 tclk_period;  /* nanoseconds */
+	u32 tmr_prsc;
+	u32 tmr_add;
+	u32 cksel;
+	u32 tmr_fiper1;
+	u32 tmr_fiper2;
+};
+
+/*
+ * Register access functions
+ */
+
+/* Caller must hold etsects->lock. */
+static u64 tmr_cnt_read(struct etsects *etsects)
+{
+	u64 ns;
+	u32 lo, hi;
+
+	lo = gfar_read(&etsects->regs->tmr_cnt_l);
+	hi = gfar_read(&etsects->regs->tmr_cnt_h);
+	ns = ((u64) hi) << 32;
+	ns |= lo;
+	return ns;
+}
+
+/* Caller must hold etsects->lock. */
+static void tmr_cnt_write(struct etsects *etsects, u64 ns)
+{
+	u32 hi = ns >> 32;
+	u32 lo = ns & 0xffffffff;
+
+	gfar_write(&etsects->regs->tmr_cnt_l, lo);
+	gfar_write(&etsects->regs->tmr_cnt_h, hi);
+}
+
+/* Caller must hold etsects->lock. */
+static void set_alarm(struct etsects *etsects)
+{
+	u64 ns;
+	u32 lo, hi;
+
+	ns = tmr_cnt_read(etsects) + 1500000000ULL;
+	ns = div_u64(ns, 1000000000UL) * 1000000000ULL;
+	ns -= etsects->tclk_period;
+	hi = ns >> 32;
+	lo = ns & 0xffffffff;
+	gfar_write(&etsects->regs->tmr_alarm1_l, lo);
+	gfar_write(&etsects->regs->tmr_alarm1_h, hi);
+}
+
+/* Caller must hold etsects->lock. */
+static void set_fipers(struct etsects *etsects)
+{
+	u32 tmr_ctrl = gfar_read(&etsects->regs->tmr_ctrl);
+
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl & (~TE));
+	gfar_write(&etsects->regs->tmr_prsc,   etsects->tmr_prsc);
+	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
+	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
+	set_alarm(etsects);
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|TE);
+}
+
+/*
+ * Interrupt service routine
+ */
+
+static irqreturn_t isr(int irq, void *priv)
+{
+	struct etsects *etsects = priv;
+	struct ptp_clock_event event;
+	u64 ns;
+	u32 ack = 0, lo, hi, mask, val;
+
+	val = gfar_read(&etsects->regs->tmr_tevent);
+
+	if (val & ETS1) {
+		ack |= ETS1;
+		hi = gfar_read(&etsects->regs->tmr_etts1_h);
+		lo = gfar_read(&etsects->regs->tmr_etts1_l);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 0;
+		event.timestamp = ((u64) hi) << 32;
+		event.timestamp |= lo;
+		ptp_clock_event(etsects->clock, &event);
+	}
+
+	if (val & ETS2) {
+		ack |= ETS2;
+		hi = gfar_read(&etsects->regs->tmr_etts2_h);
+		lo = gfar_read(&etsects->regs->tmr_etts2_l);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 1;
+		event.timestamp = ((u64) hi) << 32;
+		event.timestamp |= lo;
+		ptp_clock_event(etsects->clock, &event);
+	}
+
+	if (val & ALM2) {
+		ack |= ALM2;
+		if (etsects->alarm_value) {
+			event.type = PTP_CLOCK_ALARM;
+			event.index = 0;
+			event.timestamp = etsects->alarm_value;
+			ptp_clock_event(etsects->clock, &event);
+		}
+		if (etsects->alarm_interval) {
+			ns = etsects->alarm_value + etsects->alarm_interval;
+			hi = ns >> 32;
+			lo = ns & 0xffffffff;
+			spin_lock(&etsects->lock);
+			gfar_write(&etsects->regs->tmr_alarm2_l, lo);
+			gfar_write(&etsects->regs->tmr_alarm2_h, hi);
+			spin_unlock(&etsects->lock);
+			etsects->alarm_value = ns;
+		} else {
+			gfar_write(&etsects->regs->tmr_tevent, ALM2);
+			spin_lock(&etsects->lock);
+			mask = gfar_read(&etsects->regs->tmr_temask);
+			mask &= ~ALM2EN;
+			gfar_write(&etsects->regs->tmr_temask, mask);
+			spin_unlock(&etsects->lock);
+			etsects->alarm_value = 0;
+			etsects->alarm_interval = 0;
+		}
+	}
+
+	if (val & PP1) {
+		ack |= PP1;
+		event.type = PTP_CLOCK_PPS;
+		ptp_clock_event(etsects->clock, &event);
+	}
+
+	if (ack) {
+		gfar_write(&etsects->regs->tmr_tevent, ack);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_gianfar_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	u64 adj;
+	u32 diff, tmr_add;
+	int neg_adj = 0;
+	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	tmr_add = etsects->tmr_add;
+	adj = tmr_add;
+	adj *= ppb;
+	diff = div_u64(adj, 1000000000ULL);
+
+	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
+
+	gfar_write(&etsects->regs->tmr_add, tmr_add);
+
+	return 0;
+}
+
+static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	s64 now;
+	unsigned long flags;
+	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+
+	spin_lock_irqsave(&etsects->lock, flags);
+
+	now = tmr_cnt_read(etsects);
+	now += delta;
+	tmr_cnt_write(etsects, now);
+
+	spin_unlock_irqrestore(&etsects->lock, flags);
+
+	set_fipers(etsects);
+
+	return 0;
+}
+
+static int ptp_gianfar_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	u64 ns;
+	u32 remainder;
+	unsigned long flags;
+	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+
+	spin_lock_irqsave(&etsects->lock, flags);
+
+	ns = tmr_cnt_read(etsects);
+
+	spin_unlock_irqrestore(&etsects->lock, flags);
+
+	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
+	ts->tv_nsec = remainder;
+	return 0;
+}
+
+static int ptp_gianfar_settime(struct ptp_clock_info *ptp,
+			       const struct timespec *ts)
+{
+	u64 ns;
+	unsigned long flags;
+	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+
+	ns = ts->tv_sec * 1000000000ULL;
+	ns += ts->tv_nsec;
+
+	spin_lock_irqsave(&etsects->lock, flags);
+
+	tmr_cnt_write(etsects, ns);
+	set_fipers(etsects);
+
+	spin_unlock_irqrestore(&etsects->lock, flags);
+
+	return 0;
+}
+
+static int ptp_gianfar_enable(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq, int on)
+{
+	struct etsects *etsects = container_of(ptp, struct etsects, caps);
+	unsigned long flags;
+	u32 bit, mask;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		switch (rq->extts.index) {
+		case 0:
+			bit = ETS1EN;
+			break;
+		case 1:
+			bit = ETS2EN;
+			break;
+		default:
+			return -EINVAL;
+		}
+		spin_lock_irqsave(&etsects->lock, flags);
+		mask = gfar_read(&etsects->regs->tmr_temask);
+		if (on)
+			mask |= bit;
+		else
+			mask &= ~bit;
+		gfar_write(&etsects->regs->tmr_temask, mask);
+		spin_unlock_irqrestore(&etsects->lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PPS:
+		spin_lock_irqsave(&etsects->lock, flags);
+		mask = gfar_read(&etsects->regs->tmr_temask);
+		if (on)
+			mask |= PP1EN;
+		else
+			mask &= ~PP1EN;
+		gfar_write(&etsects->regs->tmr_temask, mask);
+		spin_unlock_irqrestore(&etsects->lock, flags);
+		return 0;
+
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static struct ptp_clock_info ptp_gianfar_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "gianfar clock",
+	.max_adj	= 512000,
+	.n_alarm	= N_ALARM,
+	.n_ext_ts	= N_EXT_TS,
+	.n_per_out	= 0,
+	.pps		= 1,
+	.adjfreq	= ptp_gianfar_adjfreq,
+	.adjtime	= ptp_gianfar_adjtime,
+	.gettime	= ptp_gianfar_gettime,
+	.settime	= ptp_gianfar_settime,
+	.enable		= ptp_gianfar_enable,
+};
+
+/* OF device tree */
+
+static int get_of_u32(struct device_node *node, char *str, u32 *val)
+{
+	int plen;
+	const u32 *prop = of_get_property(node, str, &plen);
+
+	if (!prop || plen != sizeof(*prop))
+		return -1;
+	*val = *prop;
+	return 0;
+}
+
+static int gianfar_ptp_probe(struct platform_device *dev)
+{
+	struct device_node *node = dev->dev.of_node;
+	struct etsects *etsects;
+	struct timespec now;
+	int err = -ENOMEM;
+	u32 tmr_ctrl;
+	unsigned long flags;
+
+	etsects = kzalloc(sizeof(*etsects), GFP_KERNEL);
+	if (!etsects)
+		goto no_memory;
+
+	err = -ENODEV;
+
+	etsects->caps = ptp_gianfar_caps;
+	etsects->cksel = DEFAULT_CKSEL;
+
+	if (get_of_u32(node, "fsl,tclk-period", &etsects->tclk_period) ||
+	    get_of_u32(node, "fsl,tmr-prsc", &etsects->tmr_prsc) ||
+	    get_of_u32(node, "fsl,tmr-add", &etsects->tmr_add) ||
+	    get_of_u32(node, "fsl,tmr-fiper1", &etsects->tmr_fiper1) ||
+	    get_of_u32(node, "fsl,tmr-fiper2", &etsects->tmr_fiper2) ||
+	    get_of_u32(node, "fsl,max-adj", &etsects->caps.max_adj)) {
+		pr_err("device tree node missing required elements\n");
+		goto no_node;
+	}
+
+	etsects->irq = platform_get_irq(dev, 0);
+
+	if (etsects->irq == NO_IRQ) {
+		pr_err("irq not in device tree\n");
+		goto no_node;
+	}
+	if (request_irq(etsects->irq, isr, 0, DRIVER, etsects)) {
+		pr_err("request_irq failed\n");
+		goto no_node;
+	}
+
+	etsects->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!etsects->rsrc) {
+		pr_err("no resource\n");
+		goto no_resource;
+	}
+	if (request_resource(&ioport_resource, etsects->rsrc)) {
+		pr_err("resource busy\n");
+		goto no_resource;
+	}
+
+	spin_lock_init(&etsects->lock);
+
+	etsects->regs = ioremap(etsects->rsrc->start,
+				1 + etsects->rsrc->end - etsects->rsrc->start);
+	if (!etsects->regs) {
+		pr_err("ioremap ptp registers failed\n");
+		goto no_ioremap;
+	}
+	getnstimeofday(&now);
+	ptp_gianfar_settime(&etsects->caps, &now);
+
+	tmr_ctrl =
+	  (etsects->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
+	  (etsects->cksel & CKSEL_MASK) << CKSEL_SHIFT;
+
+	spin_lock_irqsave(&etsects->lock, flags);
+
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl);
+	gfar_write(&etsects->regs->tmr_add,    etsects->tmr_add);
+	gfar_write(&etsects->regs->tmr_prsc,   etsects->tmr_prsc);
+	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
+	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
+	set_alarm(etsects);
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE);
+
+	spin_unlock_irqrestore(&etsects->lock, flags);
+
+	etsects->clock = ptp_clock_register(&etsects->caps);
+	if (IS_ERR(etsects->clock)) {
+		err = PTR_ERR(etsects->clock);
+		goto no_clock;
+	}
+
+	dev_set_drvdata(&dev->dev, etsects);
+
+	return 0;
+
+no_clock:
+no_ioremap:
+	release_resource(etsects->rsrc);
+no_resource:
+	free_irq(etsects->irq, etsects);
+no_node:
+	kfree(etsects);
+no_memory:
+	return err;
+}
+
+static int gianfar_ptp_remove(struct platform_device *dev)
+{
+	struct etsects *etsects = dev_get_drvdata(&dev->dev);
+
+	gfar_write(&etsects->regs->tmr_temask, 0);
+	gfar_write(&etsects->regs->tmr_ctrl,   0);
+
+	ptp_clock_unregister(etsects->clock);
+	iounmap(etsects->regs);
+	release_resource(etsects->rsrc);
+	free_irq(etsects->irq, etsects);
+	kfree(etsects);
+
+	return 0;
+}
+
+static struct of_device_id match_table[] = {
+	{ .compatible = "fsl,etsec-ptp" },
+	{},
+};
+
+static struct platform_driver gianfar_ptp_driver = {
+	.driver = {
+		.name		= "gianfar_ptp",
+		.of_match_table	= match_table,
+		.owner		= THIS_MODULE,
+	},
+	.probe       = gianfar_ptp_probe,
+	.remove      = gianfar_ptp_remove,
+};
+
+/* module operations */
+
+static int __init ptp_gianfar_init(void)
+{
+	return platform_driver_register(&gianfar_ptp_driver);
+}
+
+module_init(ptp_gianfar_init);
+
+static void __exit ptp_gianfar_exit(void)
+{
+	platform_driver_unregister(&gianfar_ptp_driver);
+}
+
+module_exit(ptp_gianfar_exit);
+
+MODULE_AUTHOR("Richard Cochran <richard.cochran@omicron.at>");
+MODULE_DESCRIPTION("PTP clock using the eTSEC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 96c9561..32f07f8 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -915,7 +915,7 @@
 
 			skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
 			if (!skb) {
-				show_free_areas();
+				show_free_areas(0);
 				continue;
 			}
 
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 13bebab..2333215 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -19,6 +19,7 @@
 obj-$(CONFIG_MDIO_BITBANG)	+= mdio-bitbang.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o
+obj-$(CONFIG_DP83640_PHY)	+= dp83640.o
 obj-$(CONFIG_STE10XP)		+= ste10Xp.o
 obj-$(CONFIG_MICREL_PHY)	+= micrel.o
 obj-$(CONFIG_MDIO_OCTEON)	+= mdio-octeon.o
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
new file mode 100644
index 0000000..b0c9522
--- /dev/null
+++ b/drivers/net/phy/dp83640.c
@@ -0,0 +1,1100 @@
+/*
+ * Driver for the National Semiconductor DP83640 PHYTER
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/net_tstamp.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+
+#include "dp83640_reg.h"
+
+#define DP83640_PHY_ID	0x20005ce1
+#define PAGESEL		0x13
+#define LAYER4		0x02
+#define LAYER2		0x01
+#define MAX_RXTS	4
+#define MAX_TXTS	4
+#define N_EXT_TS	1
+#define PSF_PTPVER	2
+#define PSF_EVNT	0x4000
+#define PSF_RX		0x2000
+#define PSF_TX		0x1000
+#define EXT_EVENT	1
+#define EXT_GPIO	1
+#define CAL_EVENT	2
+#define CAL_GPIO	9
+#define CAL_TRIGGER	2
+
+/* phyter seems to miss the mark by 16 ns */
+#define ADJTIME_FIX	16
+
+#if defined(__BIG_ENDIAN)
+#define ENDIAN_FLAG	0
+#elif defined(__LITTLE_ENDIAN)
+#define ENDIAN_FLAG	PSF_ENDIAN
+#endif
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+struct phy_rxts {
+	u16 ns_lo;   /* ns[15:0] */
+	u16 ns_hi;   /* overflow[1:0], ns[29:16] */
+	u16 sec_lo;  /* sec[15:0] */
+	u16 sec_hi;  /* sec[31:16] */
+	u16 seqid;   /* sequenceId[15:0] */
+	u16 msgtype; /* messageType[3:0], hash[11:0] */
+};
+
+struct phy_txts {
+	u16 ns_lo;   /* ns[15:0] */
+	u16 ns_hi;   /* overflow[1:0], ns[29:16] */
+	u16 sec_lo;  /* sec[15:0] */
+	u16 sec_hi;  /* sec[31:16] */
+};
+
+struct rxts {
+	struct list_head list;
+	unsigned long tmo;
+	u64 ns;
+	u16 seqid;
+	u8  msgtype;
+	u16 hash;
+};
+
+struct dp83640_clock;
+
+struct dp83640_private {
+	struct list_head list;
+	struct dp83640_clock *clock;
+	struct phy_device *phydev;
+	struct work_struct ts_work;
+	int hwts_tx_en;
+	int hwts_rx_en;
+	int layer;
+	int version;
+	/* remember state of cfg0 during calibration */
+	int cfg0;
+	/* remember the last event time stamp */
+	struct phy_txts edata;
+	/* list of rx timestamps */
+	struct list_head rxts;
+	struct list_head rxpool;
+	struct rxts rx_pool_data[MAX_RXTS];
+	/* protects above three fields from concurrent access */
+	spinlock_t rx_lock;
+	/* queues of incoming and outgoing packets */
+	struct sk_buff_head rx_queue;
+	struct sk_buff_head tx_queue;
+};
+
+struct dp83640_clock {
+	/* keeps the instance in the 'phyter_clocks' list */
+	struct list_head list;
+	/* we create one clock instance per MII bus */
+	struct mii_bus *bus;
+	/* protects extended registers from concurrent access */
+	struct mutex extreg_lock;
+	/* remembers which page was last selected */
+	int page;
+	/* our advertised capabilities */
+	struct ptp_clock_info caps;
+	/* protects the three fields below from concurrent access */
+	struct mutex clock_lock;
+	/* the one phyter from which we shall read */
+	struct dp83640_private *chosen;
+	/* list of the other attached phyters, not chosen */
+	struct list_head phylist;
+	/* reference to our PTP hardware clock */
+	struct ptp_clock *ptp_clock;
+};
+
+/* globals */
+
+static int chosen_phy = -1;
+static ushort cal_gpio = 4;
+
+module_param(chosen_phy, int, 0444);
+module_param(cal_gpio, ushort, 0444);
+
+MODULE_PARM_DESC(chosen_phy, \
+	"The address of the PHY to use for the ancillary clock features");
+MODULE_PARM_DESC(cal_gpio, \
+	"Which GPIO line to use for synchronizing multiple PHYs");
+
+/* a list of clocks and a mutex to protect it */
+static LIST_HEAD(phyter_clocks);
+static DEFINE_MUTEX(phyter_clocks_lock);
+
+static void rx_timestamp_work(struct work_struct *work);
+
+/* extended register access functions */
+
+#define BROADCAST_ADDR 31
+
+static inline int broadcast_write(struct mii_bus *bus, u32 regnum, u16 val)
+{
+	return mdiobus_write(bus, BROADCAST_ADDR, regnum, val);
+}
+
+/* Caller must hold extreg_lock. */
+static int ext_read(struct phy_device *phydev, int page, u32 regnum)
+{
+	struct dp83640_private *dp83640 = phydev->priv;
+	int val;
+
+	if (dp83640->clock->page != page) {
+		broadcast_write(phydev->bus, PAGESEL, page);
+		dp83640->clock->page = page;
+	}
+	val = phy_read(phydev, regnum);
+
+	return val;
+}
+
+/* Caller must hold extreg_lock. */
+static void ext_write(int broadcast, struct phy_device *phydev,
+		      int page, u32 regnum, u16 val)
+{
+	struct dp83640_private *dp83640 = phydev->priv;
+
+	if (dp83640->clock->page != page) {
+		broadcast_write(phydev->bus, PAGESEL, page);
+		dp83640->clock->page = page;
+	}
+	if (broadcast)
+		broadcast_write(phydev->bus, regnum, val);
+	else
+		phy_write(phydev, regnum, val);
+}
+
+/* Caller must hold extreg_lock. */
+static int tdr_write(int bc, struct phy_device *dev,
+		     const struct timespec *ts, u16 cmd)
+{
+	ext_write(bc, dev, PAGE4, PTP_TDR, ts->tv_nsec & 0xffff);/* ns[15:0]  */
+	ext_write(bc, dev, PAGE4, PTP_TDR, ts->tv_nsec >> 16);   /* ns[31:16] */
+	ext_write(bc, dev, PAGE4, PTP_TDR, ts->tv_sec & 0xffff); /* sec[15:0] */
+	ext_write(bc, dev, PAGE4, PTP_TDR, ts->tv_sec >> 16);    /* sec[31:16]*/
+
+	ext_write(bc, dev, PAGE4, PTP_CTL, cmd);
+
+	return 0;
+}
+
+/* convert phy timestamps into driver timestamps */
+
+static void phy2rxts(struct phy_rxts *p, struct rxts *rxts)
+{
+	u32 sec;
+
+	sec = p->sec_lo;
+	sec |= p->sec_hi << 16;
+
+	rxts->ns = p->ns_lo;
+	rxts->ns |= (p->ns_hi & 0x3fff) << 16;
+	rxts->ns += ((u64)sec) * 1000000000ULL;
+	rxts->seqid = p->seqid;
+	rxts->msgtype = (p->msgtype >> 12) & 0xf;
+	rxts->hash = p->msgtype & 0x0fff;
+	rxts->tmo = jiffies + HZ;
+}
+
+static u64 phy2txts(struct phy_txts *p)
+{
+	u64 ns;
+	u32 sec;
+
+	sec = p->sec_lo;
+	sec |= p->sec_hi << 16;
+
+	ns = p->ns_lo;
+	ns |= (p->ns_hi & 0x3fff) << 16;
+	ns += ((u64)sec) * 1000000000ULL;
+
+	return ns;
+}
+
+/* ptp clock methods */
+
+static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct dp83640_clock *clock =
+		container_of(ptp, struct dp83640_clock, caps);
+	struct phy_device *phydev = clock->chosen->phydev;
+	u64 rate;
+	int neg_adj = 0;
+	u16 hi, lo;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	rate = ppb;
+	rate <<= 26;
+	rate = div_u64(rate, 1953125);
+
+	hi = (rate >> 16) & PTP_RATE_HI_MASK;
+	if (neg_adj)
+		hi |= PTP_RATE_DIR;
+
+	lo = rate & 0xffff;
+
+	mutex_lock(&clock->extreg_lock);
+
+	ext_write(1, phydev, PAGE4, PTP_RATEH, hi);
+	ext_write(1, phydev, PAGE4, PTP_RATEL, lo);
+
+	mutex_unlock(&clock->extreg_lock);
+
+	return 0;
+}
+
+static int ptp_dp83640_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct dp83640_clock *clock =
+		container_of(ptp, struct dp83640_clock, caps);
+	struct phy_device *phydev = clock->chosen->phydev;
+	struct timespec ts;
+	int err;
+
+	delta += ADJTIME_FIX;
+
+	ts = ns_to_timespec(delta);
+
+	mutex_lock(&clock->extreg_lock);
+
+	err = tdr_write(1, phydev, &ts, PTP_STEP_CLK);
+
+	mutex_unlock(&clock->extreg_lock);
+
+	return err;
+}
+
+static int ptp_dp83640_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	struct dp83640_clock *clock =
+		container_of(ptp, struct dp83640_clock, caps);
+	struct phy_device *phydev = clock->chosen->phydev;
+	unsigned int val[4];
+
+	mutex_lock(&clock->extreg_lock);
+
+	ext_write(0, phydev, PAGE4, PTP_CTL, PTP_RD_CLK);
+
+	val[0] = ext_read(phydev, PAGE4, PTP_TDR); /* ns[15:0] */
+	val[1] = ext_read(phydev, PAGE4, PTP_TDR); /* ns[31:16] */
+	val[2] = ext_read(phydev, PAGE4, PTP_TDR); /* sec[15:0] */
+	val[3] = ext_read(phydev, PAGE4, PTP_TDR); /* sec[31:16] */
+
+	mutex_unlock(&clock->extreg_lock);
+
+	ts->tv_nsec = val[0] | (val[1] << 16);
+	ts->tv_sec  = val[2] | (val[3] << 16);
+
+	return 0;
+}
+
+static int ptp_dp83640_settime(struct ptp_clock_info *ptp,
+			       const struct timespec *ts)
+{
+	struct dp83640_clock *clock =
+		container_of(ptp, struct dp83640_clock, caps);
+	struct phy_device *phydev = clock->chosen->phydev;
+	int err;
+
+	mutex_lock(&clock->extreg_lock);
+
+	err = tdr_write(1, phydev, ts, PTP_LOAD_CLK);
+
+	mutex_unlock(&clock->extreg_lock);
+
+	return err;
+}
+
+static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq, int on)
+{
+	struct dp83640_clock *clock =
+		container_of(ptp, struct dp83640_clock, caps);
+	struct phy_device *phydev = clock->chosen->phydev;
+	u16 evnt;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		if (rq->extts.index != 0)
+			return -EINVAL;
+		evnt = EVNT_WR | (EXT_EVENT & EVNT_SEL_MASK) << EVNT_SEL_SHIFT;
+		if (on) {
+			evnt |= (EXT_GPIO & EVNT_GPIO_MASK) << EVNT_GPIO_SHIFT;
+			evnt |= EVNT_RISE;
+		}
+		ext_write(0, phydev, PAGE5, PTP_EVNT, evnt);
+		return 0;
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static u8 status_frame_dst[6] = { 0x01, 0x1B, 0x19, 0x00, 0x00, 0x00 };
+static u8 status_frame_src[6] = { 0x08, 0x00, 0x17, 0x0B, 0x6B, 0x0F };
+
+static void enable_status_frames(struct phy_device *phydev, bool on)
+{
+	u16 cfg0 = 0, ver;
+
+	if (on)
+		cfg0 = PSF_EVNT_EN | PSF_RXTS_EN | PSF_TXTS_EN | ENDIAN_FLAG;
+
+	ver = (PSF_PTPVER & VERSIONPTP_MASK) << VERSIONPTP_SHIFT;
+
+	ext_write(0, phydev, PAGE5, PSF_CFG0, cfg0);
+	ext_write(0, phydev, PAGE6, PSF_CFG1, ver);
+
+	if (!phydev->attached_dev) {
+		pr_warning("dp83640: expected to find an attached netdevice\n");
+		return;
+	}
+
+	if (on) {
+		if (dev_mc_add(phydev->attached_dev, status_frame_dst))
+			pr_warning("dp83640: failed to add mc address\n");
+	} else {
+		if (dev_mc_del(phydev->attached_dev, status_frame_dst))
+			pr_warning("dp83640: failed to delete mc address\n");
+	}
+}
+
+static bool is_status_frame(struct sk_buff *skb, int type)
+{
+	struct ethhdr *h = eth_hdr(skb);
+
+	if (PTP_CLASS_V2_L2 == type &&
+	    !memcmp(h->h_source, status_frame_src, sizeof(status_frame_src)))
+		return true;
+	else
+		return false;
+}
+
+static int expired(struct rxts *rxts)
+{
+	return time_after(jiffies, rxts->tmo);
+}
+
+/* Caller must hold rx_lock. */
+static void prune_rx_ts(struct dp83640_private *dp83640)
+{
+	struct list_head *this, *next;
+	struct rxts *rxts;
+
+	list_for_each_safe(this, next, &dp83640->rxts) {
+		rxts = list_entry(this, struct rxts, list);
+		if (expired(rxts)) {
+			list_del_init(&rxts->list);
+			list_add(&rxts->list, &dp83640->rxpool);
+		}
+	}
+}
+
+/* synchronize the phyters so they act as one clock */
+
+static void enable_broadcast(struct phy_device *phydev, int init_page, int on)
+{
+	int val;
+	phy_write(phydev, PAGESEL, 0);
+	val = phy_read(phydev, PHYCR2);
+	if (on)
+		val |= BC_WRITE;
+	else
+		val &= ~BC_WRITE;
+	phy_write(phydev, PHYCR2, val);
+	phy_write(phydev, PAGESEL, init_page);
+}
+
+static void recalibrate(struct dp83640_clock *clock)
+{
+	s64 now, diff;
+	struct phy_txts event_ts;
+	struct timespec ts;
+	struct list_head *this;
+	struct dp83640_private *tmp;
+	struct phy_device *master = clock->chosen->phydev;
+	u16 cfg0, evnt, ptp_trig, trigger, val;
+
+	trigger = CAL_TRIGGER;
+
+	mutex_lock(&clock->extreg_lock);
+
+	/*
+	 * enable broadcast, disable status frames, enable ptp clock
+	 */
+	list_for_each(this, &clock->phylist) {
+		tmp = list_entry(this, struct dp83640_private, list);
+		enable_broadcast(tmp->phydev, clock->page, 1);
+		tmp->cfg0 = ext_read(tmp->phydev, PAGE5, PSF_CFG0);
+		ext_write(0, tmp->phydev, PAGE5, PSF_CFG0, 0);
+		ext_write(0, tmp->phydev, PAGE4, PTP_CTL, PTP_ENABLE);
+	}
+	enable_broadcast(master, clock->page, 1);
+	cfg0 = ext_read(master, PAGE5, PSF_CFG0);
+	ext_write(0, master, PAGE5, PSF_CFG0, 0);
+	ext_write(0, master, PAGE4, PTP_CTL, PTP_ENABLE);
+
+	/*
+	 * enable an event timestamp
+	 */
+	evnt = EVNT_WR | EVNT_RISE | EVNT_SINGLE;
+	evnt |= (CAL_EVENT & EVNT_SEL_MASK) << EVNT_SEL_SHIFT;
+	evnt |= (cal_gpio & EVNT_GPIO_MASK) << EVNT_GPIO_SHIFT;
+
+	list_for_each(this, &clock->phylist) {
+		tmp = list_entry(this, struct dp83640_private, list);
+		ext_write(0, tmp->phydev, PAGE5, PTP_EVNT, evnt);
+	}
+	ext_write(0, master, PAGE5, PTP_EVNT, evnt);
+
+	/*
+	 * configure a trigger
+	 */
+	ptp_trig = TRIG_WR | TRIG_IF_LATE | TRIG_PULSE;
+	ptp_trig |= (trigger  & TRIG_CSEL_MASK) << TRIG_CSEL_SHIFT;
+	ptp_trig |= (cal_gpio & TRIG_GPIO_MASK) << TRIG_GPIO_SHIFT;
+	ext_write(0, master, PAGE5, PTP_TRIG, ptp_trig);
+
+	/* load trigger */
+	val = (trigger & TRIG_SEL_MASK) << TRIG_SEL_SHIFT;
+	val |= TRIG_LOAD;
+	ext_write(0, master, PAGE4, PTP_CTL, val);
+
+	/* enable trigger */
+	val &= ~TRIG_LOAD;
+	val |= TRIG_EN;
+	ext_write(0, master, PAGE4, PTP_CTL, val);
+
+	/* disable trigger */
+	val = (trigger & TRIG_SEL_MASK) << TRIG_SEL_SHIFT;
+	val |= TRIG_DIS;
+	ext_write(0, master, PAGE4, PTP_CTL, val);
+
+	/*
+	 * read out and correct offsets
+	 */
+	val = ext_read(master, PAGE4, PTP_STS);
+	pr_info("master PTP_STS  0x%04hx", val);
+	val = ext_read(master, PAGE4, PTP_ESTS);
+	pr_info("master PTP_ESTS 0x%04hx", val);
+	event_ts.ns_lo  = ext_read(master, PAGE4, PTP_EDATA);
+	event_ts.ns_hi  = ext_read(master, PAGE4, PTP_EDATA);
+	event_ts.sec_lo = ext_read(master, PAGE4, PTP_EDATA);
+	event_ts.sec_hi = ext_read(master, PAGE4, PTP_EDATA);
+	now = phy2txts(&event_ts);
+
+	list_for_each(this, &clock->phylist) {
+		tmp = list_entry(this, struct dp83640_private, list);
+		val = ext_read(tmp->phydev, PAGE4, PTP_STS);
+		pr_info("slave  PTP_STS  0x%04hx", val);
+		val = ext_read(tmp->phydev, PAGE4, PTP_ESTS);
+		pr_info("slave  PTP_ESTS 0x%04hx", val);
+		event_ts.ns_lo  = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
+		event_ts.ns_hi  = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
+		event_ts.sec_lo = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
+		event_ts.sec_hi = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
+		diff = now - (s64) phy2txts(&event_ts);
+		pr_info("slave offset %lld nanoseconds\n", diff);
+		diff += ADJTIME_FIX;
+		ts = ns_to_timespec(diff);
+		tdr_write(0, tmp->phydev, &ts, PTP_STEP_CLK);
+	}
+
+	/*
+	 * restore status frames
+	 */
+	list_for_each(this, &clock->phylist) {
+		tmp = list_entry(this, struct dp83640_private, list);
+		ext_write(0, tmp->phydev, PAGE5, PSF_CFG0, tmp->cfg0);
+	}
+	ext_write(0, master, PAGE5, PSF_CFG0, cfg0);
+
+	mutex_unlock(&clock->extreg_lock);
+}
+
+/* time stamping methods */
+
+static void decode_evnt(struct dp83640_private *dp83640,
+			struct phy_txts *phy_txts, u16 ests)
+{
+	struct ptp_clock_event event;
+	int words = (ests >> EVNT_TS_LEN_SHIFT) & EVNT_TS_LEN_MASK;
+
+	switch (words) { /* fall through in every case */
+	case 3:
+		dp83640->edata.sec_hi = phy_txts->sec_hi;
+	case 2:
+		dp83640->edata.sec_lo = phy_txts->sec_lo;
+	case 1:
+		dp83640->edata.ns_hi = phy_txts->ns_hi;
+	case 0:
+		dp83640->edata.ns_lo = phy_txts->ns_lo;
+	}
+
+	event.type = PTP_CLOCK_EXTTS;
+	event.index = 0;
+	event.timestamp = phy2txts(&dp83640->edata);
+
+	ptp_clock_event(dp83640->clock->ptp_clock, &event);
+}
+
+static void decode_rxts(struct dp83640_private *dp83640,
+			struct phy_rxts *phy_rxts)
+{
+	struct rxts *rxts;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dp83640->rx_lock, flags);
+
+	prune_rx_ts(dp83640);
+
+	if (list_empty(&dp83640->rxpool)) {
+		pr_warning("dp83640: rx timestamp pool is empty\n");
+		goto out;
+	}
+	rxts = list_first_entry(&dp83640->rxpool, struct rxts, list);
+	list_del_init(&rxts->list);
+	phy2rxts(phy_rxts, rxts);
+	list_add_tail(&rxts->list, &dp83640->rxts);
+out:
+	spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+}
+
+static void decode_txts(struct dp83640_private *dp83640,
+			struct phy_txts *phy_txts)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct sk_buff *skb;
+	u64 ns;
+
+	/* We must already have the skb that triggered this. */
+
+	skb = skb_dequeue(&dp83640->tx_queue);
+
+	if (!skb) {
+		pr_warning("dp83640: have timestamp but tx_queue empty\n");
+		return;
+	}
+	ns = phy2txts(phy_txts);
+	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+	shhwtstamps.hwtstamp = ns_to_ktime(ns);
+	skb_complete_tx_timestamp(skb, &shhwtstamps);
+}
+
+static void decode_status_frame(struct dp83640_private *dp83640,
+				struct sk_buff *skb)
+{
+	struct phy_rxts *phy_rxts;
+	struct phy_txts *phy_txts;
+	u8 *ptr;
+	int len, size;
+	u16 ests, type;
+
+	ptr = skb->data + 2;
+
+	for (len = skb_headlen(skb) - 2; len > sizeof(type); len -= size) {
+
+		type = *(u16 *)ptr;
+		ests = type & 0x0fff;
+		type = type & 0xf000;
+		len -= sizeof(type);
+		ptr += sizeof(type);
+
+		if (PSF_RX == type && len >= sizeof(*phy_rxts)) {
+
+			phy_rxts = (struct phy_rxts *) ptr;
+			decode_rxts(dp83640, phy_rxts);
+			size = sizeof(*phy_rxts);
+
+		} else if (PSF_TX == type && len >= sizeof(*phy_txts)) {
+
+			phy_txts = (struct phy_txts *) ptr;
+			decode_txts(dp83640, phy_txts);
+			size = sizeof(*phy_txts);
+
+		} else if (PSF_EVNT == type && len >= sizeof(*phy_txts)) {
+
+			phy_txts = (struct phy_txts *) ptr;
+			decode_evnt(dp83640, phy_txts, ests);
+			size = sizeof(*phy_txts);
+
+		} else {
+			size = 0;
+			break;
+		}
+		ptr += size;
+	}
+}
+
+static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts)
+{
+	u16 *seqid;
+	unsigned int offset;
+	u8 *msgtype, *data = skb_mac_header(skb);
+
+	/* check sequenceID, messageType, 12 bit hash of offset 20-29 */
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V2_IPV4:
+		offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
+		break;
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV6:
+		offset = OFF_PTP6;
+		break;
+	case PTP_CLASS_V2_L2:
+		offset = ETH_HLEN;
+		break;
+	case PTP_CLASS_V2_VLAN:
+		offset = ETH_HLEN + VLAN_HLEN;
+		break;
+	default:
+		return 0;
+	}
+
+	if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+		return 0;
+
+	if (unlikely(type & PTP_CLASS_V1))
+		msgtype = data + offset + OFF_PTP_CONTROL;
+	else
+		msgtype = data + offset;
+
+	seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+
+	return (rxts->msgtype == (*msgtype & 0xf) &&
+		rxts->seqid   == ntohs(*seqid));
+}
+
+static void dp83640_free_clocks(void)
+{
+	struct dp83640_clock *clock;
+	struct list_head *this, *next;
+
+	mutex_lock(&phyter_clocks_lock);
+
+	list_for_each_safe(this, next, &phyter_clocks) {
+		clock = list_entry(this, struct dp83640_clock, list);
+		if (!list_empty(&clock->phylist)) {
+			pr_warning("phy list non-empty while unloading");
+			BUG();
+		}
+		list_del(&clock->list);
+		mutex_destroy(&clock->extreg_lock);
+		mutex_destroy(&clock->clock_lock);
+		put_device(&clock->bus->dev);
+		kfree(clock);
+	}
+
+	mutex_unlock(&phyter_clocks_lock);
+}
+
+static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus)
+{
+	INIT_LIST_HEAD(&clock->list);
+	clock->bus = bus;
+	mutex_init(&clock->extreg_lock);
+	mutex_init(&clock->clock_lock);
+	INIT_LIST_HEAD(&clock->phylist);
+	clock->caps.owner = THIS_MODULE;
+	sprintf(clock->caps.name, "dp83640 timer");
+	clock->caps.max_adj	= 1953124;
+	clock->caps.n_alarm	= 0;
+	clock->caps.n_ext_ts	= N_EXT_TS;
+	clock->caps.n_per_out	= 0;
+	clock->caps.pps		= 0;
+	clock->caps.adjfreq	= ptp_dp83640_adjfreq;
+	clock->caps.adjtime	= ptp_dp83640_adjtime;
+	clock->caps.gettime	= ptp_dp83640_gettime;
+	clock->caps.settime	= ptp_dp83640_settime;
+	clock->caps.enable	= ptp_dp83640_enable;
+	/*
+	 * Get a reference to this bus instance.
+	 */
+	get_device(&bus->dev);
+}
+
+static int choose_this_phy(struct dp83640_clock *clock,
+			   struct phy_device *phydev)
+{
+	if (chosen_phy == -1 && !clock->chosen)
+		return 1;
+
+	if (chosen_phy == phydev->addr)
+		return 1;
+
+	return 0;
+}
+
+static struct dp83640_clock *dp83640_clock_get(struct dp83640_clock *clock)
+{
+	if (clock)
+		mutex_lock(&clock->clock_lock);
+	return clock;
+}
+
+/*
+ * Look up and lock a clock by bus instance.
+ * If there is no clock for this bus, then create it first.
+ */
+static struct dp83640_clock *dp83640_clock_get_bus(struct mii_bus *bus)
+{
+	struct dp83640_clock *clock = NULL, *tmp;
+	struct list_head *this;
+
+	mutex_lock(&phyter_clocks_lock);
+
+	list_for_each(this, &phyter_clocks) {
+		tmp = list_entry(this, struct dp83640_clock, list);
+		if (tmp->bus == bus) {
+			clock = tmp;
+			break;
+		}
+	}
+	if (clock)
+		goto out;
+
+	clock = kzalloc(sizeof(struct dp83640_clock), GFP_KERNEL);
+	if (!clock)
+		goto out;
+
+	dp83640_clock_init(clock, bus);
+	list_add_tail(&phyter_clocks, &clock->list);
+out:
+	mutex_unlock(&phyter_clocks_lock);
+
+	return dp83640_clock_get(clock);
+}
+
+static void dp83640_clock_put(struct dp83640_clock *clock)
+{
+	mutex_unlock(&clock->clock_lock);
+}
+
+static int dp83640_probe(struct phy_device *phydev)
+{
+	struct dp83640_clock *clock;
+	struct dp83640_private *dp83640;
+	int err = -ENOMEM, i;
+
+	if (phydev->addr == BROADCAST_ADDR)
+		return 0;
+
+	clock = dp83640_clock_get_bus(phydev->bus);
+	if (!clock)
+		goto no_clock;
+
+	dp83640 = kzalloc(sizeof(struct dp83640_private), GFP_KERNEL);
+	if (!dp83640)
+		goto no_memory;
+
+	dp83640->phydev = phydev;
+	INIT_WORK(&dp83640->ts_work, rx_timestamp_work);
+
+	INIT_LIST_HEAD(&dp83640->rxts);
+	INIT_LIST_HEAD(&dp83640->rxpool);
+	for (i = 0; i < MAX_RXTS; i++)
+		list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool);
+
+	phydev->priv = dp83640;
+
+	spin_lock_init(&dp83640->rx_lock);
+	skb_queue_head_init(&dp83640->rx_queue);
+	skb_queue_head_init(&dp83640->tx_queue);
+
+	dp83640->clock = clock;
+
+	if (choose_this_phy(clock, phydev)) {
+		clock->chosen = dp83640;
+		clock->ptp_clock = ptp_clock_register(&clock->caps);
+		if (IS_ERR(clock->ptp_clock)) {
+			err = PTR_ERR(clock->ptp_clock);
+			goto no_register;
+		}
+	} else
+		list_add_tail(&dp83640->list, &clock->phylist);
+
+	if (clock->chosen && !list_empty(&clock->phylist))
+		recalibrate(clock);
+	else
+		enable_broadcast(dp83640->phydev, clock->page, 1);
+
+	dp83640_clock_put(clock);
+	return 0;
+
+no_register:
+	clock->chosen = NULL;
+	kfree(dp83640);
+no_memory:
+	dp83640_clock_put(clock);
+no_clock:
+	return err;
+}
+
+static void dp83640_remove(struct phy_device *phydev)
+{
+	struct dp83640_clock *clock;
+	struct list_head *this, *next;
+	struct dp83640_private *tmp, *dp83640 = phydev->priv;
+
+	if (phydev->addr == BROADCAST_ADDR)
+		return;
+
+	enable_status_frames(phydev, false);
+	cancel_work_sync(&dp83640->ts_work);
+
+	clock = dp83640_clock_get(dp83640->clock);
+
+	if (dp83640 == clock->chosen) {
+		ptp_clock_unregister(clock->ptp_clock);
+		clock->chosen = NULL;
+	} else {
+		list_for_each_safe(this, next, &clock->phylist) {
+			tmp = list_entry(this, struct dp83640_private, list);
+			if (tmp == dp83640) {
+				list_del_init(&tmp->list);
+				break;
+			}
+		}
+	}
+
+	dp83640_clock_put(clock);
+	kfree(dp83640);
+}
+
+static int dp83640_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
+{
+	struct dp83640_private *dp83640 = phydev->priv;
+	struct hwtstamp_config cfg;
+	u16 txcfg0, rxcfg0;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags) /* reserved for future extensions */
+		return -EINVAL;
+
+	switch (cfg.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		dp83640->hwts_tx_en = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		dp83640->hwts_tx_en = 1;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		dp83640->hwts_rx_en = 0;
+		dp83640->layer = 0;
+		dp83640->version = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		dp83640->hwts_rx_en = 1;
+		dp83640->layer = LAYER4;
+		dp83640->version = 1;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		dp83640->hwts_rx_en = 1;
+		dp83640->layer = LAYER4;
+		dp83640->version = 2;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		dp83640->hwts_rx_en = 1;
+		dp83640->layer = LAYER2;
+		dp83640->version = 2;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		dp83640->hwts_rx_en = 1;
+		dp83640->layer = LAYER4|LAYER2;
+		dp83640->version = 2;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	txcfg0 = (dp83640->version & TX_PTP_VER_MASK) << TX_PTP_VER_SHIFT;
+	rxcfg0 = (dp83640->version & TX_PTP_VER_MASK) << TX_PTP_VER_SHIFT;
+
+	if (dp83640->layer & LAYER2) {
+		txcfg0 |= TX_L2_EN;
+		rxcfg0 |= RX_L2_EN;
+	}
+	if (dp83640->layer & LAYER4) {
+		txcfg0 |= TX_IPV6_EN | TX_IPV4_EN;
+		rxcfg0 |= RX_IPV6_EN | RX_IPV4_EN;
+	}
+
+	if (dp83640->hwts_tx_en)
+		txcfg0 |= TX_TS_EN;
+
+	if (dp83640->hwts_rx_en)
+		rxcfg0 |= RX_TS_EN;
+
+	mutex_lock(&dp83640->clock->extreg_lock);
+
+	if (dp83640->hwts_tx_en || dp83640->hwts_rx_en) {
+		enable_status_frames(phydev, true);
+		ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE);
+	}
+
+	ext_write(0, phydev, PAGE5, PTP_TXCFG0, txcfg0);
+	ext_write(0, phydev, PAGE5, PTP_RXCFG0, rxcfg0);
+
+	mutex_unlock(&dp83640->clock->extreg_lock);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static void rx_timestamp_work(struct work_struct *work)
+{
+	struct dp83640_private *dp83640 =
+		container_of(work, struct dp83640_private, ts_work);
+	struct list_head *this, *next;
+	struct rxts *rxts;
+	struct skb_shared_hwtstamps *shhwtstamps;
+	struct sk_buff *skb;
+	unsigned int type;
+	unsigned long flags;
+
+	/* Deliver each deferred packet, with or without a time stamp. */
+
+	while ((skb = skb_dequeue(&dp83640->rx_queue)) != NULL) {
+		type = SKB_PTP_TYPE(skb);
+		spin_lock_irqsave(&dp83640->rx_lock, flags);
+		list_for_each_safe(this, next, &dp83640->rxts) {
+			rxts = list_entry(this, struct rxts, list);
+			if (match(skb, type, rxts)) {
+				shhwtstamps = skb_hwtstamps(skb);
+				memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+				shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns);
+				list_del_init(&rxts->list);
+				list_add(&rxts->list, &dp83640->rxpool);
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+		netif_rx(skb);
+	}
+
+	/* Clear out expired time stamps. */
+
+	spin_lock_irqsave(&dp83640->rx_lock, flags);
+	prune_rx_ts(dp83640);
+	spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+}
+
+static bool dp83640_rxtstamp(struct phy_device *phydev,
+			     struct sk_buff *skb, int type)
+{
+	struct dp83640_private *dp83640 = phydev->priv;
+
+	if (!dp83640->hwts_rx_en)
+		return false;
+
+	if (is_status_frame(skb, type)) {
+		decode_status_frame(dp83640, skb);
+		/* Let the stack drop this frame. */
+		return false;
+	}
+
+	SKB_PTP_TYPE(skb) = type;
+	skb_queue_tail(&dp83640->rx_queue, skb);
+	schedule_work(&dp83640->ts_work);
+
+	return true;
+}
+
+static void dp83640_txtstamp(struct phy_device *phydev,
+			     struct sk_buff *skb, int type)
+{
+	struct dp83640_private *dp83640 = phydev->priv;
+
+	if (!dp83640->hwts_tx_en) {
+		kfree_skb(skb);
+		return;
+	}
+	skb_queue_tail(&dp83640->tx_queue, skb);
+	schedule_work(&dp83640->ts_work);
+}
+
+static struct phy_driver dp83640_driver = {
+	.phy_id		= DP83640_PHY_ID,
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "NatSemi DP83640",
+	.features	= PHY_BASIC_FEATURES,
+	.flags		= 0,
+	.probe		= dp83640_probe,
+	.remove		= dp83640_remove,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.hwtstamp	= dp83640_hwtstamp,
+	.rxtstamp	= dp83640_rxtstamp,
+	.txtstamp	= dp83640_txtstamp,
+	.driver		= {.owner = THIS_MODULE,}
+};
+
+static int __init dp83640_init(void)
+{
+	return phy_driver_register(&dp83640_driver);
+}
+
+static void __exit dp83640_exit(void)
+{
+	dp83640_free_clocks();
+	phy_driver_unregister(&dp83640_driver);
+}
+
+MODULE_DESCRIPTION("National Semiconductor DP83640 PHY driver");
+MODULE_AUTHOR("Richard Cochran <richard.cochran@omicron.at>");
+MODULE_LICENSE("GPL");
+
+module_init(dp83640_init);
+module_exit(dp83640_exit);
+
+static struct mdio_device_id __maybe_unused dp83640_tbl[] = {
+	{ DP83640_PHY_ID, 0xfffffff0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(mdio, dp83640_tbl);
diff --git a/drivers/net/phy/dp83640_reg.h b/drivers/net/phy/dp83640_reg.h
new file mode 100644
index 0000000..e7fe411
--- /dev/null
+++ b/drivers/net/phy/dp83640_reg.h
@@ -0,0 +1,267 @@
+/* dp83640_reg.h
+ * Generated by regen.tcl on Thu Feb 17 10:02:48 AM CET 2011
+ */
+#ifndef HAVE_DP83640_REGISTERS
+#define HAVE_DP83640_REGISTERS
+
+#define PAGE0                     0x0000
+#define PHYCR2                    0x001c /* PHY Control Register 2 */
+
+#define PAGE4                     0x0004
+#define PTP_CTL                   0x0014 /* PTP Control Register */
+#define PTP_TDR                   0x0015 /* PTP Time Data Register */
+#define PTP_STS                   0x0016 /* PTP Status Register */
+#define PTP_TSTS                  0x0017 /* PTP Trigger Status Register */
+#define PTP_RATEL                 0x0018 /* PTP Rate Low Register */
+#define PTP_RATEH                 0x0019 /* PTP Rate High Register */
+#define PTP_RDCKSUM               0x001a /* PTP Read Checksum */
+#define PTP_WRCKSUM               0x001b /* PTP Write Checksum */
+#define PTP_TXTS                  0x001c /* PTP Transmit Timestamp Register, in four 16-bit reads */
+#define PTP_RXTS                  0x001d /* PTP Receive Timestamp Register, in six? 16-bit reads */
+#define PTP_ESTS                  0x001e /* PTP Event Status Register */
+#define PTP_EDATA                 0x001f /* PTP Event Data Register */
+
+#define PAGE5                     0x0005
+#define PTP_TRIG                  0x0014 /* PTP Trigger Configuration Register */
+#define PTP_EVNT                  0x0015 /* PTP Event Configuration Register */
+#define PTP_TXCFG0                0x0016 /* PTP Transmit Configuration Register 0 */
+#define PTP_TXCFG1                0x0017 /* PTP Transmit Configuration Register 1 */
+#define PSF_CFG0                  0x0018 /* PHY Status Frame Configuration Register 0 */
+#define PTP_RXCFG0                0x0019 /* PTP Receive Configuration Register 0 */
+#define PTP_RXCFG1                0x001a /* PTP Receive Configuration Register 1 */
+#define PTP_RXCFG2                0x001b /* PTP Receive Configuration Register 2 */
+#define PTP_RXCFG3                0x001c /* PTP Receive Configuration Register 3 */
+#define PTP_RXCFG4                0x001d /* PTP Receive Configuration Register 4 */
+#define PTP_TRDL                  0x001e /* PTP Temporary Rate Duration Low Register */
+#define PTP_TRDH                  0x001f /* PTP Temporary Rate Duration High Register */
+
+#define PAGE6                     0x0006
+#define PTP_COC                   0x0014 /* PTP Clock Output Control Register */
+#define PSF_CFG1                  0x0015 /* PHY Status Frame Configuration Register 1 */
+#define PSF_CFG2                  0x0016 /* PHY Status Frame Configuration Register 2 */
+#define PSF_CFG3                  0x0017 /* PHY Status Frame Configuration Register 3 */
+#define PSF_CFG4                  0x0018 /* PHY Status Frame Configuration Register 4 */
+#define PTP_SFDCFG                0x0019 /* PTP SFD Configuration Register */
+#define PTP_INTCTL                0x001a /* PTP Interrupt Control Register */
+#define PTP_CLKSRC                0x001b /* PTP Clock Source Register */
+#define PTP_ETR                   0x001c /* PTP Ethernet Type Register */
+#define PTP_OFF                   0x001d /* PTP Offset Register */
+#define PTP_GPIOMON               0x001e /* PTP GPIO Monitor Register */
+#define PTP_RXHASH                0x001f /* PTP Receive Hash Register */
+
+/* Bit definitions for the PHYCR2 register */
+#define BC_WRITE                  (1<<11) /* Broadcast Write Enable */
+
+/* Bit definitions for the PTP_CTL register */
+#define TRIG_SEL_SHIFT            (10)    /* PTP Trigger Select */
+#define TRIG_SEL_MASK             (0x7)
+#define TRIG_DIS                  (1<<9)  /* Disable PTP Trigger */
+#define TRIG_EN                   (1<<8)  /* Enable PTP Trigger */
+#define TRIG_READ                 (1<<7)  /* Read PTP Trigger */
+#define TRIG_LOAD                 (1<<6)  /* Load PTP Trigger */
+#define PTP_RD_CLK                (1<<5)  /* Read PTP Clock */
+#define PTP_LOAD_CLK              (1<<4)  /* Load PTP Clock */
+#define PTP_STEP_CLK              (1<<3)  /* Step PTP Clock */
+#define PTP_ENABLE                (1<<2)  /* Enable PTP Clock */
+#define PTP_DISABLE               (1<<1)  /* Disable PTP Clock */
+#define PTP_RESET                 (1<<0)  /* Reset PTP Clock */
+
+/* Bit definitions for the PTP_STS register */
+#define TXTS_RDY                  (1<<11) /* Transmit Timestamp Ready */
+#define RXTS_RDY                  (1<<10) /* Receive Timestamp Ready */
+#define TRIG_DONE                 (1<<9)  /* PTP Trigger Done */
+#define EVENT_RDY                 (1<<8)  /* PTP Event Timestamp Ready */
+#define TXTS_IE                   (1<<3)  /* Transmit Timestamp Interrupt Enable */
+#define RXTS_IE                   (1<<2)  /* Receive Timestamp Interrupt Enable */
+#define TRIG_IE                   (1<<1)  /* Trigger Interrupt Enable */
+#define EVENT_IE                  (1<<0)  /* Event Interrupt Enable */
+
+/* Bit definitions for the PTP_TSTS register */
+#define TRIG7_ERROR               (1<<15) /* Trigger 7 Error */
+#define TRIG7_ACTIVE              (1<<14) /* Trigger 7 Active */
+#define TRIG6_ERROR               (1<<13) /* Trigger 6 Error */
+#define TRIG6_ACTIVE              (1<<12) /* Trigger 6 Active */
+#define TRIG5_ERROR               (1<<11) /* Trigger 5 Error */
+#define TRIG5_ACTIVE              (1<<10) /* Trigger 5 Active */
+#define TRIG4_ERROR               (1<<9)  /* Trigger 4 Error */
+#define TRIG4_ACTIVE              (1<<8)  /* Trigger 4 Active */
+#define TRIG3_ERROR               (1<<7)  /* Trigger 3 Error */
+#define TRIG3_ACTIVE              (1<<6)  /* Trigger 3 Active */
+#define TRIG2_ERROR               (1<<5)  /* Trigger 2 Error */
+#define TRIG2_ACTIVE              (1<<4)  /* Trigger 2 Active */
+#define TRIG1_ERROR               (1<<3)  /* Trigger 1 Error */
+#define TRIG1_ACTIVE              (1<<2)  /* Trigger 1 Active */
+#define TRIG0_ERROR               (1<<1)  /* Trigger 0 Error */
+#define TRIG0_ACTIVE              (1<<0)  /* Trigger 0 Active */
+
+/* Bit definitions for the PTP_RATEH register */
+#define PTP_RATE_DIR              (1<<15) /* PTP Rate Direction */
+#define PTP_TMP_RATE              (1<<14) /* PTP Temporary Rate */
+#define PTP_RATE_HI_SHIFT         (0)     /* PTP Rate High 10-bits */
+#define PTP_RATE_HI_MASK          (0x3ff)
+
+/* Bit definitions for the PTP_ESTS register */
+#define EVNTS_MISSED_SHIFT        (8)     /* Indicates number of events missed */
+#define EVNTS_MISSED_MASK         (0x7)
+#define EVNT_TS_LEN_SHIFT         (6)     /* Indicates length of the Timestamp field in 16-bit words minus 1 */
+#define EVNT_TS_LEN_MASK          (0x3)
+#define EVNT_RF                   (1<<5)  /* Indicates whether the event is a rise or falling event */
+#define EVNT_NUM_SHIFT            (2)     /* Indicates Event Timestamp Unit which detected an event */
+#define EVNT_NUM_MASK             (0x7)
+#define MULT_EVNT                 (1<<1)  /* Indicates multiple events were detected at the same time */
+#define EVENT_DET                 (1<<0)  /* PTP Event Detected */
+
+/* Bit definitions for the PTP_EDATA register */
+#define E7_RISE                   (1<<15) /* Indicates direction of Event 7 */
+#define E7_DET                    (1<<14) /* Indicates Event 7 detected */
+#define E6_RISE                   (1<<13) /* Indicates direction of Event 6 */
+#define E6_DET                    (1<<12) /* Indicates Event 6 detected */
+#define E5_RISE                   (1<<11) /* Indicates direction of Event 5 */
+#define E5_DET                    (1<<10) /* Indicates Event 5 detected */
+#define E4_RISE                   (1<<9)  /* Indicates direction of Event 4 */
+#define E4_DET                    (1<<8)  /* Indicates Event 4 detected */
+#define E3_RISE                   (1<<7)  /* Indicates direction of Event 3 */
+#define E3_DET                    (1<<6)  /* Indicates Event 3 detected */
+#define E2_RISE                   (1<<5)  /* Indicates direction of Event 2 */
+#define E2_DET                    (1<<4)  /* Indicates Event 2 detected */
+#define E1_RISE                   (1<<3)  /* Indicates direction of Event 1 */
+#define E1_DET                    (1<<2)  /* Indicates Event 1 detected */
+#define E0_RISE                   (1<<1)  /* Indicates direction of Event 0 */
+#define E0_DET                    (1<<0)  /* Indicates Event 0 detected */
+
+/* Bit definitions for the PTP_TRIG register */
+#define TRIG_PULSE                (1<<15) /* generate a Pulse rather than a single edge */
+#define TRIG_PER                  (1<<14) /* generate a periodic signal */
+#define TRIG_IF_LATE              (1<<13) /* trigger immediately if already past */
+#define TRIG_NOTIFY               (1<<12) /* Trigger Notification Enable */
+#define TRIG_GPIO_SHIFT           (8)     /* Trigger GPIO Connection, value 1-12 */
+#define TRIG_GPIO_MASK            (0xf)
+#define TRIG_TOGGLE               (1<<7)  /* Trigger Toggle Mode Enable */
+#define TRIG_CSEL_SHIFT           (1)     /* Trigger Configuration Select */
+#define TRIG_CSEL_MASK            (0x7)
+#define TRIG_WR                   (1<<0)  /* Trigger Configuration Write */
+
+/* Bit definitions for the PTP_EVNT register */
+#define EVNT_RISE                 (1<<14) /* Event Rise Detect Enable */
+#define EVNT_FALL                 (1<<13) /* Event Fall Detect Enable */
+#define EVNT_SINGLE               (1<<12) /* enable single event capture operation */
+#define EVNT_GPIO_SHIFT           (8)     /* Event GPIO Connection, value 1-12 */
+#define EVNT_GPIO_MASK            (0xf)
+#define EVNT_SEL_SHIFT            (1)     /* Event Select */
+#define EVNT_SEL_MASK             (0x7)
+#define EVNT_WR                   (1<<0)  /* Event Configuration Write */
+
+/* Bit definitions for the PTP_TXCFG0 register */
+#define SYNC_1STEP                (1<<15) /* insert timestamp into transmit Sync Messages */
+#define DR_INSERT                 (1<<13) /* Insert Delay_Req Timestamp in Delay_Resp (dangerous) */
+#define NTP_TS_EN                 (1<<12) /* Enable Timestamping of NTP Packets */
+#define IGNORE_2STEP              (1<<11) /* Ignore Two_Step flag for One-Step operation */
+#define CRC_1STEP                 (1<<10) /* Disable checking of CRC for One-Step operation */
+#define CHK_1STEP                 (1<<9)  /* Enable UDP Checksum correction for One-Step Operation */
+#define IP1588_EN                 (1<<8)  /* Enable IEEE 1588 defined IP address filter */
+#define TX_L2_EN                  (1<<7)  /* Layer2 Timestamp Enable */
+#define TX_IPV6_EN                (1<<6)  /* IPv6 Timestamp Enable */
+#define TX_IPV4_EN                (1<<5)  /* IPv4 Timestamp Enable */
+#define TX_PTP_VER_SHIFT          (1)     /* Enable Timestamp capture for IEEE 1588 version X */
+#define TX_PTP_VER_MASK           (0xf)
+#define TX_TS_EN                  (1<<0)  /* Transmit Timestamp Enable */
+
+/* Bit definitions for the PTP_TXCFG1 register */
+#define BYTE0_MASK_SHIFT          (8)     /* Bit mask to be used for matching Byte0 of the PTP Message */
+#define BYTE0_MASK_MASK           (0xff)
+#define BYTE0_DATA_SHIFT          (0)     /* Data to be used for matching Byte0 of the PTP Message */
+#define BYTE0_DATA_MASK           (0xff)
+
+/* Bit definitions for the PSF_CFG0 register */
+#define MAC_SRC_ADD_SHIFT         (11)    /* Status Frame Mac Source Address */
+#define MAC_SRC_ADD_MASK          (0x3)
+#define MIN_PRE_SHIFT             (8)     /* Status Frame Minimum Preamble */
+#define MIN_PRE_MASK              (0x7)
+#define PSF_ENDIAN                (1<<7)  /* Status Frame Endian Control */
+#define PSF_IPV4                  (1<<6)  /* Status Frame IPv4 Enable */
+#define PSF_PCF_RD                (1<<5)  /* Control Frame Read PHY Status Frame Enable */
+#define PSF_ERR_EN                (1<<4)  /* Error PHY Status Frame Enable */
+#define PSF_TXTS_EN               (1<<3)  /* Transmit Timestamp PHY Status Frame Enable */
+#define PSF_RXTS_EN               (1<<2)  /* Receive Timestamp PHY Status Frame Enable */
+#define PSF_TRIG_EN               (1<<1)  /* Trigger PHY Status Frame Enable */
+#define PSF_EVNT_EN               (1<<0)  /* Event PHY Status Frame Enable */
+
+/* Bit definitions for the PTP_RXCFG0 register */
+#define DOMAIN_EN                 (1<<15) /* Domain Match Enable */
+#define ALT_MAST_DIS              (1<<14) /* Alternate Master Timestamp Disable */
+#define USER_IP_SEL               (1<<13) /* Selects portion of IP address accessible thru PTP_RXCFG2 */
+#define USER_IP_EN                (1<<12) /* Enable User-programmed IP address filter */
+#define RX_SLAVE                  (1<<11) /* Receive Slave Only */
+#define IP1588_EN_SHIFT           (8)     /* Enable IEEE 1588 defined IP address filters */
+#define IP1588_EN_MASK            (0xf)
+#define RX_L2_EN                  (1<<7)  /* Layer2 Timestamp Enable */
+#define RX_IPV6_EN                (1<<6)  /* IPv6 Timestamp Enable */
+#define RX_IPV4_EN                (1<<5)  /* IPv4 Timestamp Enable */
+#define RX_PTP_VER_SHIFT          (1)     /* Enable Timestamp capture for IEEE 1588 version X */
+#define RX_PTP_VER_MASK           (0xf)
+#define RX_TS_EN                  (1<<0)  /* Receive Timestamp Enable */
+
+/* Bit definitions for the PTP_RXCFG1 register */
+#define BYTE0_MASK_SHIFT          (8)     /* Bit mask to be used for matching Byte0 of the PTP Message */
+#define BYTE0_MASK_MASK           (0xff)
+#define BYTE0_DATA_SHIFT          (0)     /* Data to be used for matching Byte0 of the PTP Message */
+#define BYTE0_DATA_MASK           (0xff)
+
+/* Bit definitions for the PTP_RXCFG3 register */
+#define TS_MIN_IFG_SHIFT          (12)    /* Minimum Inter-frame Gap */
+#define TS_MIN_IFG_MASK           (0xf)
+#define ACC_UDP                   (1<<11) /* Record Timestamp if UDP Checksum Error */
+#define ACC_CRC                   (1<<10) /* Record Timestamp if CRC Error */
+#define TS_APPEND                 (1<<9)  /* Append Timestamp for L2 */
+#define TS_INSERT                 (1<<8)  /* Enable Timestamp Insertion */
+#define PTP_DOMAIN_SHIFT          (0)     /* PTP Message domainNumber field */
+#define PTP_DOMAIN_MASK           (0xff)
+
+/* Bit definitions for the PTP_RXCFG4 register */
+#define IPV4_UDP_MOD              (1<<15) /* Enable IPV4 UDP Modification */
+#define TS_SEC_EN                 (1<<14) /* Enable Timestamp Seconds */
+#define TS_SEC_LEN_SHIFT          (12)    /* Inserted Timestamp Seconds Length */
+#define TS_SEC_LEN_MASK           (0x3)
+#define RXTS_NS_OFF_SHIFT         (6)     /* Receive Timestamp Nanoseconds offset */
+#define RXTS_NS_OFF_MASK          (0x3f)
+#define RXTS_SEC_OFF_SHIFT        (0)     /* Receive Timestamp Seconds offset */
+#define RXTS_SEC_OFF_MASK         (0x3f)
+
+/* Bit definitions for the PTP_COC register */
+#define PTP_CLKOUT_EN             (1<<15) /* PTP Clock Output Enable */
+#define PTP_CLKOUT_SEL            (1<<14) /* PTP Clock Output Source Select */
+#define PTP_CLKOUT_SPEEDSEL       (1<<13) /* PTP Clock Output I/O Speed Select */
+#define PTP_CLKDIV_SHIFT          (0)     /* PTP Clock Divide-by Value */
+#define PTP_CLKDIV_MASK           (0xff)
+
+/* Bit definitions for the PSF_CFG1 register */
+#define PTPRESERVED_SHIFT         (12)    /* PTP v2 reserved field */
+#define PTPRESERVED_MASK          (0xf)
+#define VERSIONPTP_SHIFT          (8)     /* PTP v2 versionPTP field */
+#define VERSIONPTP_MASK           (0xf)
+#define TRANSPORT_SPECIFIC_SHIFT  (4)     /* PTP v2 Header transportSpecific field */
+#define TRANSPORT_SPECIFIC_MASK   (0xf)
+#define MESSAGETYPE_SHIFT         (0)     /* PTP v2 messageType field */
+#define MESSAGETYPE_MASK          (0xf)
+
+/* Bit definitions for the PTP_SFDCFG register */
+#define TX_SFD_GPIO_SHIFT         (4)     /* TX SFD GPIO Select, value 1-12 */
+#define TX_SFD_GPIO_MASK          (0xf)
+#define RX_SFD_GPIO_SHIFT         (0)     /* RX SFD GPIO Select, value 1-12 */
+#define RX_SFD_GPIO_MASK          (0xf)
+
+/* Bit definitions for the PTP_INTCTL register */
+#define PTP_INT_GPIO_SHIFT        (0)     /* PTP Interrupt GPIO Select */
+#define PTP_INT_GPIO_MASK         (0xf)
+
+/* Bit definitions for the PTP_CLKSRC register */
+#define CLK_SRC_SHIFT             (14)    /* PTP Clock Source Select */
+#define CLK_SRC_MASK              (0x3)
+#define CLK_SRC_PER_SHIFT         (0)     /* PTP Clock Source Period */
+#define CLK_SRC_PER_MASK          (0x7f)
+
+/* Bit definitions for the PTP_OFF register */
+#define PTP_OFFSET_SHIFT          (0)     /* PTP Message offset from preceding header */
+#define PTP_OFFSET_MASK           (0xff)
+
+#endif
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 94a114a..b1396e5 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -81,6 +81,19 @@
 static u8 rtl_cmd_type;
 static u8 rtl_cmd_width;
 
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl(p);
+	high = readl(p + 1);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
 static void __iomem *rtl_port_map(phys_addr_t addr, unsigned long len)
 {
 	if (rtl_cmd_type == RTL_ADDR_TYPE_MMIO)
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index 85c8ad4..5ffe7c3 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -344,6 +344,19 @@
 static bool
 ips_gpu_turbo_enabled(struct ips_driver *ips);
 
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl(p);
+	high = readl(p + 1);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
 /**
  * ips_cpu_busy - is CPU busy?
  * @ips: IPS driver struct
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
new file mode 100644
index 0000000..68d7201
--- /dev/null
+++ b/drivers/ptp/Kconfig
@@ -0,0 +1,75 @@
+#
+# PTP clock support configuration
+#
+
+menu "PTP clock support"
+
+comment "Enable Device Drivers -> PPS to see the PTP clock options."
+	depends on PPS=n
+
+config PTP_1588_CLOCK
+	tristate "PTP clock support"
+	depends on EXPERIMENTAL
+	depends on PPS
+	help
+	  The IEEE 1588 standard defines a method to precisely
+	  synchronize distributed clocks over Ethernet networks. The
+	  standard defines a Precision Time Protocol (PTP), which can
+	  be used to achieve synchronization within a few dozen
+	  microseconds. In addition, with the help of special hardware
+	  time stamping units, it can be possible to achieve
+	  synchronization to within a few hundred nanoseconds.
+
+	  This driver adds support for PTP clocks as character
+	  devices. If you want to use a PTP clock, then you should
+	  also enable at least one clock driver as well.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp.
+
+config PTP_1588_CLOCK_GIANFAR
+	tristate "Freescale eTSEC as PTP clock"
+	depends on PTP_1588_CLOCK
+	depends on GIANFAR
+	help
+	  This driver adds support for using the eTSEC as a PTP
+	  clock. This clock is only useful if your PTP programs are
+	  getting hardware time stamps on the PTP Ethernet packets
+	  using the SO_TIMESTAMPING API.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called gianfar_ptp.
+
+config PTP_1588_CLOCK_IXP46X
+	tristate "Intel IXP46x as PTP clock"
+	depends on PTP_1588_CLOCK
+	depends on IXP4XX_ETH
+	help
+	  This driver adds support for using the IXP46X as a PTP
+	  clock. This clock is only useful if your PTP programs are
+	  getting hardware time stamps on the PTP Ethernet packets
+	  using the SO_TIMESTAMPING API.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp_ixp46x.
+
+comment "Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks."
+	depends on PTP_1588_CLOCK && (PHYLIB=n || NETWORK_PHY_TIMESTAMPING=n)
+
+config DP83640_PHY
+	tristate "Driver for the National Semiconductor DP83640 PHYTER"
+	depends on PTP_1588_CLOCK
+	depends on NETWORK_PHY_TIMESTAMPING
+	depends on PHYLIB
+	---help---
+	  Supports the DP83640 PHYTER with IEEE 1588 features.
+
+	  This driver adds support for using the DP83640 as a PTP
+	  clock. This clock is only useful if your PTP programs are
+	  getting hardware time stamps on the PTP Ethernet packets
+	  using the SO_TIMESTAMPING API.
+
+	  In order for this to work, your MAC driver must also
+	  implement the skb_tx_timetamp() function.
+
+endmenu
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
new file mode 100644
index 0000000..f6933e8
--- /dev/null
+++ b/drivers/ptp/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for PTP 1588 clock support.
+#
+
+ptp-y					:= ptp_clock.o ptp_chardev.o ptp_sysfs.o
+obj-$(CONFIG_PTP_1588_CLOCK)		+= ptp.o
+obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
new file mode 100644
index 0000000..a8d03ae
--- /dev/null
+++ b/drivers/ptp/ptp_chardev.c
@@ -0,0 +1,159 @@
+/*
+ * PTP 1588 clock support - character device implementation.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/module.h>
+#include <linux/posix-clock.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+
+#include "ptp_private.h"
+
+int ptp_open(struct posix_clock *pc, fmode_t fmode)
+{
+	return 0;
+}
+
+long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
+{
+	struct ptp_clock_caps caps;
+	struct ptp_clock_request req;
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct ptp_clock_info *ops = ptp->info;
+	int enable, err = 0;
+
+	switch (cmd) {
+
+	case PTP_CLOCK_GETCAPS:
+		memset(&caps, 0, sizeof(caps));
+		caps.max_adj = ptp->info->max_adj;
+		caps.n_alarm = ptp->info->n_alarm;
+		caps.n_ext_ts = ptp->info->n_ext_ts;
+		caps.n_per_out = ptp->info->n_per_out;
+		caps.pps = ptp->info->pps;
+		err = copy_to_user((void __user *)arg, &caps, sizeof(caps));
+		break;
+
+	case PTP_EXTTS_REQUEST:
+		if (copy_from_user(&req.extts, (void __user *)arg,
+				   sizeof(req.extts))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.extts.index >= ops->n_ext_ts) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_EXTTS;
+		enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	case PTP_PEROUT_REQUEST:
+		if (copy_from_user(&req.perout, (void __user *)arg,
+				   sizeof(req.perout))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.perout.index >= ops->n_per_out) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_PEROUT;
+		enable = req.perout.period.sec || req.perout.period.nsec;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	case PTP_ENABLE_PPS:
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+		req.type = PTP_CLK_REQ_PPS;
+		enable = arg ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	default:
+		err = -ENOTTY;
+		break;
+	}
+	return err;
+}
+
+unsigned int ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+
+	poll_wait(fp, &ptp->tsev_wq, wait);
+
+	return queue_cnt(&ptp->tsevq) ? POLLIN : 0;
+}
+
+ssize_t ptp_read(struct posix_clock *pc,
+		 uint rdflags, char __user *buf, size_t cnt)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct timestamp_event_queue *queue = &ptp->tsevq;
+	struct ptp_extts_event event[PTP_BUF_TIMESTAMPS];
+	unsigned long flags;
+	size_t qcnt, i;
+
+	if (cnt % sizeof(struct ptp_extts_event) != 0)
+		return -EINVAL;
+
+	if (cnt > sizeof(event))
+		cnt = sizeof(event);
+
+	cnt = cnt / sizeof(struct ptp_extts_event);
+
+	if (mutex_lock_interruptible(&ptp->tsevq_mux))
+		return -ERESTARTSYS;
+
+	if (wait_event_interruptible(ptp->tsev_wq,
+				     ptp->defunct || queue_cnt(queue))) {
+		mutex_unlock(&ptp->tsevq_mux);
+		return -ERESTARTSYS;
+	}
+
+	if (ptp->defunct)
+		return -ENODEV;
+
+	spin_lock_irqsave(&queue->lock, flags);
+
+	qcnt = queue_cnt(queue);
+
+	if (cnt > qcnt)
+		cnt = qcnt;
+
+	for (i = 0; i < cnt; i++) {
+		event[i] = queue->buf[queue->head];
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+	}
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	cnt = cnt * sizeof(struct ptp_extts_event);
+
+	mutex_unlock(&ptp->tsevq_mux);
+
+	if (copy_to_user(buf, event, cnt)) {
+		mutex_unlock(&ptp->tsevq_mux);
+		return -EFAULT;
+	}
+
+	return cnt;
+}
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
new file mode 100644
index 0000000..cf3f999
--- /dev/null
+++ b/drivers/ptp/ptp_clock.c
@@ -0,0 +1,343 @@
+/*
+ * PTP 1588 clock support
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/posix-clock.h>
+#include <linux/pps_kernel.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include "ptp_private.h"
+
+#define PTP_MAX_ALARMS 4
+#define PTP_MAX_CLOCKS 8
+#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT)
+#define PTP_PPS_EVENT PPS_CAPTUREASSERT
+#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
+
+/* private globals */
+
+static dev_t ptp_devt;
+static struct class *ptp_class;
+
+static DECLARE_BITMAP(ptp_clocks_map, PTP_MAX_CLOCKS);
+static DEFINE_MUTEX(ptp_clocks_mutex); /* protects 'ptp_clocks_map' */
+
+/* time stamp event queue operations */
+
+static inline int queue_free(struct timestamp_event_queue *q)
+{
+	return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1;
+}
+
+static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
+				       struct ptp_clock_event *src)
+{
+	struct ptp_extts_event *dst;
+	unsigned long flags;
+	s64 seconds;
+	u32 remainder;
+
+	seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
+
+	spin_lock_irqsave(&queue->lock, flags);
+
+	dst = &queue->buf[queue->tail];
+	dst->index = src->index;
+	dst->t.sec = seconds;
+	dst->t.nsec = remainder;
+
+	if (!queue_free(queue))
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+
+	queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+}
+
+static s32 scaled_ppm_to_ppb(long ppm)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 *
+	 * We want to calculate
+	 *
+	 *    ppb = scaled_ppm * 1000 / 2^16
+	 *
+	 * which simplifies to
+	 *
+	 *    ppb = scaled_ppm * 125 / 2^13
+	 */
+	s64 ppb = 1 + ppm;
+	ppb *= 125;
+	ppb >>= 13;
+	return (s32) ppb;
+}
+
+/* posix clock implementation */
+
+static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp)
+{
+	return 1; /* always round timer functions to one nanosecond */
+}
+
+static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	return ptp->info->settime(ptp->info, tp);
+}
+
+static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	return ptp->info->gettime(ptp->info, tp);
+}
+
+static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct ptp_clock_info *ops;
+	int err = -EOPNOTSUPP;
+
+	ops = ptp->info;
+
+	if (tx->modes & ADJ_SETOFFSET) {
+		struct timespec ts;
+		ktime_t kt;
+		s64 delta;
+
+		ts.tv_sec  = tx->time.tv_sec;
+		ts.tv_nsec = tx->time.tv_usec;
+
+		if (!(tx->modes & ADJ_NANO))
+			ts.tv_nsec *= 1000;
+
+		if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
+			return -EINVAL;
+
+		kt = timespec_to_ktime(ts);
+		delta = ktime_to_ns(kt);
+		err = ops->adjtime(ops, delta);
+
+	} else if (tx->modes & ADJ_FREQUENCY) {
+
+		err = ops->adjfreq(ops, scaled_ppm_to_ppb(tx->freq));
+	}
+
+	return err;
+}
+
+static struct posix_clock_operations ptp_clock_ops = {
+	.owner		= THIS_MODULE,
+	.clock_adjtime	= ptp_clock_adjtime,
+	.clock_gettime	= ptp_clock_gettime,
+	.clock_getres	= ptp_clock_getres,
+	.clock_settime	= ptp_clock_settime,
+	.ioctl		= ptp_ioctl,
+	.open		= ptp_open,
+	.poll		= ptp_poll,
+	.read		= ptp_read,
+};
+
+static void delete_ptp_clock(struct posix_clock *pc)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+
+	mutex_destroy(&ptp->tsevq_mux);
+
+	/* Remove the clock from the bit map. */
+	mutex_lock(&ptp_clocks_mutex);
+	clear_bit(ptp->index, ptp_clocks_map);
+	mutex_unlock(&ptp_clocks_mutex);
+
+	kfree(ptp);
+}
+
+/* public interface */
+
+struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info)
+{
+	struct ptp_clock *ptp;
+	int err = 0, index, major = MAJOR(ptp_devt);
+
+	if (info->n_alarm > PTP_MAX_ALARMS)
+		return ERR_PTR(-EINVAL);
+
+	/* Find a free clock slot and reserve it. */
+	err = -EBUSY;
+	mutex_lock(&ptp_clocks_mutex);
+	index = find_first_zero_bit(ptp_clocks_map, PTP_MAX_CLOCKS);
+	if (index < PTP_MAX_CLOCKS)
+		set_bit(index, ptp_clocks_map);
+	else
+		goto no_slot;
+
+	/* Initialize a clock structure. */
+	err = -ENOMEM;
+	ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
+	if (ptp == NULL)
+		goto no_memory;
+
+	ptp->clock.ops = ptp_clock_ops;
+	ptp->clock.release = delete_ptp_clock;
+	ptp->info = info;
+	ptp->devid = MKDEV(major, index);
+	ptp->index = index;
+	spin_lock_init(&ptp->tsevq.lock);
+	mutex_init(&ptp->tsevq_mux);
+	init_waitqueue_head(&ptp->tsev_wq);
+
+	/* Create a new device in our class. */
+	ptp->dev = device_create(ptp_class, NULL, ptp->devid, ptp,
+				 "ptp%d", ptp->index);
+	if (IS_ERR(ptp->dev))
+		goto no_device;
+
+	dev_set_drvdata(ptp->dev, ptp);
+
+	err = ptp_populate_sysfs(ptp);
+	if (err)
+		goto no_sysfs;
+
+	/* Register a new PPS source. */
+	if (info->pps) {
+		struct pps_source_info pps;
+		memset(&pps, 0, sizeof(pps));
+		snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index);
+		pps.mode = PTP_PPS_MODE;
+		pps.owner = info->owner;
+		ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
+		if (!ptp->pps_source) {
+			pr_err("failed to register pps source\n");
+			goto no_pps;
+		}
+	}
+
+	/* Create a posix clock. */
+	err = posix_clock_register(&ptp->clock, ptp->devid);
+	if (err) {
+		pr_err("failed to create posix clock\n");
+		goto no_clock;
+	}
+
+	mutex_unlock(&ptp_clocks_mutex);
+	return ptp;
+
+no_clock:
+	if (ptp->pps_source)
+		pps_unregister_source(ptp->pps_source);
+no_pps:
+	ptp_cleanup_sysfs(ptp);
+no_sysfs:
+	device_destroy(ptp_class, ptp->devid);
+no_device:
+	mutex_destroy(&ptp->tsevq_mux);
+	kfree(ptp);
+no_memory:
+	clear_bit(index, ptp_clocks_map);
+no_slot:
+	mutex_unlock(&ptp_clocks_mutex);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ptp_clock_register);
+
+int ptp_clock_unregister(struct ptp_clock *ptp)
+{
+	ptp->defunct = 1;
+	wake_up_interruptible(&ptp->tsev_wq);
+
+	/* Release the clock's resources. */
+	if (ptp->pps_source)
+		pps_unregister_source(ptp->pps_source);
+	ptp_cleanup_sysfs(ptp);
+	device_destroy(ptp_class, ptp->devid);
+
+	posix_clock_unregister(&ptp->clock);
+	return 0;
+}
+EXPORT_SYMBOL(ptp_clock_unregister);
+
+void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
+{
+	struct pps_event_time evt;
+
+	switch (event->type) {
+
+	case PTP_CLOCK_ALARM:
+		break;
+
+	case PTP_CLOCK_EXTTS:
+		enqueue_external_timestamp(&ptp->tsevq, event);
+		wake_up_interruptible(&ptp->tsev_wq);
+		break;
+
+	case PTP_CLOCK_PPS:
+		pps_get_ts(&evt);
+		pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
+		break;
+	}
+}
+EXPORT_SYMBOL(ptp_clock_event);
+
+/* module operations */
+
+static void __exit ptp_exit(void)
+{
+	class_destroy(ptp_class);
+	unregister_chrdev_region(ptp_devt, PTP_MAX_CLOCKS);
+}
+
+static int __init ptp_init(void)
+{
+	int err;
+
+	ptp_class = class_create(THIS_MODULE, "ptp");
+	if (IS_ERR(ptp_class)) {
+		pr_err("ptp: failed to allocate class\n");
+		return PTR_ERR(ptp_class);
+	}
+
+	err = alloc_chrdev_region(&ptp_devt, 0, PTP_MAX_CLOCKS, "ptp");
+	if (err < 0) {
+		pr_err("ptp: failed to allocate device region\n");
+		goto no_region;
+	}
+
+	ptp_class->dev_attrs = ptp_dev_attrs;
+	pr_info("PTP clock support registered\n");
+	return 0;
+
+no_region:
+	class_destroy(ptp_class);
+	return err;
+}
+
+subsys_initcall(ptp_init);
+module_exit(ptp_exit);
+
+MODULE_AUTHOR("Richard Cochran <richard.cochran@omicron.at>");
+MODULE_DESCRIPTION("PTP clocks support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c
new file mode 100644
index 0000000..803d665b
--- /dev/null
+++ b/drivers/ptp/ptp_ixp46x.c
@@ -0,0 +1,332 @@
+/*
+ * PTP 1588 clock using the IXP46X
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/ptp_clock_kernel.h>
+#include <mach/ixp46x_ts.h>
+
+#define DRIVER		"ptp_ixp46x"
+#define N_EXT_TS	2
+#define MASTER_GPIO	8
+#define MASTER_IRQ	25
+#define SLAVE_GPIO	7
+#define SLAVE_IRQ	24
+
+struct ixp_clock {
+	struct ixp46x_ts_regs *regs;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info caps;
+	int exts0_enabled;
+	int exts1_enabled;
+};
+
+DEFINE_SPINLOCK(register_lock);
+
+/*
+ * Register access functions
+ */
+
+static u64 ixp_systime_read(struct ixp46x_ts_regs *regs)
+{
+	u64 ns;
+	u32 lo, hi;
+
+	lo = __raw_readl(&regs->systime_lo);
+	hi = __raw_readl(&regs->systime_hi);
+
+	ns = ((u64) hi) << 32;
+	ns |= lo;
+	ns <<= TICKS_NS_SHIFT;
+
+	return ns;
+}
+
+static void ixp_systime_write(struct ixp46x_ts_regs *regs, u64 ns)
+{
+	u32 hi, lo;
+
+	ns >>= TICKS_NS_SHIFT;
+	hi = ns >> 32;
+	lo = ns & 0xffffffff;
+
+	__raw_writel(lo, &regs->systime_lo);
+	__raw_writel(hi, &regs->systime_hi);
+}
+
+/*
+ * Interrupt service routine
+ */
+
+static irqreturn_t isr(int irq, void *priv)
+{
+	struct ixp_clock *ixp_clock = priv;
+	struct ixp46x_ts_regs *regs = ixp_clock->regs;
+	struct ptp_clock_event event;
+	u32 ack = 0, lo, hi, val;
+
+	val = __raw_readl(&regs->event);
+
+	if (val & TSER_SNS) {
+		ack |= TSER_SNS;
+		if (ixp_clock->exts0_enabled) {
+			hi = __raw_readl(&regs->asms_hi);
+			lo = __raw_readl(&regs->asms_lo);
+			event.type = PTP_CLOCK_EXTTS;
+			event.index = 0;
+			event.timestamp = ((u64) hi) << 32;
+			event.timestamp |= lo;
+			event.timestamp <<= TICKS_NS_SHIFT;
+			ptp_clock_event(ixp_clock->ptp_clock, &event);
+		}
+	}
+
+	if (val & TSER_SNM) {
+		ack |= TSER_SNM;
+		if (ixp_clock->exts1_enabled) {
+			hi = __raw_readl(&regs->amms_hi);
+			lo = __raw_readl(&regs->amms_lo);
+			event.type = PTP_CLOCK_EXTTS;
+			event.index = 1;
+			event.timestamp = ((u64) hi) << 32;
+			event.timestamp |= lo;
+			event.timestamp <<= TICKS_NS_SHIFT;
+			ptp_clock_event(ixp_clock->ptp_clock, &event);
+		}
+	}
+
+	if (val & TTIPEND)
+		ack |= TTIPEND; /* this bit seems to be always set */
+
+	if (ack) {
+		__raw_writel(ack, &regs->event);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_ixp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	u64 adj;
+	u32 diff, addend;
+	int neg_adj = 0;
+	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
+	struct ixp46x_ts_regs *regs = ixp_clock->regs;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	addend = DEFAULT_ADDEND;
+	adj = addend;
+	adj *= ppb;
+	diff = div_u64(adj, 1000000000ULL);
+
+	addend = neg_adj ? addend - diff : addend + diff;
+
+	__raw_writel(addend, &regs->addend);
+
+	return 0;
+}
+
+static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	s64 now;
+	unsigned long flags;
+	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
+	struct ixp46x_ts_regs *regs = ixp_clock->regs;
+
+	spin_lock_irqsave(&register_lock, flags);
+
+	now = ixp_systime_read(regs);
+	now += delta;
+	ixp_systime_write(regs, now);
+
+	spin_unlock_irqrestore(&register_lock, flags);
+
+	return 0;
+}
+
+static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	u64 ns;
+	u32 remainder;
+	unsigned long flags;
+	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
+	struct ixp46x_ts_regs *regs = ixp_clock->regs;
+
+	spin_lock_irqsave(&register_lock, flags);
+
+	ns = ixp_systime_read(regs);
+
+	spin_unlock_irqrestore(&register_lock, flags);
+
+	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
+	ts->tv_nsec = remainder;
+	return 0;
+}
+
+static int ptp_ixp_settime(struct ptp_clock_info *ptp,
+			   const struct timespec *ts)
+{
+	u64 ns;
+	unsigned long flags;
+	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
+	struct ixp46x_ts_regs *regs = ixp_clock->regs;
+
+	ns = ts->tv_sec * 1000000000ULL;
+	ns += ts->tv_nsec;
+
+	spin_lock_irqsave(&register_lock, flags);
+
+	ixp_systime_write(regs, ns);
+
+	spin_unlock_irqrestore(&register_lock, flags);
+
+	return 0;
+}
+
+static int ptp_ixp_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		switch (rq->extts.index) {
+		case 0:
+			ixp_clock->exts0_enabled = on ? 1 : 0;
+			break;
+		case 1:
+			ixp_clock->exts1_enabled = on ? 1 : 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+		return 0;
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static struct ptp_clock_info ptp_ixp_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "IXP46X timer",
+	.max_adj	= 66666655,
+	.n_ext_ts	= N_EXT_TS,
+	.pps		= 0,
+	.adjfreq	= ptp_ixp_adjfreq,
+	.adjtime	= ptp_ixp_adjtime,
+	.gettime	= ptp_ixp_gettime,
+	.settime	= ptp_ixp_settime,
+	.enable		= ptp_ixp_enable,
+};
+
+/* module operations */
+
+static struct ixp_clock ixp_clock;
+
+static int setup_interrupt(int gpio)
+{
+	int irq;
+
+	gpio_line_config(gpio, IXP4XX_GPIO_IN);
+
+	irq = gpio_to_irq(gpio);
+
+	if (NO_IRQ == irq)
+		return NO_IRQ;
+
+	if (irq_set_irq_type(irq, IRQF_TRIGGER_FALLING)) {
+		pr_err("cannot set trigger type for irq %d\n", irq);
+		return NO_IRQ;
+	}
+
+	if (request_irq(irq, isr, 0, DRIVER, &ixp_clock)) {
+		pr_err("request_irq failed for irq %d\n", irq);
+		return NO_IRQ;
+	}
+
+	return irq;
+}
+
+static void __exit ptp_ixp_exit(void)
+{
+	free_irq(MASTER_IRQ, &ixp_clock);
+	free_irq(SLAVE_IRQ, &ixp_clock);
+	ptp_clock_unregister(ixp_clock.ptp_clock);
+}
+
+static int __init ptp_ixp_init(void)
+{
+	if (!cpu_is_ixp46x())
+		return -ENODEV;
+
+	ixp_clock.regs =
+		(struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+
+	ixp_clock.caps = ptp_ixp_caps;
+
+	ixp_clock.ptp_clock = ptp_clock_register(&ixp_clock.caps);
+
+	if (IS_ERR(ixp_clock.ptp_clock))
+		return PTR_ERR(ixp_clock.ptp_clock);
+
+	__raw_writel(DEFAULT_ADDEND, &ixp_clock.regs->addend);
+	__raw_writel(1, &ixp_clock.regs->trgt_lo);
+	__raw_writel(0, &ixp_clock.regs->trgt_hi);
+	__raw_writel(TTIPEND, &ixp_clock.regs->event);
+
+	if (MASTER_IRQ != setup_interrupt(MASTER_GPIO)) {
+		pr_err("failed to setup gpio %d as irq\n", MASTER_GPIO);
+		goto no_master;
+	}
+	if (SLAVE_IRQ != setup_interrupt(SLAVE_GPIO)) {
+		pr_err("failed to setup gpio %d as irq\n", SLAVE_GPIO);
+		goto no_slave;
+	}
+
+	return 0;
+no_slave:
+	free_irq(MASTER_IRQ, &ixp_clock);
+no_master:
+	ptp_clock_unregister(ixp_clock.ptp_clock);
+	return -ENODEV;
+}
+
+module_init(ptp_ixp_init);
+module_exit(ptp_ixp_exit);
+
+MODULE_AUTHOR("Richard Cochran <richard.cochran@omicron.at>");
+MODULE_DESCRIPTION("PTP clock using the IXP46X timer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
new file mode 100644
index 0000000..4d5b508
--- /dev/null
+++ b/drivers/ptp/ptp_private.h
@@ -0,0 +1,92 @@
+/*
+ * PTP 1588 clock support - private declarations for the core module.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _PTP_PRIVATE_H_
+#define _PTP_PRIVATE_H_
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/posix-clock.h>
+#include <linux/ptp_clock.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/time.h>
+
+#define PTP_MAX_TIMESTAMPS 128
+#define PTP_BUF_TIMESTAMPS 30
+
+struct timestamp_event_queue {
+	struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS];
+	int head;
+	int tail;
+	spinlock_t lock;
+};
+
+struct ptp_clock {
+	struct posix_clock clock;
+	struct device *dev;
+	struct ptp_clock_info *info;
+	dev_t devid;
+	int index; /* index into clocks.map */
+	struct pps_device *pps_source;
+	struct timestamp_event_queue tsevq; /* simple fifo for time stamps */
+	struct mutex tsevq_mux; /* one process at a time reading the fifo */
+	wait_queue_head_t tsev_wq;
+	int defunct; /* tells readers to go away when clock is being removed */
+};
+
+/*
+ * The function queue_cnt() is safe for readers to call without
+ * holding q->lock. Readers use this function to verify that the queue
+ * is nonempty before proceeding with a dequeue operation. The fact
+ * that a writer might concurrently increment the tail does not
+ * matter, since the queue remains nonempty nonetheless.
+ */
+static inline int queue_cnt(struct timestamp_event_queue *q)
+{
+	int cnt = q->tail - q->head;
+	return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
+}
+
+/*
+ * see ptp_chardev.c
+ */
+
+long ptp_ioctl(struct posix_clock *pc,
+	       unsigned int cmd, unsigned long arg);
+
+int ptp_open(struct posix_clock *pc, fmode_t fmode);
+
+ssize_t ptp_read(struct posix_clock *pc,
+		 uint flags, char __user *buf, size_t cnt);
+
+uint ptp_poll(struct posix_clock *pc,
+	      struct file *fp, poll_table *wait);
+
+/*
+ * see ptp_sysfs.c
+ */
+
+extern struct device_attribute ptp_dev_attrs[];
+
+int ptp_cleanup_sysfs(struct ptp_clock *ptp);
+
+int ptp_populate_sysfs(struct ptp_clock *ptp);
+
+#endif
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
new file mode 100644
index 0000000..2f93926
--- /dev/null
+++ b/drivers/ptp/ptp_sysfs.c
@@ -0,0 +1,230 @@
+/*
+ * PTP 1588 clock support - sysfs interface.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/capability.h>
+
+#include "ptp_private.h"
+
+static ssize_t clock_name_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+}
+
+#define PTP_SHOW_INT(name)						\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *page)	\
+{									\
+	struct ptp_clock *ptp = dev_get_drvdata(dev);			\
+	return snprintf(page, PAGE_SIZE-1, "%d\n", ptp->info->name);	\
+}
+
+PTP_SHOW_INT(max_adj);
+PTP_SHOW_INT(n_alarm);
+PTP_SHOW_INT(n_ext_ts);
+PTP_SHOW_INT(n_per_out);
+PTP_SHOW_INT(pps);
+
+#define PTP_RO_ATTR(_var, _name) {				\
+	.attr	= { .name = __stringify(_name), .mode = 0444 },	\
+	.show	= _var##_show,					\
+}
+
+struct device_attribute ptp_dev_attrs[] = {
+	PTP_RO_ATTR(clock_name,	clock_name),
+	PTP_RO_ATTR(max_adj,	max_adjustment),
+	PTP_RO_ATTR(n_alarm,	n_alarms),
+	PTP_RO_ATTR(n_ext_ts,	n_external_timestamps),
+	PTP_RO_ATTR(n_per_out,	n_periodic_outputs),
+	PTP_RO_ATTR(pps,	pps_available),
+	__ATTR_NULL,
+};
+
+static ssize_t extts_enable_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_EXTTS };
+	int cnt, enable;
+	int err = -EINVAL;
+
+	cnt = sscanf(buf, "%u %d", &req.extts.index, &enable);
+	if (cnt != 2)
+		goto out;
+	if (req.extts.index >= ops->n_ext_ts)
+		goto out;
+
+	err = ops->enable(ops, &req, enable ? 1 : 0);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static ssize_t extts_fifo_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct timestamp_event_queue *queue = &ptp->tsevq;
+	struct ptp_extts_event event;
+	unsigned long flags;
+	size_t qcnt;
+	int cnt = 0;
+
+	memset(&event, 0, sizeof(event));
+
+	if (mutex_lock_interruptible(&ptp->tsevq_mux))
+		return -ERESTARTSYS;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	qcnt = queue_cnt(queue);
+	if (qcnt) {
+		event = queue->buf[queue->head];
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+	}
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (!qcnt)
+		goto out;
+
+	cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n",
+		       event.index, event.t.sec, event.t.nsec);
+out:
+	mutex_unlock(&ptp->tsevq_mux);
+	return cnt;
+}
+
+static ssize_t period_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_PEROUT };
+	int cnt, enable, err = -EINVAL;
+
+	cnt = sscanf(buf, "%u %lld %u %lld %u", &req.perout.index,
+		     &req.perout.start.sec, &req.perout.start.nsec,
+		     &req.perout.period.sec, &req.perout.period.nsec);
+	if (cnt != 5)
+		goto out;
+	if (req.perout.index >= ops->n_per_out)
+		goto out;
+
+	enable = req.perout.period.sec || req.perout.period.nsec;
+	err = ops->enable(ops, &req, enable);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static ssize_t pps_enable_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_PPS };
+	int cnt, enable;
+	int err = -EINVAL;
+
+	if (!capable(CAP_SYS_TIME))
+		return -EPERM;
+
+	cnt = sscanf(buf, "%d", &enable);
+	if (cnt != 1)
+		goto out;
+
+	err = ops->enable(ops, &req, enable ? 1 : 0);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store);
+static DEVICE_ATTR(fifo,         0444, extts_fifo_show, NULL);
+static DEVICE_ATTR(period,       0220, NULL, period_store);
+static DEVICE_ATTR(pps_enable,   0220, NULL, pps_enable_store);
+
+int ptp_cleanup_sysfs(struct ptp_clock *ptp)
+{
+	struct device *dev = ptp->dev;
+	struct ptp_clock_info *info = ptp->info;
+
+	if (info->n_ext_ts) {
+		device_remove_file(dev, &dev_attr_extts_enable);
+		device_remove_file(dev, &dev_attr_fifo);
+	}
+	if (info->n_per_out)
+		device_remove_file(dev, &dev_attr_period);
+
+	if (info->pps)
+		device_remove_file(dev, &dev_attr_pps_enable);
+
+	return 0;
+}
+
+int ptp_populate_sysfs(struct ptp_clock *ptp)
+{
+	struct device *dev = ptp->dev;
+	struct ptp_clock_info *info = ptp->info;
+	int err;
+
+	if (info->n_ext_ts) {
+		err = device_create_file(dev, &dev_attr_extts_enable);
+		if (err)
+			goto out1;
+		err = device_create_file(dev, &dev_attr_fifo);
+		if (err)
+			goto out2;
+	}
+	if (info->n_per_out) {
+		err = device_create_file(dev, &dev_attr_period);
+		if (err)
+			goto out3;
+	}
+	if (info->pps) {
+		err = device_create_file(dev, &dev_attr_pps_enable);
+		if (err)
+			goto out4;
+	}
+	return 0;
+out4:
+	if (info->n_per_out)
+		device_remove_file(dev, &dev_attr_period);
+out3:
+	if (info->n_ext_ts)
+		device_remove_file(dev, &dev_attr_fifo);
+out2:
+	if (info->n_ext_ts)
+		device_remove_file(dev, &dev_attr_extts_enable);
+out1:
+	return err;
+}
diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 35381cb..03e522b 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c
@@ -655,6 +655,27 @@
 	return 0;
 }
 
+#ifndef readq
+static inline __u64 readq(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl(p);
+	high = readl(p + 1);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef writeq
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+	writel(val, addr);
+	writel(val >> 32, addr+4);
+}
+#endif
+
 static int qla4_8xxx_pci_mem_read_direct(struct scsi_qla_host *ha,
 		u64 off, void *data, int size)
 {
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 95019c7..4778e27 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -636,7 +636,7 @@
 	disk->first_minor = minor;
 	sprintf(disk->disk_name, "sr%d", minor);
 	disk->fops = &sr_bdops;
-	disk->flags = GENHD_FL_CD;
+	disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 
 	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c
index b8a2b30..77ac2d4 100644
--- a/drivers/staging/zcache/zcache.c
+++ b/drivers/staging/zcache/zcache.c
@@ -1181,9 +1181,12 @@
 /*
  * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
  */
-static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_zcache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
 	int ret = -1;
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if (nr >= 0) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c
index d5bfd41..e0a7754 100644
--- a/drivers/tty/serial/68328serial.c
+++ b/drivers/tty/serial/68328serial.c
@@ -281,7 +281,7 @@
 #ifdef CONFIG_MAGIC_SYSRQ
 			} else if (ch == 0x10) { /* ^P */
 				show_state();
-				show_free_areas();
+				show_free_areas(0);
 				show_buffers();
 /*				show_net_buffers(); */
 				return;
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index c63d0d1..f2cb750 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -15,6 +15,7 @@
  *Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/serial_reg.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/serial_core.h>
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index f9916ca..549b9606 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1460,6 +1460,14 @@
 	---help---
 	  Driver for graphics boards with S3 Trio / S3 Virge chip.
 
+config FB_S3_DDC
+	bool "DDC for S3 support"
+	depends on FB_S3
+	select FB_DDC
+	default y
+	help
+	  Say Y here if you want DDC support for your S3 graphics card.
+
 config FB_SAVAGE
 	tristate "S3 Savage support"
 	depends on FB && PCI && EXPERIMENTAL
@@ -1983,6 +1991,18 @@
 	---help---
 	  Driver for the on-chip SH-Mobile HDMI controller.
 
+config FB_SH_MOBILE_MERAM
+	tristate "SuperH Mobile MERAM read ahead support for LCDC"
+	depends on FB_SH_MOBILE_LCDC
+	default y
+	---help---
+	  Enable MERAM support for the SH-Mobile LCD controller.
+
+	  This will allow for caching of the framebuffer to provide more
+	  reliable access under heavy main memory bus traffic situations.
+	  Up to 4 memory channels can be configured, allowing 4 RGB or
+	  2 YCbCr framebuffers to be configured.
+
 config FB_TMIO
 	tristate "Toshiba Mobile IO FrameBuffer support"
 	depends on FB && MFD_CORE
@@ -2246,29 +2266,43 @@
 config FB_MB862XX
 	tristate "Fujitsu MB862xx GDC support"
 	depends on FB
+	depends on PCI || (OF && PPC)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
 	---help---
 	  Frame buffer driver for Fujitsu Carmine/Coral-P(A)/Lime controllers.
 
+choice
+	prompt "GDC variant"
+	depends on FB_MB862XX
+
 config FB_MB862XX_PCI_GDC
 	bool "Carmine/Coral-P(A) GDC"
-	depends on PCI && FB_MB862XX
+	depends on PCI
 	---help---
 	  This enables framebuffer support for Fujitsu Carmine/Coral-P(A)
 	  PCI graphics controller devices.
 
 config FB_MB862XX_LIME
 	bool "Lime GDC"
-	depends on FB_MB862XX
-	depends on OF && !FB_MB862XX_PCI_GDC
-	depends on PPC
+	depends on OF && PPC
 	select FB_FOREIGN_ENDIAN
 	select FB_LITTLE_ENDIAN
 	---help---
 	  Framebuffer support for Fujitsu Lime GDC on host CPU bus.
 
+endchoice
+
+config FB_MB862XX_I2C
+	bool "Support I2C bus on MB862XX GDC"
+	depends on FB_MB862XX && I2C
+	default y
+	help
+	  Selecting this option adds Coral-P(A)/Lime GDC I2C bus adapter
+	  driver to support accessing I2C devices on controller's I2C bus.
+	  These are usually some video decoder chips.
+
 config FB_EP93XX
 	tristate "EP93XX frame buffer support"
 	depends on FB && ARCH_EP93XX
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 2ea44b6..8b83129 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -130,6 +130,7 @@
 obj-$(CONFIG_FB_XILINX)           += xilinxfb.o
 obj-$(CONFIG_SH_MIPI_DSI)	  += sh_mipi_dsi.o
 obj-$(CONFIG_FB_SH_MOBILE_HDMI)	  += sh_mobile_hdmi.o
+obj-$(CONFIG_FB_SH_MOBILE_MERAM)  += sh_mobile_meram.o
 obj-$(CONFIG_FB_SH_MOBILE_LCDC)	  += sh_mobile_lcdcfb.o
 obj-$(CONFIG_FB_OMAP)             += omap/
 obj-y                             += omap2/
diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c
index e5d6b56..5ea6596 100644
--- a/drivers/video/amifb.c
+++ b/drivers/video/amifb.c
@@ -2224,22 +2224,23 @@
 	 * Allocate, Clear and Align a Block of Chip Memory
 	 */
 
-static u_long unaligned_chipptr = 0;
+static void *aligned_chipptr;
 
 static inline u_long __init chipalloc(u_long size)
 {
-	size += PAGE_SIZE-1;
-	if (!(unaligned_chipptr = (u_long)amiga_chip_alloc(size,
-							   "amifb [RAM]")))
-		panic("No Chip RAM for frame buffer");
-	memset((void *)unaligned_chipptr, 0, size);
-	return PAGE_ALIGN(unaligned_chipptr);
+	aligned_chipptr = amiga_chip_alloc(size, "amifb [RAM]");
+	if (!aligned_chipptr) {
+		pr_err("amifb: No Chip RAM for frame buffer");
+		return 0;
+	}
+	memset(aligned_chipptr, 0, size);
+	return (u_long)aligned_chipptr;
 }
 
 static inline void chipfree(void)
 {
-	if (unaligned_chipptr)
-		amiga_chip_free((void *)unaligned_chipptr);
+	if (aligned_chipptr)
+		amiga_chip_free(aligned_chipptr);
 }
 
 
@@ -2295,7 +2296,7 @@
 			    defmode = amiga_vblank == 50 ? DEFMODE_PAL
 							 : DEFMODE_NTSC;
 			if (amiga_chip_avail()-CHIPRAM_SAFETY_LIMIT >
-			    VIDEOMEMSIZE_ECS_1M)
+			    VIDEOMEMSIZE_ECS_2M)
 				fb_info.fix.smem_len = VIDEOMEMSIZE_ECS_2M;
 			else
 				fb_info.fix.smem_len = VIDEOMEMSIZE_ECS_1M;
@@ -2312,7 +2313,7 @@
 			maxfmode = TAG_FMODE_4;
 			defmode = DEFMODE_AGA;
 			if (amiga_chip_avail()-CHIPRAM_SAFETY_LIMIT >
-			    VIDEOMEMSIZE_AGA_1M)
+			    VIDEOMEMSIZE_AGA_2M)
 				fb_info.fix.smem_len = VIDEOMEMSIZE_AGA_2M;
 			else
 				fb_info.fix.smem_len = VIDEOMEMSIZE_AGA_1M;
@@ -2385,6 +2386,10 @@
 	                    DUMMYSPRITEMEMSIZE+
 	                    COPINITSIZE+
 	                    4*COPLISTSIZE);
+	if (!chipptr) {
+		err = -ENOMEM;
+		goto amifb_error;
+	}
 
 	assignchunk(videomemory, u_long, chipptr, fb_info.fix.smem_len);
 	assignchunk(spritememory, u_long, chipptr, SPRITEMEMSIZE);
diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index af31197..d1aee73 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -211,8 +211,12 @@
 			const char *buf, size_t count)
 {
 	struct adp5520_bl *data = dev_get_drvdata(dev);
+	int ret;
 
-	strict_strtoul(buf, 10, &data->cached_daylight_max);
+	ret = strict_strtoul(buf, 10, &data->cached_daylight_max);
+	if (ret < 0)
+		return ret;
+
 	return adp5520_store(dev, buf, count, ADP5520_DAYLIGHT_MAX);
 }
 static DEVICE_ATTR(daylight_max, 0664, adp5520_bl_daylight_max_show,
diff --git a/drivers/video/da8xx-fb.c b/drivers/video/da8xx-fb.c
index 8b7d473..fcdac87 100644
--- a/drivers/video/da8xx-fb.c
+++ b/drivers/video/da8xx-fb.c
@@ -899,7 +899,7 @@
 	.fb_blank = cfb_blank,
 };
 
-static int __init fb_probe(struct platform_device *device)
+static int __devinit fb_probe(struct platform_device *device)
 {
 	struct da8xx_lcdc_platform_data *fb_pdata =
 						device->dev.platform_data;
@@ -1165,7 +1165,7 @@
 
 static struct platform_driver da8xx_fb_driver = {
 	.probe = fb_probe,
-	.remove = fb_remove,
+	.remove = __devexit_p(fb_remove),
 	.suspend = fb_suspend,
 	.resume = fb_resume,
 	.driver = {
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index 4eb38db..fb20584 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -242,9 +242,9 @@
 		return 0;
 	}
 
-	printk(KERN_INFO "efifb: dmi detected %s - framebuffer at %p "
+	printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x "
 			 "(%dx%d, stride %d)\n", id->ident,
-			 (void *)screen_info.lfb_base, screen_info.lfb_width,
+			 screen_info.lfb_base, screen_info.lfb_width,
 			 screen_info.lfb_height, screen_info.lfb_linelength);
 
 
diff --git a/drivers/video/mb862xx/Makefile b/drivers/video/mb862xx/Makefile
index d777771..5707ed0 100644
--- a/drivers/video/mb862xx/Makefile
+++ b/drivers/video/mb862xx/Makefile
@@ -2,4 +2,7 @@
 # Makefile for the MB862xx framebuffer driver
 #
 
-obj-$(CONFIG_FB_MB862XX)	:= mb862xxfb.o mb862xxfb_accel.o
+obj-$(CONFIG_FB_MB862XX) += mb862xxfb.o
+
+mb862xxfb-y := mb862xxfbdrv.o mb862xxfb_accel.o
+mb862xxfb-$(CONFIG_FB_MB862XX_I2C) += mb862xx-i2c.o
diff --git a/drivers/video/mb862xx/mb862xx-i2c.c b/drivers/video/mb862xx/mb862xx-i2c.c
new file mode 100644
index 0000000..cb77d3b
--- /dev/null
+++ b/drivers/video/mb862xx/mb862xx-i2c.c
@@ -0,0 +1,177 @@
+/*
+ * Coral-P(A)/Lime I2C adapter driver
+ *
+ * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/fb.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+
+#include "mb862xxfb.h"
+#include "mb862xx_reg.h"
+
+static int mb862xx_i2c_wait_event(struct i2c_adapter *adap)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+	u32 reg;
+
+	do {
+		udelay(1);
+		reg = inreg(i2c, GC_I2C_BCR);
+		if (reg & (I2C_INT | I2C_BER))
+			break;
+	} while (1);
+
+	return (reg & I2C_BER) ? 0 : 1;
+}
+
+static int mb862xx_i2c_do_address(struct i2c_adapter *adap, int addr)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+
+	outreg(i2c, GC_I2C_DAR, addr);
+	outreg(i2c, GC_I2C_CCR, I2C_CLOCK_AND_ENABLE);
+	outreg(i2c, GC_I2C_BCR, par->i2c_rs ? I2C_REPEATED_START : I2C_START);
+	if (!mb862xx_i2c_wait_event(adap))
+		return -EIO;
+	par->i2c_rs = !(inreg(i2c, GC_I2C_BSR) & I2C_LRB);
+	return par->i2c_rs;
+}
+
+static int mb862xx_i2c_write_byte(struct i2c_adapter *adap, u8 byte)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+
+	outreg(i2c, GC_I2C_DAR, byte);
+	outreg(i2c, GC_I2C_BCR, I2C_START);
+	if (!mb862xx_i2c_wait_event(adap))
+		return -EIO;
+	return !(inreg(i2c, GC_I2C_BSR) & I2C_LRB);
+}
+
+static int mb862xx_i2c_read_byte(struct i2c_adapter *adap, u8 *byte, int last)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+
+	outreg(i2c, GC_I2C_BCR, I2C_START | (last ? 0 : I2C_ACK));
+	if (!mb862xx_i2c_wait_event(adap))
+		return 0;
+	*byte = inreg(i2c, GC_I2C_DAR);
+	return 1;
+}
+
+void mb862xx_i2c_stop(struct i2c_adapter *adap)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+
+	outreg(i2c, GC_I2C_BCR, I2C_STOP);
+	outreg(i2c, GC_I2C_CCR, I2C_DISABLE);
+	par->i2c_rs = 0;
+}
+
+static int mb862xx_i2c_read(struct i2c_adapter *adap, struct i2c_msg *m)
+{
+	int i, ret = 0;
+	int last = m->len - 1;
+
+	for (i = 0; i < m->len; i++) {
+		if (!mb862xx_i2c_read_byte(adap, &m->buf[i], i == last)) {
+			ret = -EIO;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int mb862xx_i2c_write(struct i2c_adapter *adap, struct i2c_msg *m)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < m->len; i++) {
+		if (!mb862xx_i2c_write_byte(adap, m->buf[i])) {
+			ret = -EIO;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int mb862xx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+			int num)
+{
+	struct mb862xxfb_par *par = adap->algo_data;
+	struct i2c_msg *m;
+	int addr;
+	int i = 0, err = 0;
+
+	dev_dbg(par->dev, "%s: %d msgs\n", __func__, num);
+
+	for (i = 0; i < num; i++) {
+		m = &msgs[i];
+		if (!m->len) {
+			dev_dbg(par->dev, "%s: null msgs\n", __func__);
+			continue;
+		}
+		addr = m->addr;
+		if (m->flags & I2C_M_RD)
+			addr |= 1;
+
+		err = mb862xx_i2c_do_address(adap, addr);
+		if (err < 0)
+			break;
+		if (m->flags & I2C_M_RD)
+			err = mb862xx_i2c_read(adap, m);
+		else
+			err = mb862xx_i2c_write(adap, m);
+	}
+
+	if (i)
+		mb862xx_i2c_stop(adap);
+
+	return (err < 0) ? err : i;
+}
+
+static u32 mb862xx_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_SMBUS_BYTE_DATA;
+}
+
+static const struct i2c_algorithm mb862xx_algo = {
+	.master_xfer	= mb862xx_xfer,
+	.functionality	= mb862xx_func,
+};
+
+static struct i2c_adapter mb862xx_i2c_adapter = {
+	.name		= "MB862xx I2C adapter",
+	.algo		= &mb862xx_algo,
+	.owner		= THIS_MODULE,
+};
+
+int mb862xx_i2c_init(struct mb862xxfb_par *par)
+{
+	int ret;
+
+	mb862xx_i2c_adapter.algo_data = par;
+	par->adap = &mb862xx_i2c_adapter;
+
+	ret = i2c_add_adapter(par->adap);
+	if (ret < 0) {
+		dev_err(par->dev, "failed to add %s\n",
+			mb862xx_i2c_adapter.name);
+	}
+	return ret;
+}
+
+void mb862xx_i2c_exit(struct mb862xxfb_par *par)
+{
+	if (par->adap) {
+		i2c_del_adapter(par->adap);
+		par->adap = NULL;
+	}
+}
diff --git a/drivers/video/mb862xx/mb862xx_reg.h b/drivers/video/mb862xx/mb862xx_reg.h
index 2ba65e1..9df48b8 100644
--- a/drivers/video/mb862xx/mb862xx_reg.h
+++ b/drivers/video/mb862xx/mb862xx_reg.h
@@ -5,11 +5,8 @@
 #ifndef _MB862XX_REG_H
 #define _MB862XX_REG_H
 
-#ifdef MB862XX_MMIO_BOTTOM
-#define MB862XX_MMIO_BASE	0x03fc0000
-#else
 #define MB862XX_MMIO_BASE	0x01fc0000
-#endif
+#define MB862XX_MMIO_HIGH_BASE	0x03fc0000
 #define MB862XX_I2C_BASE	0x0000c000
 #define MB862XX_DISP_BASE	0x00010000
 #define MB862XX_CAP_BASE	0x00018000
@@ -23,6 +20,7 @@
 #define GC_IMASK		0x00000024
 #define GC_SRST			0x0000002c
 #define GC_CCF			0x00000038
+#define GC_RSW			0x0000005c
 #define GC_CID			0x000000f0
 #define GC_REVISION		0x00000084
 
@@ -53,10 +51,16 @@
 #define GC_L0OA0		0x00000024
 #define GC_L0DA0		0x00000028
 #define GC_L0DY_L0DX		0x0000002c
+#define GC_L1M			0x00000030
+#define GC_L1DA			0x00000034
 #define GC_DCM1			0x00000100
 #define GC_L0EM			0x00000110
 #define GC_L0WY_L0WX		0x00000114
 #define GC_L0WH_L0WW		0x00000118
+#define GC_L1EM			0x00000120
+#define GC_L1WY_L1WX		0x00000124
+#define GC_L1WH_L1WW		0x00000128
+#define GC_DLS			0x00000180
 #define GC_DCM2			0x00000104
 #define GC_DCM3			0x00000108
 #define GC_CPM_CUTC		0x000000a0
@@ -68,6 +72,11 @@
 
 #define GC_CPM_CEN0		0x00100000
 #define GC_CPM_CEN1		0x00200000
+#define GC_DCM1_DEN		0x80000000
+#define GC_DCM1_L1E		0x00020000
+#define GC_L1M_16		0x80000000
+#define GC_L1M_YC		0x40000000
+#define GC_L1M_CS		0x20000000
 
 #define GC_DCM01_ESY		0x00000004
 #define GC_DCM01_SC		0x00003f00
@@ -79,9 +88,50 @@
 #define GC_L0M_L0C_16		0x80000000
 #define GC_L0EM_L0EC_24		0x40000000
 #define GC_L0M_L0W_UNIT		64
+#define GC_L1EM_DM		0x02000000
 
 #define GC_DISP_REFCLK_400	400
 
+/* I2C */
+#define GC_I2C_BSR		0x00000000	/* BSR */
+#define GC_I2C_BCR		0x00000004	/* BCR */
+#define GC_I2C_CCR		0x00000008	/* CCR */
+#define GC_I2C_ADR		0x0000000C	/* ADR */
+#define GC_I2C_DAR		0x00000010	/* DAR */
+
+#define I2C_DISABLE		0x00000000
+#define I2C_STOP		0x00000000
+#define I2C_START		0x00000010
+#define I2C_REPEATED_START	0x00000030
+#define I2C_CLOCK_AND_ENABLE	0x0000003f
+#define I2C_READY		0x01
+#define I2C_INT			0x01
+#define I2C_INTE		0x02
+#define I2C_ACK			0x08
+#define I2C_BER			0x80
+#define I2C_BEIE		0x40
+#define I2C_TRX			0x80
+#define I2C_LRB			0x10
+
+/* Capture registers and bits */
+#define GC_CAP_VCM		0x00000000
+#define GC_CAP_CSC		0x00000004
+#define GC_CAP_VCS		0x00000008
+#define GC_CAP_CBM		0x00000010
+#define GC_CAP_CBOA		0x00000014
+#define GC_CAP_CBLA		0x00000018
+#define GC_CAP_IMG_START	0x0000001C
+#define GC_CAP_IMG_END		0x00000020
+#define GC_CAP_CMSS		0x00000048
+#define GC_CAP_CMDS		0x0000004C
+
+#define GC_VCM_VIE		0x80000000
+#define GC_VCM_CM		0x03000000
+#define GC_VCM_VS_PAL		0x00000002
+#define GC_CBM_OO		0x80000000
+#define GC_CBM_HRV		0x00000010
+#define GC_CBM_CBST		0x00000001
+
 /* Carmine specific */
 #define MB86297_DRAW_BASE		0x00020000
 #define MB86297_DISP0_BASE		0x00100000
diff --git a/drivers/video/mb862xx/mb862xxfb.h b/drivers/video/mb862xx/mb862xxfb.h
index d7e7cb7..8550630 100644
--- a/drivers/video/mb862xx/mb862xxfb.h
+++ b/drivers/video/mb862xx/mb862xxfb.h
@@ -1,6 +1,26 @@
 #ifndef __MB862XX_H__
 #define __MB862XX_H__
 
+struct mb862xx_l1_cfg {
+	unsigned short sx;
+	unsigned short sy;
+	unsigned short sw;
+	unsigned short sh;
+	unsigned short dx;
+	unsigned short dy;
+	unsigned short dw;
+	unsigned short dh;
+	int mirror;
+};
+
+#define MB862XX_BASE		'M'
+#define MB862XX_L1_GET_CFG	_IOR(MB862XX_BASE, 0, struct mb862xx_l1_cfg*)
+#define MB862XX_L1_SET_CFG	_IOW(MB862XX_BASE, 1, struct mb862xx_l1_cfg*)
+#define MB862XX_L1_ENABLE	_IOW(MB862XX_BASE, 2, int)
+#define MB862XX_L1_CAP_CTL	_IOW(MB862XX_BASE, 3, int)
+
+#ifdef __KERNEL__
+
 #define PCI_VENDOR_ID_FUJITSU_LIMITED	0x10cf
 #define PCI_DEVICE_ID_FUJITSU_CORALP	0x2019
 #define PCI_DEVICE_ID_FUJITSU_CORALPA	0x201e
@@ -38,6 +58,8 @@
 	void __iomem		*mmio_base;	/* remapped registers */
 	size_t			mapped_vram;	/* length of remapped vram */
 	size_t			mmio_len;	/* length of register region */
+	unsigned long		cap_buf;	/* capture buffers offset */
+	size_t			cap_len;	/* length of capture buffers */
 
 	void __iomem		*host;		/* relocatable reg. bases */
 	void __iomem		*i2c;
@@ -57,11 +79,23 @@
 	unsigned int		refclk;		/* disp. reference clock */
 	struct mb862xx_gc_mode	*gc_mode;	/* GDC mode init data */
 	int			pre_init;	/* don't init display if 1 */
+	struct i2c_adapter	*adap;		/* GDC I2C bus adapter */
+	int			i2c_rs;
+
+	struct mb862xx_l1_cfg	l1_cfg;
+	int			l1_stride;
 
 	u32			pseudo_palette[16];
 };
 
 extern void mb862xxfb_init_accel(struct fb_info *info, int xres);
+#ifdef CONFIG_FB_MB862XX_I2C
+extern int mb862xx_i2c_init(struct mb862xxfb_par *par);
+extern void mb862xx_i2c_exit(struct mb862xxfb_par *par);
+#else
+static inline int mb862xx_i2c_init(struct mb862xxfb_par *par) { return 0; }
+static inline void mb862xx_i2c_exit(struct mb862xxfb_par *par) { }
+#endif
 
 #if defined(CONFIG_FB_MB862XX_LIME) && defined(CONFIG_FB_MB862XX_PCI_GDC)
 #error	"Select Lime GDC or CoralP/Carmine support, but not both together"
@@ -82,4 +116,6 @@
 
 #define pack(a, b)	(((a) << 16) | (b))
 
+#endif /* __KERNEL__ */
+
 #endif
diff --git a/drivers/video/mb862xx/mb862xxfb.c b/drivers/video/mb862xx/mb862xxfbdrv.c
similarity index 86%
rename from drivers/video/mb862xx/mb862xxfb.c
rename to drivers/video/mb862xx/mb862xxfbdrv.c
index c76e663..ea39336 100644
--- a/drivers/video/mb862xx/mb862xxfb.c
+++ b/drivers/video/mb862xx/mb862xxfbdrv.c
@@ -27,7 +27,7 @@
 
 #define NR_PALETTE		256
 #define MB862XX_MEM_SIZE	0x1000000
-#define CORALP_MEM_SIZE		0x4000000
+#define CORALP_MEM_SIZE		0x2000000
 #define CARMINE_MEM_SIZE	0x8000000
 #define DRV_NAME		"mb862xxfb"
 
@@ -309,6 +309,97 @@
 	return 0;
 }
 
+static int mb862xxfb_ioctl(struct fb_info *fbi, unsigned int cmd,
+			   unsigned long arg)
+{
+	struct mb862xxfb_par *par = fbi->par;
+	struct mb862xx_l1_cfg *l1_cfg = &par->l1_cfg;
+	void __user *argp = (void __user *)arg;
+	int *enable;
+	u32 l1em = 0;
+
+	switch (cmd) {
+	case MB862XX_L1_GET_CFG:
+		if (copy_to_user(argp, l1_cfg, sizeof(*l1_cfg)))
+			return -EFAULT;
+		break;
+	case MB862XX_L1_SET_CFG:
+		if (copy_from_user(l1_cfg, argp, sizeof(*l1_cfg)))
+			return -EFAULT;
+		if ((l1_cfg->sw >= l1_cfg->dw) && (l1_cfg->sh >= l1_cfg->dh)) {
+			/* downscaling */
+			outreg(cap, GC_CAP_CSC,
+				pack((l1_cfg->sh << 11) / l1_cfg->dh,
+				     (l1_cfg->sw << 11) / l1_cfg->dw));
+			l1em = inreg(disp, GC_L1EM);
+			l1em &= ~GC_L1EM_DM;
+		} else if ((l1_cfg->sw <= l1_cfg->dw) &&
+			   (l1_cfg->sh <= l1_cfg->dh)) {
+			/* upscaling */
+			outreg(cap, GC_CAP_CSC,
+				pack((l1_cfg->sh << 11) / l1_cfg->dh,
+				     (l1_cfg->sw << 11) / l1_cfg->dw));
+			outreg(cap, GC_CAP_CMSS,
+				pack(l1_cfg->sw >> 1, l1_cfg->sh));
+			outreg(cap, GC_CAP_CMDS,
+				pack(l1_cfg->dw >> 1, l1_cfg->dh));
+			l1em = inreg(disp, GC_L1EM);
+			l1em |= GC_L1EM_DM;
+		}
+
+		if (l1_cfg->mirror) {
+			outreg(cap, GC_CAP_CBM,
+				inreg(cap, GC_CAP_CBM) | GC_CBM_HRV);
+			l1em |= l1_cfg->dw * 2 - 8;
+		} else {
+			outreg(cap, GC_CAP_CBM,
+				inreg(cap, GC_CAP_CBM) & ~GC_CBM_HRV);
+			l1em &= 0xffff0000;
+		}
+		outreg(disp, GC_L1EM, l1em);
+		break;
+	case MB862XX_L1_ENABLE:
+		enable = (int *)arg;
+		if (*enable) {
+			outreg(disp, GC_L1DA, par->cap_buf);
+			outreg(cap, GC_CAP_IMG_START,
+				pack(l1_cfg->sy >> 1, l1_cfg->sx));
+			outreg(cap, GC_CAP_IMG_END,
+				pack(l1_cfg->sh, l1_cfg->sw));
+			outreg(disp, GC_L1M, GC_L1M_16 | GC_L1M_YC | GC_L1M_CS |
+					     (par->l1_stride << 16));
+			outreg(disp, GC_L1WY_L1WX,
+				pack(l1_cfg->dy, l1_cfg->dx));
+			outreg(disp, GC_L1WH_L1WW,
+				pack(l1_cfg->dh - 1, l1_cfg->dw));
+			outreg(disp, GC_DLS, 1);
+			outreg(cap, GC_CAP_VCM,
+				GC_VCM_VIE | GC_VCM_CM | GC_VCM_VS_PAL);
+			outreg(disp, GC_DCM1, inreg(disp, GC_DCM1) |
+					      GC_DCM1_DEN | GC_DCM1_L1E);
+		} else {
+			outreg(cap, GC_CAP_VCM,
+				inreg(cap, GC_CAP_VCM) & ~GC_VCM_VIE);
+			outreg(disp, GC_DCM1,
+				inreg(disp, GC_DCM1) & ~GC_DCM1_L1E);
+		}
+		break;
+	case MB862XX_L1_CAP_CTL:
+		enable = (int *)arg;
+		if (*enable) {
+			outreg(cap, GC_CAP_VCM,
+				inreg(cap, GC_CAP_VCM) | GC_VCM_VIE);
+		} else {
+			outreg(cap, GC_CAP_VCM,
+				inreg(cap, GC_CAP_VCM) & ~GC_VCM_VIE);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* framebuffer ops */
 static struct fb_ops mb862xxfb_ops = {
 	.owner		= THIS_MODULE,
@@ -320,6 +411,7 @@
 	.fb_fillrect	= cfb_fillrect,
 	.fb_copyarea	= cfb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
+	.fb_ioctl	= mb862xxfb_ioctl,
 };
 
 /* initialize fb_info data */
@@ -328,6 +420,7 @@
 	struct mb862xxfb_par *par = fbi->par;
 	struct mb862xx_gc_mode *mode = par->gc_mode;
 	unsigned long reg;
+	int stride;
 
 	fbi->fbops = &mb862xxfb_ops;
 	fbi->pseudo_palette = par->pseudo_palette;
@@ -336,7 +429,6 @@
 
 	strcpy(fbi->fix.id, DRV_NAME);
 	fbi->fix.smem_start = (unsigned long)par->fb_base_phys;
-	fbi->fix.smem_len = par->mapped_vram;
 	fbi->fix.mmio_start = (unsigned long)par->mmio_base_phys;
 	fbi->fix.mmio_len = par->mmio_len;
 	fbi->fix.accel = FB_ACCEL_NONE;
@@ -420,6 +512,28 @@
 			 FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
 	fbi->fix.line_length = (fbi->var.xres_virtual *
 				fbi->var.bits_per_pixel) / 8;
+	fbi->fix.smem_len = fbi->fix.line_length * fbi->var.yres_virtual;
+
+	/*
+	 * reserve space for capture buffers and two cursors
+	 * at the end of vram: 720x576 * 2 * 2.2 + 64x64 * 16.
+	 */
+	par->cap_buf = par->mapped_vram - 0x1bd800 - 0x10000;
+	par->cap_len = 0x1bd800;
+	par->l1_cfg.sx = 0;
+	par->l1_cfg.sy = 0;
+	par->l1_cfg.sw = 720;
+	par->l1_cfg.sh = 576;
+	par->l1_cfg.dx = 0;
+	par->l1_cfg.dy = 0;
+	par->l1_cfg.dw = 720;
+	par->l1_cfg.dh = 576;
+	stride = par->l1_cfg.sw * (fbi->var.bits_per_pixel / 8);
+	par->l1_stride = stride / 64 + ((stride % 64) ? 1 : 0);
+	outreg(cap, GC_CAP_CBM, GC_CBM_OO | GC_CBM_CBST |
+				(par->l1_stride << 16));
+	outreg(cap, GC_CAP_CBOA, par->cap_buf);
+	outreg(cap, GC_CAP_CBLA, par->cap_buf + par->cap_len);
 	return 0;
 }
 
@@ -742,22 +856,38 @@
 
 	par->refclk = GC_DISP_REFCLK_400;
 
+	if (par->mapped_vram >= 0x2000000) {
+		/* relocate gdc registers space */
+		writel(1, par->fb_base + MB862XX_MMIO_BASE + GC_RSW);
+		udelay(1); /* wait at least 20 bus cycles */
+	}
+
 	ver = inreg(host, GC_CID);
 	cn = (ver & GC_CID_CNAME_MSK) >> 8;
 	ver = ver & GC_CID_VERSION_MSK;
 	if (cn == 3) {
+		unsigned long reg;
+
 		dev_info(par->dev, "Fujitsu Coral-%s GDC Rev.%d found\n",\
 			 (ver == 6) ? "P" : (ver == 8) ? "PA" : "?",
 			 par->pdev->revision);
-		outreg(host, GC_CCF, GC_CCF_CGE_166 | GC_CCF_COT_133);
-		udelay(200);
-		outreg(host, GC_MMR, GC_MMR_CORALP_EVB_VAL);
-		udelay(10);
+		reg = inreg(disp, GC_DCM1);
+		if (reg & GC_DCM01_DEN && reg & GC_DCM01_L0E)
+			par->pre_init = 1;
+
+		if (!par->pre_init) {
+			outreg(host, GC_CCF, GC_CCF_CGE_166 | GC_CCF_COT_133);
+			udelay(200);
+			outreg(host, GC_MMR, GC_MMR_CORALP_EVB_VAL);
+			udelay(10);
+		}
 		/* Clear interrupt status */
 		outreg(host, GC_IST, 0);
 	} else {
 		return -ENODEV;
 	}
+
+	mb862xx_i2c_init(par);
 	return 0;
 }
 
@@ -899,7 +1029,13 @@
 	case PCI_DEVICE_ID_FUJITSU_CORALPA:
 		par->fb_base_phys = pci_resource_start(par->pdev, 0);
 		par->mapped_vram = CORALP_MEM_SIZE;
-		par->mmio_base_phys = par->fb_base_phys + MB862XX_MMIO_BASE;
+		if (par->mapped_vram >= 0x2000000) {
+			par->mmio_base_phys = par->fb_base_phys +
+					      MB862XX_MMIO_HIGH_BASE;
+		} else {
+			par->mmio_base_phys = par->fb_base_phys +
+					      MB862XX_MMIO_BASE;
+		}
 		par->mmio_len = MB862XX_MMIO_SIZE;
 		par->type = BT_CORALP;
 		break;
@@ -1009,6 +1145,8 @@
 		outreg(host, GC_IMASK, 0);
 	}
 
+	mb862xx_i2c_exit(par);
+
 	device_remove_file(&pdev->dev, &dev_attr_dispregs);
 
 	pci_set_drvdata(pdev, NULL);
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index 5294834..0ccd7ad 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -922,14 +922,14 @@
 		return PTR_ERR(dispc.dss_ick);
 	}
 
-	dispc.dss1_fck = clk_get(&dispc.fbdev->dssdev->dev, "dss1_fck");
+	dispc.dss1_fck = clk_get(&dispc.fbdev->dssdev->dev, "fck");
 	if (IS_ERR(dispc.dss1_fck)) {
 		dev_err(dispc.fbdev->dev, "can't get dss1_fck\n");
 		clk_put(dispc.dss_ick);
 		return PTR_ERR(dispc.dss1_fck);
 	}
 
-	dispc.dss_54m_fck = clk_get(&dispc.fbdev->dssdev->dev, "tv_fck");
+	dispc.dss_54m_fck = clk_get(&dispc.fbdev->dssdev->dev, "tv_clk");
 	if (IS_ERR(dispc.dss_54m_fck)) {
 		dev_err(dispc.fbdev->dev, "can't get tv_fck\n");
 		clk_put(dispc.dss_ick);
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index e264efd..b3ddd74 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -90,7 +90,7 @@
 
 /* dummy device for clocks */
 static struct platform_device omapdss_device = {
-	.name		= "omapdss",
+	.name		= "omapdss_dss",
 	.id		= -1,
 	.dev            = {
 		.release = omapdss_release,
diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c
index eada9f1..0c6981f 100644
--- a/drivers/video/omap/rfbi.c
+++ b/drivers/video/omap/rfbi.c
@@ -90,7 +90,7 @@
 		return PTR_ERR(rfbi.dss_ick);
 	}
 
-	rfbi.dss1_fck = clk_get(&rfbi.fbdev->dssdev->dev, "dss1_fck");
+	rfbi.dss1_fck = clk_get(&rfbi.fbdev->dssdev->dev, "fck");
 	if (IS_ERR(rfbi.dss1_fck)) {
 		dev_err(rfbi.fbdev->dev, "can't get dss1_fck\n");
 		clk_put(rfbi.dss_ick);
diff --git a/drivers/video/omap2/Makefile b/drivers/video/omap2/Makefile
index d853d05..5ddef12 100644
--- a/drivers/video/omap2/Makefile
+++ b/drivers/video/omap2/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_OMAP2_VRAM) += vram.o
 obj-$(CONFIG_OMAP2_VRFB) += vrfb.o
 
-obj-y += dss/
-obj-y += omapfb/
+obj-$(CONFIG_OMAP2_DSS) += dss/
+obj-$(CONFIG_FB_OMAP2) += omapfb/
 obj-y += displays/
diff --git a/drivers/video/omap2/displays/Kconfig b/drivers/video/omap2/displays/Kconfig
index d18ad6b..609a280 100644
--- a/drivers/video/omap2/displays/Kconfig
+++ b/drivers/video/omap2/displays/Kconfig
@@ -3,6 +3,7 @@
 
 config PANEL_GENERIC_DPI
         tristate "Generic DPI Panel"
+	depends on OMAP2_DSS_DPI
         help
 	  Generic DPI panel driver.
 	  Supports DVI output for Beagle and OMAP3 SDP.
@@ -11,20 +12,20 @@
 
 config PANEL_LGPHILIPS_LB035Q02
 	tristate "LG.Philips LB035Q02 LCD Panel"
-	depends on OMAP2_DSS && SPI
+	depends on OMAP2_DSS_DPI && SPI
 	help
 	  LCD Panel used on the Gumstix Overo Palo35
 
 config PANEL_SHARP_LS037V7DW01
         tristate "Sharp LS037V7DW01 LCD Panel"
-        depends on OMAP2_DSS
+        depends on OMAP2_DSS_DPI
         select BACKLIGHT_CLASS_DEVICE
         help
           LCD Panel used in TI's SDP3430 and EVM boards
 
 config PANEL_NEC_NL8048HL11_01B
 	tristate "NEC NL8048HL11-01B Panel"
-	depends on OMAP2_DSS
+	depends on OMAP2_DSS_DPI
 	help
 		This NEC NL8048HL11-01B panel is TFT LCD
 		used in the Zoom2/3/3630 sdp boards.
@@ -37,7 +38,7 @@
 
 config PANEL_TPO_TD043MTEA1
         tristate "TPO TD043MTEA1 LCD Panel"
-        depends on OMAP2_DSS && SPI
+        depends on OMAP2_DSS_DPI && SPI
         help
           LCD Panel used in OMAP3 Pandora
 
diff --git a/drivers/video/omap2/displays/panel-acx565akm.c b/drivers/video/omap2/displays/panel-acx565akm.c
index 7e04c92..dbd59b8 100644
--- a/drivers/video/omap2/displays/panel-acx565akm.c
+++ b/drivers/video/omap2/displays/panel-acx565akm.c
@@ -30,7 +30,7 @@
 #include <linux/backlight.h>
 #include <linux/fb.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define MIPID_CMD_READ_DISP_ID		0x04
 #define MIPID_CMD_READ_RED		0x06
diff --git a/drivers/video/omap2/displays/panel-generic-dpi.c b/drivers/video/omap2/displays/panel-generic-dpi.c
index 4a9b9ff..9c90f75 100644
--- a/drivers/video/omap2/displays/panel-generic-dpi.c
+++ b/drivers/video/omap2/displays/panel-generic-dpi.c
@@ -33,8 +33,9 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <video/omapdss.h>
 
-#include <plat/panel-generic-dpi.h>
+#include <video/omap-panel-generic-dpi.h>
 
 struct panel_config {
 	struct omap_video_timings timings;
@@ -181,6 +182,56 @@
 		.power_off_delay	= 0,
 		.name			= "samsung_lte430wq_f0c",
 	},
+
+	/* Seiko 70WVW1TZ3Z3 */
+	{
+		{
+			.x_res		= 800,
+			.y_res		= 480,
+
+			.pixel_clock	= 33000,
+
+			.hsw		= 128,
+			.hfp		= 10,
+			.hbp		= 10,
+
+			.vsw		= 2,
+			.vfp		= 4,
+			.vbp		= 11,
+		},
+		.acbi			= 0x0,
+		.acb			= 0x0,
+		.config			= OMAP_DSS_LCD_TFT | OMAP_DSS_LCD_IVS |
+						OMAP_DSS_LCD_IHS,
+		.power_on_delay		= 0,
+		.power_off_delay	= 0,
+		.name			= "seiko_70wvw1tz3",
+	},
+
+	/* Powertip PH480272T */
+	{
+		{
+			.x_res		= 480,
+			.y_res		= 272,
+
+			.pixel_clock	= 9000,
+
+			.hsw		= 40,
+			.hfp		= 2,
+			.hbp		= 2,
+
+			.vsw		= 10,
+			.vfp		= 2,
+			.vbp		= 2,
+		},
+		.acbi			= 0x0,
+		.acb			= 0x0,
+		.config			= OMAP_DSS_LCD_TFT | OMAP_DSS_LCD_IVS |
+					  OMAP_DSS_LCD_IHS | OMAP_DSS_LCD_IEO,
+		.power_on_delay		= 0,
+		.power_off_delay	= 0,
+		.name			= "powertip_ph480272t",
+	},
 };
 
 struct panel_drv_data {
@@ -285,7 +336,7 @@
 	return 0;
 }
 
-static void generic_dpi_panel_remove(struct omap_dss_device *dssdev)
+static void __exit generic_dpi_panel_remove(struct omap_dss_device *dssdev)
 {
 	struct panel_drv_data *drv_data = dev_get_drvdata(&dssdev->dev);
 
@@ -358,7 +409,7 @@
 
 static struct omap_dss_driver dpi_driver = {
 	.probe		= generic_dpi_panel_probe,
-	.remove		= generic_dpi_panel_remove,
+	.remove		= __exit_p(generic_dpi_panel_remove),
 
 	.enable		= generic_dpi_panel_enable,
 	.disable	= generic_dpi_panel_disable,
diff --git a/drivers/video/omap2/displays/panel-lgphilips-lb035q02.c b/drivers/video/omap2/displays/panel-lgphilips-lb035q02.c
index 271324d..e0eb35b 100644
--- a/drivers/video/omap2/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/video/omap2/displays/panel-lgphilips-lb035q02.c
@@ -21,7 +21,7 @@
 #include <linux/spi/spi.h>
 #include <linux/mutex.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 struct lb035q02_data {
 	struct mutex lock;
diff --git a/drivers/video/omap2/displays/panel-nec-nl8048hl11-01b.c b/drivers/video/omap2/displays/panel-nec-nl8048hl11-01b.c
index 925e0fa..2ba9d0c 100644
--- a/drivers/video/omap2/displays/panel-nec-nl8048hl11-01b.c
+++ b/drivers/video/omap2/displays/panel-nec-nl8048hl11-01b.c
@@ -22,7 +22,7 @@
 #include <linux/backlight.h>
 #include <linux/fb.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define LCD_XRES		800
 #define LCD_YRES		480
diff --git a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
index d2b35d2..ba38b3a 100644
--- a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c
@@ -25,7 +25,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 struct sharp_data {
 	struct backlight_device *bl;
@@ -120,7 +120,7 @@
 	return 0;
 }
 
-static void sharp_ls_panel_remove(struct omap_dss_device *dssdev)
+static void __exit sharp_ls_panel_remove(struct omap_dss_device *dssdev)
 {
 	struct sharp_data *sd = dev_get_drvdata(&dssdev->dev);
 	struct backlight_device *bl = sd->bl;
@@ -205,7 +205,7 @@
 
 static struct omap_dss_driver sharp_ls_driver = {
 	.probe		= sharp_ls_panel_probe,
-	.remove		= sharp_ls_panel_remove,
+	.remove		= __exit_p(sharp_ls_panel_remove),
 
 	.enable		= sharp_ls_panel_enable,
 	.disable	= sharp_ls_panel_disable,
diff --git a/drivers/video/omap2/displays/panel-taal.c b/drivers/video/omap2/displays/panel-taal.c
index adc9900..fdd5d4ae 100644
--- a/drivers/video/omap2/displays/panel-taal.c
+++ b/drivers/video/omap2/displays/panel-taal.c
@@ -33,8 +33,8 @@
 #include <linux/regulator/consumer.h>
 #include <linux/mutex.h>
 
-#include <plat/display.h>
-#include <plat/nokia-dsi-panel.h>
+#include <video/omapdss.h>
+#include <video/omap-panel-nokia-dsi.h>
 
 /* DSI Virtual channel. Hardcoded for now. */
 #define TCH 0
@@ -63,12 +63,12 @@
 #define DCS_GET_ID2		0xdb
 #define DCS_GET_ID3		0xdc
 
-#define TAAL_ESD_CHECK_PERIOD	msecs_to_jiffies(5000)
-
 static irqreturn_t taal_te_isr(int irq, void *data);
 static void taal_te_timeout_work_callback(struct work_struct *work);
 static int _taal_enable_te(struct omap_dss_device *dssdev, bool enable);
 
+static int taal_panel_reset(struct omap_dss_device *dssdev);
+
 struct panel_regulator {
 	struct regulator *regulator;
 	const char *name;
@@ -229,8 +229,14 @@
 
 	bool intro_printed;
 
-	struct workqueue_struct *esd_wq;
+	struct workqueue_struct *workqueue;
+
 	struct delayed_work esd_work;
+	unsigned esd_interval;
+
+	bool ulps_enabled;
+	unsigned ulps_timeout;
+	struct delayed_work ulps_work;
 
 	struct panel_config *panel_config;
 };
@@ -242,6 +248,7 @@
 }
 
 static void taal_esd_work(struct work_struct *work);
+static void taal_ulps_work(struct work_struct *work);
 
 static void hw_guard_start(struct taal_data *td, int guard_msec)
 {
@@ -264,7 +271,7 @@
 	int r;
 	u8 buf[1];
 
-	r = dsi_vc_dcs_read(td->channel, dcs_cmd, buf, 1);
+	r = dsi_vc_dcs_read(td->dssdev, td->channel, dcs_cmd, buf, 1);
 
 	if (r < 0)
 		return r;
@@ -276,7 +283,7 @@
 
 static int taal_dcs_write_0(struct taal_data *td, u8 dcs_cmd)
 {
-	return dsi_vc_dcs_write(td->channel, &dcs_cmd, 1);
+	return dsi_vc_dcs_write(td->dssdev, td->channel, &dcs_cmd, 1);
 }
 
 static int taal_dcs_write_1(struct taal_data *td, u8 dcs_cmd, u8 param)
@@ -284,7 +291,7 @@
 	u8 buf[2];
 	buf[0] = dcs_cmd;
 	buf[1] = param;
-	return dsi_vc_dcs_write(td->channel, buf, 2);
+	return dsi_vc_dcs_write(td->dssdev, td->channel, buf, 2);
 }
 
 static int taal_sleep_in(struct taal_data *td)
@@ -296,7 +303,7 @@
 	hw_guard_wait(td);
 
 	cmd = DCS_SLEEP_IN;
-	r = dsi_vc_dcs_write_nosync(td->channel, &cmd, 1);
+	r = dsi_vc_dcs_write_nosync(td->dssdev, td->channel, &cmd, 1);
 	if (r)
 		return r;
 
@@ -402,7 +409,7 @@
 	buf[3] = (x2 >> 8) & 0xff;
 	buf[4] = (x2 >> 0) & 0xff;
 
-	r = dsi_vc_dcs_write_nosync(td->channel, buf, sizeof(buf));
+	r = dsi_vc_dcs_write_nosync(td->dssdev, td->channel, buf, sizeof(buf));
 	if (r)
 		return r;
 
@@ -412,15 +419,132 @@
 	buf[3] = (y2 >> 8) & 0xff;
 	buf[4] = (y2 >> 0) & 0xff;
 
-	r = dsi_vc_dcs_write_nosync(td->channel, buf, sizeof(buf));
+	r = dsi_vc_dcs_write_nosync(td->dssdev, td->channel, buf, sizeof(buf));
 	if (r)
 		return r;
 
-	dsi_vc_send_bta_sync(td->channel);
+	dsi_vc_send_bta_sync(td->dssdev, td->channel);
 
 	return r;
 }
 
+static void taal_queue_esd_work(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	if (td->esd_interval > 0)
+		queue_delayed_work(td->workqueue, &td->esd_work,
+				msecs_to_jiffies(td->esd_interval));
+}
+
+static void taal_cancel_esd_work(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	cancel_delayed_work(&td->esd_work);
+}
+
+static void taal_queue_ulps_work(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	if (td->ulps_timeout > 0)
+		queue_delayed_work(td->workqueue, &td->ulps_work,
+				msecs_to_jiffies(td->ulps_timeout));
+}
+
+static void taal_cancel_ulps_work(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	cancel_delayed_work(&td->ulps_work);
+}
+
+static int taal_enter_ulps(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	struct nokia_dsi_panel_data *panel_data = get_panel_data(dssdev);
+	int r;
+
+	if (td->ulps_enabled)
+		return 0;
+
+	taal_cancel_ulps_work(dssdev);
+
+	r = _taal_enable_te(dssdev, false);
+	if (r)
+		goto err;
+
+	disable_irq(gpio_to_irq(panel_data->ext_te_gpio));
+
+	omapdss_dsi_display_disable(dssdev, false, true);
+
+	td->ulps_enabled = true;
+
+	return 0;
+
+err:
+	dev_err(&dssdev->dev, "enter ULPS failed");
+	taal_panel_reset(dssdev);
+
+	td->ulps_enabled = false;
+
+	taal_queue_ulps_work(dssdev);
+
+	return r;
+}
+
+static int taal_exit_ulps(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	struct nokia_dsi_panel_data *panel_data = get_panel_data(dssdev);
+	int r;
+
+	if (!td->ulps_enabled)
+		return 0;
+
+	r = omapdss_dsi_display_enable(dssdev);
+	if (r)
+		goto err;
+
+	omapdss_dsi_vc_enable_hs(dssdev, td->channel, true);
+
+	r = _taal_enable_te(dssdev, true);
+	if (r)
+		goto err;
+
+	enable_irq(gpio_to_irq(panel_data->ext_te_gpio));
+
+	taal_queue_ulps_work(dssdev);
+
+	td->ulps_enabled = false;
+
+	return 0;
+
+err:
+	dev_err(&dssdev->dev, "exit ULPS failed");
+	r = taal_panel_reset(dssdev);
+
+	enable_irq(gpio_to_irq(panel_data->ext_te_gpio));
+	td->ulps_enabled = false;
+
+	taal_queue_ulps_work(dssdev);
+
+	return r;
+}
+
+static int taal_wake_up(struct omap_dss_device *dssdev)
+{
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	if (td->ulps_enabled)
+		return taal_exit_ulps(dssdev);
+
+	taal_cancel_ulps_work(dssdev);
+	taal_queue_ulps_work(dssdev);
+	return 0;
+}
+
 static int taal_bl_update_status(struct backlight_device *dev)
 {
 	struct omap_dss_device *dssdev = dev_get_drvdata(&dev->dev);
@@ -441,9 +565,13 @@
 
 	if (td->use_dsi_bl) {
 		if (td->enabled) {
-			dsi_bus_lock();
-			r = taal_dcs_write_1(td, DCS_BRIGHTNESS, level);
-			dsi_bus_unlock();
+			dsi_bus_lock(dssdev);
+
+			r = taal_wake_up(dssdev);
+			if (!r)
+				r = taal_dcs_write_1(td, DCS_BRIGHTNESS, level);
+
+			dsi_bus_unlock(dssdev);
 		} else {
 			r = 0;
 		}
@@ -504,9 +632,13 @@
 	mutex_lock(&td->lock);
 
 	if (td->enabled) {
-		dsi_bus_lock();
-		r = taal_dcs_read_1(td, DCS_READ_NUM_ERRORS, &errors);
-		dsi_bus_unlock();
+		dsi_bus_lock(dssdev);
+
+		r = taal_wake_up(dssdev);
+		if (!r)
+			r = taal_dcs_read_1(td, DCS_READ_NUM_ERRORS, &errors);
+
+		dsi_bus_unlock(dssdev);
 	} else {
 		r = -ENODEV;
 	}
@@ -530,9 +662,13 @@
 	mutex_lock(&td->lock);
 
 	if (td->enabled) {
-		dsi_bus_lock();
-		r = taal_get_id(td, &id1, &id2, &id3);
-		dsi_bus_unlock();
+		dsi_bus_lock(dssdev);
+
+		r = taal_wake_up(dssdev);
+		if (!r)
+			r = taal_get_id(td, &id1, &id2, &id3);
+
+		dsi_bus_unlock(dssdev);
 	} else {
 		r = -ENODEV;
 	}
@@ -579,6 +715,7 @@
 	struct omap_dss_device *dssdev = to_dss_device(dev);
 	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
 	int i;
+	int r;
 
 	for (i = 0; i < ARRAY_SIZE(cabc_modes); i++) {
 		if (sysfs_streq(cabc_modes[i], buf))
@@ -591,10 +728,19 @@
 	mutex_lock(&td->lock);
 
 	if (td->enabled) {
-		dsi_bus_lock();
-		if (!td->cabc_broken)
-			taal_dcs_write_1(td, DCS_WRITE_CABC, i);
-		dsi_bus_unlock();
+		dsi_bus_lock(dssdev);
+
+		if (!td->cabc_broken) {
+			r = taal_wake_up(dssdev);
+			if (r)
+				goto err;
+
+			r = taal_dcs_write_1(td, DCS_WRITE_CABC, i);
+			if (r)
+				goto err;
+		}
+
+		dsi_bus_unlock(dssdev);
 	}
 
 	td->cabc_mode = i;
@@ -602,6 +748,10 @@
 	mutex_unlock(&td->lock);
 
 	return count;
+err:
+	dsi_bus_unlock(dssdev);
+	mutex_unlock(&td->lock);
+	return r;
 }
 
 static ssize_t show_cabc_available_modes(struct device *dev,
@@ -620,18 +770,161 @@
 	return len < PAGE_SIZE ? len : PAGE_SIZE - 1;
 }
 
+static ssize_t taal_store_esd_interval(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+
+	unsigned long t;
+	int r;
+
+	r = strict_strtoul(buf, 10, &t);
+	if (r)
+		return r;
+
+	mutex_lock(&td->lock);
+	taal_cancel_esd_work(dssdev);
+	td->esd_interval = t;
+	if (td->enabled)
+		taal_queue_esd_work(dssdev);
+	mutex_unlock(&td->lock);
+
+	return count;
+}
+
+static ssize_t taal_show_esd_interval(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	unsigned t;
+
+	mutex_lock(&td->lock);
+	t = td->esd_interval;
+	mutex_unlock(&td->lock);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", t);
+}
+
+static ssize_t taal_store_ulps(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	unsigned long t;
+	int r;
+
+	r = strict_strtoul(buf, 10, &t);
+	if (r)
+		return r;
+
+	mutex_lock(&td->lock);
+
+	if (td->enabled) {
+		dsi_bus_lock(dssdev);
+
+		if (t)
+			r = taal_enter_ulps(dssdev);
+		else
+			r = taal_wake_up(dssdev);
+
+		dsi_bus_unlock(dssdev);
+	}
+
+	mutex_unlock(&td->lock);
+
+	if (r)
+		return r;
+
+	return count;
+}
+
+static ssize_t taal_show_ulps(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	unsigned t;
+
+	mutex_lock(&td->lock);
+	t = td->ulps_enabled;
+	mutex_unlock(&td->lock);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", t);
+}
+
+static ssize_t taal_store_ulps_timeout(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	unsigned long t;
+	int r;
+
+	r = strict_strtoul(buf, 10, &t);
+	if (r)
+		return r;
+
+	mutex_lock(&td->lock);
+	td->ulps_timeout = t;
+
+	if (td->enabled) {
+		/* taal_wake_up will restart the timer */
+		dsi_bus_lock(dssdev);
+		r = taal_wake_up(dssdev);
+		dsi_bus_unlock(dssdev);
+	}
+
+	mutex_unlock(&td->lock);
+
+	if (r)
+		return r;
+
+	return count;
+}
+
+static ssize_t taal_show_ulps_timeout(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct omap_dss_device *dssdev = to_dss_device(dev);
+	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
+	unsigned t;
+
+	mutex_lock(&td->lock);
+	t = td->ulps_timeout;
+	mutex_unlock(&td->lock);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", t);
+}
+
 static DEVICE_ATTR(num_dsi_errors, S_IRUGO, taal_num_errors_show, NULL);
 static DEVICE_ATTR(hw_revision, S_IRUGO, taal_hw_revision_show, NULL);
 static DEVICE_ATTR(cabc_mode, S_IRUGO | S_IWUSR,
 		show_cabc_mode, store_cabc_mode);
 static DEVICE_ATTR(cabc_available_modes, S_IRUGO,
 		show_cabc_available_modes, NULL);
+static DEVICE_ATTR(esd_interval, S_IRUGO | S_IWUSR,
+		taal_show_esd_interval, taal_store_esd_interval);
+static DEVICE_ATTR(ulps, S_IRUGO | S_IWUSR,
+		taal_show_ulps, taal_store_ulps);
+static DEVICE_ATTR(ulps_timeout, S_IRUGO | S_IWUSR,
+		taal_show_ulps_timeout, taal_store_ulps_timeout);
 
 static struct attribute *taal_attrs[] = {
 	&dev_attr_num_dsi_errors.attr,
 	&dev_attr_hw_revision.attr,
 	&dev_attr_cabc_mode.attr,
 	&dev_attr_cabc_available_modes.attr,
+	&dev_attr_esd_interval.attr,
+	&dev_attr_ulps.attr,
+	&dev_attr_ulps_timeout.attr,
 	NULL,
 };
 
@@ -700,6 +993,9 @@
 	}
 	td->dssdev = dssdev;
 	td->panel_config = panel_config;
+	td->esd_interval = panel_data->esd_interval;
+	td->ulps_enabled = false;
+	td->ulps_timeout = panel_data->ulps_timeout;
 
 	mutex_init(&td->lock);
 
@@ -710,13 +1006,14 @@
 	if (r)
 		goto err_reg;
 
-	td->esd_wq = create_singlethread_workqueue("taal_esd");
-	if (td->esd_wq == NULL) {
+	td->workqueue = create_singlethread_workqueue("taal_esd");
+	if (td->workqueue == NULL) {
 		dev_err(&dssdev->dev, "can't create ESD workqueue\n");
 		r = -ENOMEM;
 		goto err_wq;
 	}
 	INIT_DELAYED_WORK_DEFERRABLE(&td->esd_work, taal_esd_work);
+	INIT_DELAYED_WORK(&td->ulps_work, taal_ulps_work);
 
 	dev_set_drvdata(&dssdev->dev, td);
 
@@ -734,8 +1031,8 @@
 		props.max_brightness = 127;
 
 	props.type = BACKLIGHT_RAW;
-	bldev = backlight_device_register("taal", &dssdev->dev, dssdev,
-					  &taal_bl_ops, &props);
+	bldev = backlight_device_register(dev_name(&dssdev->dev), &dssdev->dev,
+					dssdev, &taal_bl_ops, &props);
 	if (IS_ERR(bldev)) {
 		r = PTR_ERR(bldev);
 		goto err_bl;
@@ -810,7 +1107,7 @@
 err_gpio:
 	backlight_device_unregister(bldev);
 err_bl:
-	destroy_workqueue(td->esd_wq);
+	destroy_workqueue(td->workqueue);
 err_wq:
 	free_regulators(panel_config->regulators, panel_config->num_regulators);
 err_reg:
@@ -819,7 +1116,7 @@
 	return r;
 }
 
-static void taal_remove(struct omap_dss_device *dssdev)
+static void __exit taal_remove(struct omap_dss_device *dssdev)
 {
 	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
 	struct nokia_dsi_panel_data *panel_data = get_panel_data(dssdev);
@@ -841,8 +1138,9 @@
 	taal_bl_update_status(bldev);
 	backlight_device_unregister(bldev);
 
-	cancel_delayed_work(&td->esd_work);
-	destroy_workqueue(td->esd_wq);
+	taal_cancel_ulps_work(dssdev);
+	taal_cancel_esd_work(dssdev);
+	destroy_workqueue(td->workqueue);
 
 	/* reset, to be sure that the panel is in a valid state */
 	taal_hw_reset(dssdev);
@@ -867,7 +1165,7 @@
 
 	taal_hw_reset(dssdev);
 
-	omapdss_dsi_vc_enable_hs(td->channel, false);
+	omapdss_dsi_vc_enable_hs(dssdev, td->channel, false);
 
 	r = taal_sleep_out(td);
 	if (r)
@@ -924,7 +1222,7 @@
 		td->intro_printed = true;
 	}
 
-	omapdss_dsi_vc_enable_hs(td->channel, true);
+	omapdss_dsi_vc_enable_hs(dssdev, td->channel, true);
 
 	return 0;
 err:
@@ -932,7 +1230,7 @@
 
 	taal_hw_reset(dssdev);
 
-	omapdss_dsi_display_disable(dssdev);
+	omapdss_dsi_display_disable(dssdev, true, false);
 err0:
 	return r;
 }
@@ -955,15 +1253,23 @@
 		taal_hw_reset(dssdev);
 	}
 
-	omapdss_dsi_display_disable(dssdev);
+	omapdss_dsi_display_disable(dssdev, true, false);
 
 	td->enabled = 0;
 }
 
+static int taal_panel_reset(struct omap_dss_device *dssdev)
+{
+	dev_err(&dssdev->dev, "performing LCD reset\n");
+
+	taal_power_off(dssdev);
+	taal_hw_reset(dssdev);
+	return taal_power_on(dssdev);
+}
+
 static int taal_enable(struct omap_dss_device *dssdev)
 {
 	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
-	struct nokia_dsi_panel_data *panel_data = get_panel_data(dssdev);
 	int r;
 
 	dev_dbg(&dssdev->dev, "enable\n");
@@ -975,18 +1281,16 @@
 		goto err;
 	}
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
 	r = taal_power_on(dssdev);
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
 	if (r)
 		goto err;
 
-	if (panel_data->use_esd_check)
-		queue_delayed_work(td->esd_wq, &td->esd_work,
-				TAAL_ESD_CHECK_PERIOD);
+	taal_queue_esd_work(dssdev);
 
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
@@ -1007,14 +1311,17 @@
 
 	mutex_lock(&td->lock);
 
-	cancel_delayed_work(&td->esd_work);
+	taal_cancel_ulps_work(dssdev);
+	taal_cancel_esd_work(dssdev);
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
-	if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
+	if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) {
+		taal_wake_up(dssdev);
 		taal_power_off(dssdev);
+	}
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
 	dssdev->state = OMAP_DSS_DISPLAY_DISABLED;
 
@@ -1035,13 +1342,16 @@
 		goto err;
 	}
 
-	cancel_delayed_work(&td->esd_work);
+	taal_cancel_ulps_work(dssdev);
+	taal_cancel_esd_work(dssdev);
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
-	taal_power_off(dssdev);
+	r = taal_wake_up(dssdev);
+	if (!r)
+		taal_power_off(dssdev);
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
 	dssdev->state = OMAP_DSS_DISPLAY_SUSPENDED;
 
@@ -1056,7 +1366,6 @@
 static int taal_resume(struct omap_dss_device *dssdev)
 {
 	struct taal_data *td = dev_get_drvdata(&dssdev->dev);
-	struct nokia_dsi_panel_data *panel_data = get_panel_data(dssdev);
 	int r;
 
 	dev_dbg(&dssdev->dev, "resume\n");
@@ -1068,19 +1377,17 @@
 		goto err;
 	}
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
 	r = taal_power_on(dssdev);
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
 	if (r) {
 		dssdev->state = OMAP_DSS_DISPLAY_DISABLED;
 	} else {
 		dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
-		if (panel_data->use_esd_check)
-			queue_delayed_work(td->esd_wq, &td->esd_work,
-					TAAL_ESD_CHECK_PERIOD);
+		taal_queue_esd_work(dssdev);
 	}
 
 	mutex_unlock(&td->lock);
@@ -1095,7 +1402,7 @@
 {
 	struct omap_dss_device *dssdev = data;
 	dev_dbg(&dssdev->dev, "framedone, err %d\n", err);
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 }
 
 static irqreturn_t taal_te_isr(int irq, void *data)
@@ -1123,7 +1430,7 @@
 	return IRQ_HANDLED;
 err:
 	dev_err(&dssdev->dev, "start update failed\n");
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	return IRQ_HANDLED;
 }
 
@@ -1136,7 +1443,7 @@
 	dev_err(&dssdev->dev, "TE not received for 250ms!\n");
 
 	atomic_set(&td->do_update, 0);
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 }
 
 static int taal_update(struct omap_dss_device *dssdev,
@@ -1149,7 +1456,11 @@
 	dev_dbg(&dssdev->dev, "update %d, %d, %d x %d\n", x, y, w, h);
 
 	mutex_lock(&td->lock);
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
+
+	r = taal_wake_up(dssdev);
+	if (r)
+		goto err;
 
 	if (!td->enabled) {
 		r = 0;
@@ -1184,7 +1495,7 @@
 	mutex_unlock(&td->lock);
 	return 0;
 err:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 	return r;
 }
@@ -1196,8 +1507,8 @@
 	dev_dbg(&dssdev->dev, "sync\n");
 
 	mutex_lock(&td->lock);
-	dsi_bus_lock();
-	dsi_bus_unlock();
+	dsi_bus_lock(dssdev);
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 
 	dev_dbg(&dssdev->dev, "sync done\n");
@@ -1235,9 +1546,13 @@
 	if (td->te_enabled == enable)
 		goto end;
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
 	if (td->enabled) {
+		r = taal_wake_up(dssdev);
+		if (r)
+			goto err;
+
 		r = _taal_enable_te(dssdev, enable);
 		if (r)
 			goto err;
@@ -1245,13 +1560,13 @@
 
 	td->te_enabled = enable;
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 end:
 	mutex_unlock(&td->lock);
 
 	return 0;
 err:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 
 	return r;
@@ -1281,9 +1596,13 @@
 	if (td->rotate == rotate)
 		goto end;
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 
 	if (td->enabled) {
+		r = taal_wake_up(dssdev);
+		if (r)
+			goto err;
+
 		r = taal_set_addr_mode(td, rotate, td->mirror);
 		if (r)
 			goto err;
@@ -1291,12 +1610,12 @@
 
 	td->rotate = rotate;
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 end:
 	mutex_unlock(&td->lock);
 	return 0;
 err:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 	return r;
 }
@@ -1325,8 +1644,12 @@
 	if (td->mirror == enable)
 		goto end;
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
 	if (td->enabled) {
+		r = taal_wake_up(dssdev);
+		if (r)
+			goto err;
+
 		r = taal_set_addr_mode(td, td->rotate, enable);
 		if (r)
 			goto err;
@@ -1334,12 +1657,12 @@
 
 	td->mirror = enable;
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 end:
 	mutex_unlock(&td->lock);
 	return 0;
 err:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 	return r;
 }
@@ -1369,7 +1692,11 @@
 		goto err1;
 	}
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
+
+	r = taal_wake_up(dssdev);
+	if (r)
+		goto err2;
 
 	r = taal_dcs_read_1(td, DCS_GET_ID1, &id1);
 	if (r)
@@ -1381,11 +1708,11 @@
 	if (r)
 		goto err2;
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 	mutex_unlock(&td->lock);
 	return 0;
 err2:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 err1:
 	mutex_unlock(&td->lock);
 	return r;
@@ -1415,7 +1742,11 @@
 			dssdev->panel.timings.x_res *
 			dssdev->panel.timings.y_res * 3);
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
+
+	r = taal_wake_up(dssdev);
+	if (r)
+		goto err2;
 
 	/* plen 1 or 2 goes into short packet. until checksum error is fixed,
 	 * use short packets. plen 32 works, but bigger packets seem to cause
@@ -1427,7 +1758,7 @@
 
 	taal_set_update_window(td, x, y, w, h);
 
-	r = dsi_vc_set_max_rx_packet_size(td->channel, plen);
+	r = dsi_vc_set_max_rx_packet_size(dssdev, td->channel, plen);
 	if (r)
 		goto err2;
 
@@ -1435,7 +1766,7 @@
 		u8 dcs_cmd = first ? 0x2e : 0x3e;
 		first = 0;
 
-		r = dsi_vc_dcs_read(td->channel, dcs_cmd,
+		r = dsi_vc_dcs_read(dssdev, td->channel, dcs_cmd,
 				buf + buf_used, size - buf_used);
 
 		if (r < 0) {
@@ -1461,14 +1792,35 @@
 	r = buf_used;
 
 err3:
-	dsi_vc_set_max_rx_packet_size(td->channel, 1);
+	dsi_vc_set_max_rx_packet_size(dssdev, td->channel, 1);
 err2:
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 err1:
 	mutex_unlock(&td->lock);
 	return r;
 }
 
+static void taal_ulps_work(struct work_struct *work)
+{
+	struct taal_data *td = container_of(work, struct taal_data,
+			ulps_work.work);
+	struct omap_dss_device *dssdev = td->dssdev;
+
+	mutex_lock(&td->lock);
+
+	if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE || !td->enabled) {
+		mutex_unlock(&td->lock);
+		return;
+	}
+
+	dsi_bus_lock(dssdev);
+
+	taal_enter_ulps(dssdev);
+
+	dsi_bus_unlock(dssdev);
+	mutex_unlock(&td->lock);
+}
+
 static void taal_esd_work(struct work_struct *work)
 {
 	struct taal_data *td = container_of(work, struct taal_data,
@@ -1485,7 +1837,13 @@
 		return;
 	}
 
-	dsi_bus_lock();
+	dsi_bus_lock(dssdev);
+
+	r = taal_wake_up(dssdev);
+	if (r) {
+		dev_err(&dssdev->dev, "failed to exit ULPS\n");
+		goto err;
+	}
 
 	r = taal_dcs_read_1(td, DCS_RDDSDR, &state1);
 	if (r) {
@@ -1521,22 +1879,20 @@
 			goto err;
 	}
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
-	queue_delayed_work(td->esd_wq, &td->esd_work, TAAL_ESD_CHECK_PERIOD);
+	taal_queue_esd_work(dssdev);
 
 	mutex_unlock(&td->lock);
 	return;
 err:
 	dev_err(&dssdev->dev, "performing LCD reset\n");
 
-	taal_power_off(dssdev);
-	taal_hw_reset(dssdev);
-	taal_power_on(dssdev);
+	taal_panel_reset(dssdev);
 
-	dsi_bus_unlock();
+	dsi_bus_unlock(dssdev);
 
-	queue_delayed_work(td->esd_wq, &td->esd_work, TAAL_ESD_CHECK_PERIOD);
+	taal_queue_esd_work(dssdev);
 
 	mutex_unlock(&td->lock);
 }
@@ -1557,7 +1913,7 @@
 
 static struct omap_dss_driver taal_driver = {
 	.probe		= taal_probe,
-	.remove		= taal_remove,
+	.remove		= __exit_p(taal_remove),
 
 	.enable		= taal_enable,
 	.disable	= taal_disable,
diff --git a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
index dbe9d43..2462b9e 100644
--- a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c
@@ -17,7 +17,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define TPO_R02_MODE(x)		((x) & 7)
 #define TPO_R02_MODE_800x480	7
@@ -144,13 +144,15 @@
 	struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct tpo_td043_device *tpo_td043 = dev_get_drvdata(dev);
-	long val;
+	int val;
 	int ret;
 
-	ret = strict_strtol(buf, 0, &val);
+	ret = kstrtoint(buf, 0, &val);
 	if (ret < 0)
 		return ret;
 
+	val = !!val;
+
 	ret = tpo_td043_write_mirror(tpo_td043->spi, tpo_td043->hmirror, val);
 	if (ret < 0)
 		return ret;
@@ -175,7 +177,7 @@
 	long val;
 	int ret;
 
-	ret = strict_strtol(buf, 0, &val);
+	ret = kstrtol(buf, 0, &val);
 	if (ret != 0 || val & ~7)
 		return -EINVAL;
 
diff --git a/drivers/video/omap2/dss/Kconfig b/drivers/video/omap2/dss/Kconfig
index bfc5da0..6b3e2da 100644
--- a/drivers/video/omap2/dss/Kconfig
+++ b/drivers/video/omap2/dss/Kconfig
@@ -80,7 +80,7 @@
 
 config OMAP2_DSS_DSI
 	bool "DSI support"
-	depends on ARCH_OMAP3
+	depends on ARCH_OMAP3 || ARCH_OMAP4
         default n
 	help
 	  MIPI DSI (Display Serial Interface) support.
@@ -90,14 +90,6 @@
 
 	  See http://www.mipi.org/ for DSI spesifications.
 
-config OMAP2_DSS_USE_DSI_PLL
-	bool "Use DSI PLL for PCLK (EXPERIMENTAL)"
-	default n
-	depends on OMAP2_DSS_DSI
-	help
-	  Use DSI PLL to generate pixel clock.  Currently only for DPI output.
-	  DSI PLL can be used to generate higher and more precise pixel clocks.
-
 config OMAP2_DSS_FAKE_VSYNC
 	bool "Fake VSYNC irq from manual update displays"
 	default n
@@ -125,4 +117,27 @@
 	  Max FCK is 173MHz, so this doesn't work if your PCK
 	  is very high.
 
+config OMAP2_DSS_SLEEP_BEFORE_RESET
+	bool "Sleep 50ms before DSS reset"
+	default y
+	help
+	  For some unknown reason we may get SYNC_LOST errors from the display
+	  subsystem at initialization time if we don't sleep before resetting
+	  the DSS. See the source (dss.c) for more comments.
+
+	  However, 50ms is quite long time to sleep, and with some
+	  configurations the SYNC_LOST may never happen, so the sleep can
+	  be disabled here.
+
+config OMAP2_DSS_SLEEP_AFTER_VENC_RESET
+	bool "Sleep 20ms after VENC reset"
+	default y
+	help
+	  There is a 20ms sleep after VENC reset which seemed to fix the
+	  reset. The reason for the bug is unclear, and it's also unclear
+	  on what platforms this happens.
+
+	  This option enables the sleep, and is enabled by default. You can
+	  disable the sleep if it doesn't cause problems on your platform.
+
 endif
diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c
index 1aa2ed1..3da4267 100644
--- a/drivers/video/omap2/dss/core.c
+++ b/drivers/video/omap2/dss/core.c
@@ -33,7 +33,7 @@
 #include <linux/device.h>
 #include <linux/regulator/consumer.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include "dss.h"
 #include "dss_features.h"
@@ -54,6 +54,9 @@
 module_param_named(debug, dss_debug, bool, 0644);
 #endif
 
+static int omap_dss_register_device(struct omap_dss_device *);
+static void omap_dss_unregister_device(struct omap_dss_device *);
+
 /* REGULATORS */
 
 struct regulator *dss_get_vdds_dsi(void)
@@ -124,8 +127,7 @@
 #endif
 
 #if defined(CONFIG_OMAP2_DSS_DSI) && defined(CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS)
-	debugfs_create_file("dsi_irq", S_IRUGO, dss_debugfs_dir,
-			&dsi_dump_irqs, &dss_debug_fops);
+	dsi_create_debugfs_files_irq(dss_debugfs_dir, &dss_debug_fops);
 #endif
 
 	debugfs_create_file("dss", S_IRUGO, dss_debugfs_dir,
@@ -137,8 +139,7 @@
 			&rfbi_dump_regs, &dss_debug_fops);
 #endif
 #ifdef CONFIG_OMAP2_DSS_DSI
-	debugfs_create_file("dsi", S_IRUGO, dss_debugfs_dir,
-			&dsi_dump_regs, &dss_debug_fops);
+	dsi_create_debugfs_files_reg(dss_debugfs_dir, &dss_debug_fops);
 #endif
 #ifdef CONFIG_OMAP2_DSS_VENC
 	debugfs_create_file("venc", S_IRUGO, dss_debugfs_dir,
@@ -480,7 +481,7 @@
 	reset_device(dev, 0);
 }
 
-int omap_dss_register_device(struct omap_dss_device *dssdev)
+static int omap_dss_register_device(struct omap_dss_device *dssdev)
 {
 	static int dev_num;
 
@@ -494,7 +495,7 @@
 	return device_register(&dssdev->dev);
 }
 
-void omap_dss_unregister_device(struct omap_dss_device *dssdev)
+static void omap_dss_unregister_device(struct omap_dss_device *dssdev)
 {
 	device_unregister(&dssdev->dev);
 }
diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c
index 7804779..7a9a2e7 100644
--- a/drivers/video/omap2/dss/dispc.c
+++ b/drivers/video/omap2/dss/dispc.c
@@ -37,99 +37,15 @@
 #include <plat/sram.h>
 #include <plat/clock.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include "dss.h"
 #include "dss_features.h"
+#include "dispc.h"
 
 /* DISPC */
 #define DISPC_SZ_REGS			SZ_4K
 
-struct dispc_reg { u16 idx; };
-
-#define DISPC_REG(idx)			((const struct dispc_reg) { idx })
-
-/*
- * DISPC common registers and
- * DISPC channel registers , ch = 0 for LCD, ch = 1 for
- * DIGIT, and ch = 2 for LCD2
- */
-#define DISPC_REVISION			DISPC_REG(0x0000)
-#define DISPC_SYSCONFIG			DISPC_REG(0x0010)
-#define DISPC_SYSSTATUS			DISPC_REG(0x0014)
-#define DISPC_IRQSTATUS			DISPC_REG(0x0018)
-#define DISPC_IRQENABLE			DISPC_REG(0x001C)
-#define DISPC_CONTROL			DISPC_REG(0x0040)
-#define DISPC_CONTROL2			DISPC_REG(0x0238)
-#define DISPC_CONFIG			DISPC_REG(0x0044)
-#define DISPC_CONFIG2			DISPC_REG(0x0620)
-#define DISPC_CAPABLE			DISPC_REG(0x0048)
-#define DISPC_DEFAULT_COLOR(ch)		DISPC_REG(ch == 0 ? 0x004C : \
-					(ch == 1 ? 0x0050 : 0x03AC))
-#define DISPC_TRANS_COLOR(ch)		DISPC_REG(ch == 0 ? 0x0054 : \
-					(ch == 1 ? 0x0058 : 0x03B0))
-#define DISPC_LINE_STATUS		DISPC_REG(0x005C)
-#define DISPC_LINE_NUMBER		DISPC_REG(0x0060)
-#define DISPC_TIMING_H(ch)		DISPC_REG(ch != 2 ? 0x0064 : 0x0400)
-#define DISPC_TIMING_V(ch)		DISPC_REG(ch != 2 ? 0x0068 : 0x0404)
-#define DISPC_POL_FREQ(ch)		DISPC_REG(ch != 2 ? 0x006C : 0x0408)
-#define DISPC_DIVISORo(ch)		DISPC_REG(ch != 2 ? 0x0070 : 0x040C)
-#define DISPC_GLOBAL_ALPHA		DISPC_REG(0x0074)
-#define DISPC_SIZE_DIG			DISPC_REG(0x0078)
-#define DISPC_SIZE_LCD(ch)		DISPC_REG(ch != 2 ? 0x007C : 0x03CC)
-
-/* DISPC GFX plane */
-#define DISPC_GFX_BA0			DISPC_REG(0x0080)
-#define DISPC_GFX_BA1			DISPC_REG(0x0084)
-#define DISPC_GFX_POSITION		DISPC_REG(0x0088)
-#define DISPC_GFX_SIZE			DISPC_REG(0x008C)
-#define DISPC_GFX_ATTRIBUTES		DISPC_REG(0x00A0)
-#define DISPC_GFX_FIFO_THRESHOLD	DISPC_REG(0x00A4)
-#define DISPC_GFX_FIFO_SIZE_STATUS	DISPC_REG(0x00A8)
-#define DISPC_GFX_ROW_INC		DISPC_REG(0x00AC)
-#define DISPC_GFX_PIXEL_INC		DISPC_REG(0x00B0)
-#define DISPC_GFX_WINDOW_SKIP		DISPC_REG(0x00B4)
-#define DISPC_GFX_TABLE_BA		DISPC_REG(0x00B8)
-
-#define DISPC_DATA_CYCLE1(ch)		DISPC_REG(ch != 2 ? 0x01D4 : 0x03C0)
-#define DISPC_DATA_CYCLE2(ch)		DISPC_REG(ch != 2 ? 0x01D8 : 0x03C4)
-#define DISPC_DATA_CYCLE3(ch)		DISPC_REG(ch != 2 ? 0x01DC : 0x03C8)
-#define DISPC_CPR_COEF_R(ch)		DISPC_REG(ch != 2 ? 0x0220 : 0x03BC)
-#define DISPC_CPR_COEF_G(ch)		DISPC_REG(ch != 2 ? 0x0224 : 0x03B8)
-#define DISPC_CPR_COEF_B(ch)		DISPC_REG(ch != 2 ? 0x0228 : 0x03B4)
-
-#define DISPC_GFX_PRELOAD		DISPC_REG(0x022C)
-
-/* DISPC Video plane, n = 0 for VID1 and n = 1 for VID2 */
-#define DISPC_VID_REG(n, idx)		DISPC_REG(0x00BC + (n)*0x90 + idx)
-
-#define DISPC_VID_BA0(n)		DISPC_VID_REG(n, 0x0000)
-#define DISPC_VID_BA1(n)		DISPC_VID_REG(n, 0x0004)
-#define DISPC_VID_POSITION(n)		DISPC_VID_REG(n, 0x0008)
-#define DISPC_VID_SIZE(n)		DISPC_VID_REG(n, 0x000C)
-#define DISPC_VID_ATTRIBUTES(n)		DISPC_VID_REG(n, 0x0010)
-#define DISPC_VID_FIFO_THRESHOLD(n)	DISPC_VID_REG(n, 0x0014)
-#define DISPC_VID_FIFO_SIZE_STATUS(n)	DISPC_VID_REG(n, 0x0018)
-#define DISPC_VID_ROW_INC(n)		DISPC_VID_REG(n, 0x001C)
-#define DISPC_VID_PIXEL_INC(n)		DISPC_VID_REG(n, 0x0020)
-#define DISPC_VID_FIR(n)		DISPC_VID_REG(n, 0x0024)
-#define DISPC_VID_PICTURE_SIZE(n)	DISPC_VID_REG(n, 0x0028)
-#define DISPC_VID_ACCU0(n)		DISPC_VID_REG(n, 0x002C)
-#define DISPC_VID_ACCU1(n)		DISPC_VID_REG(n, 0x0030)
-
-/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
-#define DISPC_VID_FIR_COEF_H(n, i)	DISPC_REG(0x00F0 + (n)*0x90 + (i)*0x8)
-/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
-#define DISPC_VID_FIR_COEF_HV(n, i)	DISPC_REG(0x00F4 + (n)*0x90 + (i)*0x8)
-/* coef index i = {0, 1, 2, 3, 4} */
-#define DISPC_VID_CONV_COEF(n, i)	DISPC_REG(0x0130 + (n)*0x90 + (i)*0x4)
-/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
-#define DISPC_VID_FIR_COEF_V(n, i)	DISPC_REG(0x01E0 + (n)*0x20 + (i)*0x4)
-
-#define DISPC_VID_PRELOAD(n)		DISPC_REG(0x230 + (n)*0x04)
-
-#define DISPC_DIVISOR			DISPC_REG(0x0804)
-
 #define DISPC_IRQ_MASK_ERROR            (DISPC_IRQ_GFX_FIFO_UNDERFLOW | \
 					 DISPC_IRQ_OCP_ERR | \
 					 DISPC_IRQ_VID1_FIFO_UNDERFLOW | \
@@ -167,10 +83,6 @@
 #define REG_FLD_MOD(idx, val, start, end)				\
 	dispc_write_reg(idx, FLD_MOD(dispc_read_reg(idx), val, start, end))
 
-static const struct dispc_reg dispc_reg_att[] = { DISPC_GFX_ATTRIBUTES,
-	DISPC_VID_ATTRIBUTES(0),
-	DISPC_VID_ATTRIBUTES(1) };
-
 struct dispc_irq_stats {
 	unsigned long last_reset;
 	unsigned irq_count;
@@ -198,25 +110,38 @@
 #endif
 } dispc;
 
+enum omap_color_component {
+	/* used for all color formats for OMAP3 and earlier
+	 * and for RGB and Y color component on OMAP4
+	 */
+	DISPC_COLOR_COMPONENT_RGB_Y		= 1 << 0,
+	/* used for UV component for
+	 * OMAP_DSS_COLOR_YUV2, OMAP_DSS_COLOR_UYVY, OMAP_DSS_COLOR_NV12
+	 * color formats on OMAP4
+	 */
+	DISPC_COLOR_COMPONENT_UV		= 1 << 1,
+};
+
 static void _omap_dispc_set_irqs(void);
 
-static inline void dispc_write_reg(const struct dispc_reg idx, u32 val)
+static inline void dispc_write_reg(const u16 idx, u32 val)
 {
-	__raw_writel(val, dispc.base + idx.idx);
+	__raw_writel(val, dispc.base + idx);
 }
 
-static inline u32 dispc_read_reg(const struct dispc_reg idx)
+static inline u32 dispc_read_reg(const u16 idx)
 {
-	return __raw_readl(dispc.base + idx.idx);
+	return __raw_readl(dispc.base + idx);
 }
 
 #define SR(reg) \
-	dispc.ctx[(DISPC_##reg).idx / sizeof(u32)] = dispc_read_reg(DISPC_##reg)
+	dispc.ctx[DISPC_##reg / sizeof(u32)] = dispc_read_reg(DISPC_##reg)
 #define RR(reg) \
-	dispc_write_reg(DISPC_##reg, dispc.ctx[(DISPC_##reg).idx / sizeof(u32)])
+	dispc_write_reg(DISPC_##reg, dispc.ctx[DISPC_##reg / sizeof(u32)])
 
 void dispc_save_context(void)
 {
+	int i;
 	if (cpu_is_omap24xx())
 		return;
 
@@ -224,157 +149,153 @@
 	SR(IRQENABLE);
 	SR(CONTROL);
 	SR(CONFIG);
-	SR(DEFAULT_COLOR(0));
-	SR(DEFAULT_COLOR(1));
-	SR(TRANS_COLOR(0));
-	SR(TRANS_COLOR(1));
+	SR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD));
+	SR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_DIGIT));
+	SR(TRANS_COLOR(OMAP_DSS_CHANNEL_LCD));
+	SR(TRANS_COLOR(OMAP_DSS_CHANNEL_DIGIT));
 	SR(LINE_NUMBER);
-	SR(TIMING_H(0));
-	SR(TIMING_V(0));
-	SR(POL_FREQ(0));
-	SR(DIVISORo(0));
+	SR(TIMING_H(OMAP_DSS_CHANNEL_LCD));
+	SR(TIMING_V(OMAP_DSS_CHANNEL_LCD));
+	SR(POL_FREQ(OMAP_DSS_CHANNEL_LCD));
+	SR(DIVISORo(OMAP_DSS_CHANNEL_LCD));
 	SR(GLOBAL_ALPHA);
-	SR(SIZE_DIG);
-	SR(SIZE_LCD(0));
+	SR(SIZE_MGR(OMAP_DSS_CHANNEL_DIGIT));
+	SR(SIZE_MGR(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
 		SR(CONTROL2);
-		SR(DEFAULT_COLOR(2));
-		SR(TRANS_COLOR(2));
-		SR(SIZE_LCD(2));
-		SR(TIMING_H(2));
-		SR(TIMING_V(2));
-		SR(POL_FREQ(2));
-		SR(DIVISORo(2));
+		SR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		SR(TRANS_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		SR(SIZE_MGR(OMAP_DSS_CHANNEL_LCD2));
+		SR(TIMING_H(OMAP_DSS_CHANNEL_LCD2));
+		SR(TIMING_V(OMAP_DSS_CHANNEL_LCD2));
+		SR(POL_FREQ(OMAP_DSS_CHANNEL_LCD2));
+		SR(DIVISORo(OMAP_DSS_CHANNEL_LCD2));
 		SR(CONFIG2);
 	}
 
-	SR(GFX_BA0);
-	SR(GFX_BA1);
-	SR(GFX_POSITION);
-	SR(GFX_SIZE);
-	SR(GFX_ATTRIBUTES);
-	SR(GFX_FIFO_THRESHOLD);
-	SR(GFX_ROW_INC);
-	SR(GFX_PIXEL_INC);
-	SR(GFX_WINDOW_SKIP);
-	SR(GFX_TABLE_BA);
+	SR(OVL_BA0(OMAP_DSS_GFX));
+	SR(OVL_BA1(OMAP_DSS_GFX));
+	SR(OVL_POSITION(OMAP_DSS_GFX));
+	SR(OVL_SIZE(OMAP_DSS_GFX));
+	SR(OVL_ATTRIBUTES(OMAP_DSS_GFX));
+	SR(OVL_FIFO_THRESHOLD(OMAP_DSS_GFX));
+	SR(OVL_ROW_INC(OMAP_DSS_GFX));
+	SR(OVL_PIXEL_INC(OMAP_DSS_GFX));
+	SR(OVL_WINDOW_SKIP(OMAP_DSS_GFX));
+	SR(OVL_TABLE_BA(OMAP_DSS_GFX));
 
-	SR(DATA_CYCLE1(0));
-	SR(DATA_CYCLE2(0));
-	SR(DATA_CYCLE3(0));
+	SR(DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD));
+	SR(DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD));
+	SR(DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD));
 
-	SR(CPR_COEF_R(0));
-	SR(CPR_COEF_G(0));
-	SR(CPR_COEF_B(0));
+	SR(CPR_COEF_R(OMAP_DSS_CHANNEL_LCD));
+	SR(CPR_COEF_G(OMAP_DSS_CHANNEL_LCD));
+	SR(CPR_COEF_B(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
-		SR(CPR_COEF_B(2));
-		SR(CPR_COEF_G(2));
-		SR(CPR_COEF_R(2));
+		SR(CPR_COEF_B(OMAP_DSS_CHANNEL_LCD2));
+		SR(CPR_COEF_G(OMAP_DSS_CHANNEL_LCD2));
+		SR(CPR_COEF_R(OMAP_DSS_CHANNEL_LCD2));
 
-		SR(DATA_CYCLE1(2));
-		SR(DATA_CYCLE2(2));
-		SR(DATA_CYCLE3(2));
+		SR(DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD2));
+		SR(DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD2));
+		SR(DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD2));
 	}
 
-	SR(GFX_PRELOAD);
+	SR(OVL_PRELOAD(OMAP_DSS_GFX));
 
 	/* VID1 */
-	SR(VID_BA0(0));
-	SR(VID_BA1(0));
-	SR(VID_POSITION(0));
-	SR(VID_SIZE(0));
-	SR(VID_ATTRIBUTES(0));
-	SR(VID_FIFO_THRESHOLD(0));
-	SR(VID_ROW_INC(0));
-	SR(VID_PIXEL_INC(0));
-	SR(VID_FIR(0));
-	SR(VID_PICTURE_SIZE(0));
-	SR(VID_ACCU0(0));
-	SR(VID_ACCU1(0));
+	SR(OVL_BA0(OMAP_DSS_VIDEO1));
+	SR(OVL_BA1(OMAP_DSS_VIDEO1));
+	SR(OVL_POSITION(OMAP_DSS_VIDEO1));
+	SR(OVL_SIZE(OMAP_DSS_VIDEO1));
+	SR(OVL_ATTRIBUTES(OMAP_DSS_VIDEO1));
+	SR(OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO1));
+	SR(OVL_ROW_INC(OMAP_DSS_VIDEO1));
+	SR(OVL_PIXEL_INC(OMAP_DSS_VIDEO1));
+	SR(OVL_FIR(OMAP_DSS_VIDEO1));
+	SR(OVL_PICTURE_SIZE(OMAP_DSS_VIDEO1));
+	SR(OVL_ACCU0(OMAP_DSS_VIDEO1));
+	SR(OVL_ACCU1(OMAP_DSS_VIDEO1));
 
-	SR(VID_FIR_COEF_H(0, 0));
-	SR(VID_FIR_COEF_H(0, 1));
-	SR(VID_FIR_COEF_H(0, 2));
-	SR(VID_FIR_COEF_H(0, 3));
-	SR(VID_FIR_COEF_H(0, 4));
-	SR(VID_FIR_COEF_H(0, 5));
-	SR(VID_FIR_COEF_H(0, 6));
-	SR(VID_FIR_COEF_H(0, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, i));
 
-	SR(VID_FIR_COEF_HV(0, 0));
-	SR(VID_FIR_COEF_HV(0, 1));
-	SR(VID_FIR_COEF_HV(0, 2));
-	SR(VID_FIR_COEF_HV(0, 3));
-	SR(VID_FIR_COEF_HV(0, 4));
-	SR(VID_FIR_COEF_HV(0, 5));
-	SR(VID_FIR_COEF_HV(0, 6));
-	SR(VID_FIR_COEF_HV(0, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, i));
 
-	SR(VID_CONV_COEF(0, 0));
-	SR(VID_CONV_COEF(0, 1));
-	SR(VID_CONV_COEF(0, 2));
-	SR(VID_CONV_COEF(0, 3));
-	SR(VID_CONV_COEF(0, 4));
+	for (i = 0; i < 5; i++)
+		SR(OVL_CONV_COEF(OMAP_DSS_VIDEO1, i));
 
-	SR(VID_FIR_COEF_V(0, 0));
-	SR(VID_FIR_COEF_V(0, 1));
-	SR(VID_FIR_COEF_V(0, 2));
-	SR(VID_FIR_COEF_V(0, 3));
-	SR(VID_FIR_COEF_V(0, 4));
-	SR(VID_FIR_COEF_V(0, 5));
-	SR(VID_FIR_COEF_V(0, 6));
-	SR(VID_FIR_COEF_V(0, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, i));
 
-	SR(VID_PRELOAD(0));
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		SR(OVL_BA0_UV(OMAP_DSS_VIDEO1));
+		SR(OVL_BA1_UV(OMAP_DSS_VIDEO1));
+		SR(OVL_FIR2(OMAP_DSS_VIDEO1));
+		SR(OVL_ACCU2_0(OMAP_DSS_VIDEO1));
+		SR(OVL_ACCU2_1(OMAP_DSS_VIDEO1));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, i));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, i));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, i));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		SR(OVL_ATTRIBUTES2(OMAP_DSS_VIDEO1));
+
+	SR(OVL_PRELOAD(OMAP_DSS_VIDEO1));
 
 	/* VID2 */
-	SR(VID_BA0(1));
-	SR(VID_BA1(1));
-	SR(VID_POSITION(1));
-	SR(VID_SIZE(1));
-	SR(VID_ATTRIBUTES(1));
-	SR(VID_FIFO_THRESHOLD(1));
-	SR(VID_ROW_INC(1));
-	SR(VID_PIXEL_INC(1));
-	SR(VID_FIR(1));
-	SR(VID_PICTURE_SIZE(1));
-	SR(VID_ACCU0(1));
-	SR(VID_ACCU1(1));
+	SR(OVL_BA0(OMAP_DSS_VIDEO2));
+	SR(OVL_BA1(OMAP_DSS_VIDEO2));
+	SR(OVL_POSITION(OMAP_DSS_VIDEO2));
+	SR(OVL_SIZE(OMAP_DSS_VIDEO2));
+	SR(OVL_ATTRIBUTES(OMAP_DSS_VIDEO2));
+	SR(OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO2));
+	SR(OVL_ROW_INC(OMAP_DSS_VIDEO2));
+	SR(OVL_PIXEL_INC(OMAP_DSS_VIDEO2));
+	SR(OVL_FIR(OMAP_DSS_VIDEO2));
+	SR(OVL_PICTURE_SIZE(OMAP_DSS_VIDEO2));
+	SR(OVL_ACCU0(OMAP_DSS_VIDEO2));
+	SR(OVL_ACCU1(OMAP_DSS_VIDEO2));
 
-	SR(VID_FIR_COEF_H(1, 0));
-	SR(VID_FIR_COEF_H(1, 1));
-	SR(VID_FIR_COEF_H(1, 2));
-	SR(VID_FIR_COEF_H(1, 3));
-	SR(VID_FIR_COEF_H(1, 4));
-	SR(VID_FIR_COEF_H(1, 5));
-	SR(VID_FIR_COEF_H(1, 6));
-	SR(VID_FIR_COEF_H(1, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, i));
 
-	SR(VID_FIR_COEF_HV(1, 0));
-	SR(VID_FIR_COEF_HV(1, 1));
-	SR(VID_FIR_COEF_HV(1, 2));
-	SR(VID_FIR_COEF_HV(1, 3));
-	SR(VID_FIR_COEF_HV(1, 4));
-	SR(VID_FIR_COEF_HV(1, 5));
-	SR(VID_FIR_COEF_HV(1, 6));
-	SR(VID_FIR_COEF_HV(1, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, i));
 
-	SR(VID_CONV_COEF(1, 0));
-	SR(VID_CONV_COEF(1, 1));
-	SR(VID_CONV_COEF(1, 2));
-	SR(VID_CONV_COEF(1, 3));
-	SR(VID_CONV_COEF(1, 4));
+	for (i = 0; i < 5; i++)
+		SR(OVL_CONV_COEF(OMAP_DSS_VIDEO2, i));
 
-	SR(VID_FIR_COEF_V(1, 0));
-	SR(VID_FIR_COEF_V(1, 1));
-	SR(VID_FIR_COEF_V(1, 2));
-	SR(VID_FIR_COEF_V(1, 3));
-	SR(VID_FIR_COEF_V(1, 4));
-	SR(VID_FIR_COEF_V(1, 5));
-	SR(VID_FIR_COEF_V(1, 6));
-	SR(VID_FIR_COEF_V(1, 7));
+	for (i = 0; i < 8; i++)
+		SR(OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, i));
 
-	SR(VID_PRELOAD(1));
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		SR(OVL_BA0_UV(OMAP_DSS_VIDEO2));
+		SR(OVL_BA1_UV(OMAP_DSS_VIDEO2));
+		SR(OVL_FIR2(OMAP_DSS_VIDEO2));
+		SR(OVL_ACCU2_0(OMAP_DSS_VIDEO2));
+		SR(OVL_ACCU2_1(OMAP_DSS_VIDEO2));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, i));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, i));
+
+		for (i = 0; i < 8; i++)
+			SR(OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, i));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		SR(OVL_ATTRIBUTES2(OMAP_DSS_VIDEO2));
+
+	SR(OVL_PRELOAD(OMAP_DSS_VIDEO2));
 
 	if (dss_has_feature(FEAT_CORE_CLK_DIV))
 		SR(DIVISOR);
@@ -382,160 +303,158 @@
 
 void dispc_restore_context(void)
 {
+	int i;
 	RR(SYSCONFIG);
 	/*RR(IRQENABLE);*/
 	/*RR(CONTROL);*/
 	RR(CONFIG);
-	RR(DEFAULT_COLOR(0));
-	RR(DEFAULT_COLOR(1));
-	RR(TRANS_COLOR(0));
-	RR(TRANS_COLOR(1));
+	RR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD));
+	RR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_DIGIT));
+	RR(TRANS_COLOR(OMAP_DSS_CHANNEL_LCD));
+	RR(TRANS_COLOR(OMAP_DSS_CHANNEL_DIGIT));
 	RR(LINE_NUMBER);
-	RR(TIMING_H(0));
-	RR(TIMING_V(0));
-	RR(POL_FREQ(0));
-	RR(DIVISORo(0));
+	RR(TIMING_H(OMAP_DSS_CHANNEL_LCD));
+	RR(TIMING_V(OMAP_DSS_CHANNEL_LCD));
+	RR(POL_FREQ(OMAP_DSS_CHANNEL_LCD));
+	RR(DIVISORo(OMAP_DSS_CHANNEL_LCD));
 	RR(GLOBAL_ALPHA);
-	RR(SIZE_DIG);
-	RR(SIZE_LCD(0));
+	RR(SIZE_MGR(OMAP_DSS_CHANNEL_DIGIT));
+	RR(SIZE_MGR(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
-		RR(DEFAULT_COLOR(2));
-		RR(TRANS_COLOR(2));
-		RR(SIZE_LCD(2));
-		RR(TIMING_H(2));
-		RR(TIMING_V(2));
-		RR(POL_FREQ(2));
-		RR(DIVISORo(2));
+		RR(DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		RR(TRANS_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		RR(SIZE_MGR(OMAP_DSS_CHANNEL_LCD2));
+		RR(TIMING_H(OMAP_DSS_CHANNEL_LCD2));
+		RR(TIMING_V(OMAP_DSS_CHANNEL_LCD2));
+		RR(POL_FREQ(OMAP_DSS_CHANNEL_LCD2));
+		RR(DIVISORo(OMAP_DSS_CHANNEL_LCD2));
 		RR(CONFIG2);
 	}
 
-	RR(GFX_BA0);
-	RR(GFX_BA1);
-	RR(GFX_POSITION);
-	RR(GFX_SIZE);
-	RR(GFX_ATTRIBUTES);
-	RR(GFX_FIFO_THRESHOLD);
-	RR(GFX_ROW_INC);
-	RR(GFX_PIXEL_INC);
-	RR(GFX_WINDOW_SKIP);
-	RR(GFX_TABLE_BA);
+	RR(OVL_BA0(OMAP_DSS_GFX));
+	RR(OVL_BA1(OMAP_DSS_GFX));
+	RR(OVL_POSITION(OMAP_DSS_GFX));
+	RR(OVL_SIZE(OMAP_DSS_GFX));
+	RR(OVL_ATTRIBUTES(OMAP_DSS_GFX));
+	RR(OVL_FIFO_THRESHOLD(OMAP_DSS_GFX));
+	RR(OVL_ROW_INC(OMAP_DSS_GFX));
+	RR(OVL_PIXEL_INC(OMAP_DSS_GFX));
+	RR(OVL_WINDOW_SKIP(OMAP_DSS_GFX));
+	RR(OVL_TABLE_BA(OMAP_DSS_GFX));
 
-	RR(DATA_CYCLE1(0));
-	RR(DATA_CYCLE2(0));
-	RR(DATA_CYCLE3(0));
 
-	RR(CPR_COEF_R(0));
-	RR(CPR_COEF_G(0));
-	RR(CPR_COEF_B(0));
+	RR(DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD));
+	RR(DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD));
+	RR(DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD));
+
+	RR(CPR_COEF_R(OMAP_DSS_CHANNEL_LCD));
+	RR(CPR_COEF_G(OMAP_DSS_CHANNEL_LCD));
+	RR(CPR_COEF_B(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
-		RR(DATA_CYCLE1(2));
-		RR(DATA_CYCLE2(2));
-		RR(DATA_CYCLE3(2));
+		RR(DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD2));
+		RR(DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD2));
+		RR(DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD2));
 
-		RR(CPR_COEF_B(2));
-		RR(CPR_COEF_G(2));
-		RR(CPR_COEF_R(2));
+		RR(CPR_COEF_B(OMAP_DSS_CHANNEL_LCD2));
+		RR(CPR_COEF_G(OMAP_DSS_CHANNEL_LCD2));
+		RR(CPR_COEF_R(OMAP_DSS_CHANNEL_LCD2));
 	}
 
-	RR(GFX_PRELOAD);
+	RR(OVL_PRELOAD(OMAP_DSS_GFX));
 
 	/* VID1 */
-	RR(VID_BA0(0));
-	RR(VID_BA1(0));
-	RR(VID_POSITION(0));
-	RR(VID_SIZE(0));
-	RR(VID_ATTRIBUTES(0));
-	RR(VID_FIFO_THRESHOLD(0));
-	RR(VID_ROW_INC(0));
-	RR(VID_PIXEL_INC(0));
-	RR(VID_FIR(0));
-	RR(VID_PICTURE_SIZE(0));
-	RR(VID_ACCU0(0));
-	RR(VID_ACCU1(0));
+	RR(OVL_BA0(OMAP_DSS_VIDEO1));
+	RR(OVL_BA1(OMAP_DSS_VIDEO1));
+	RR(OVL_POSITION(OMAP_DSS_VIDEO1));
+	RR(OVL_SIZE(OMAP_DSS_VIDEO1));
+	RR(OVL_ATTRIBUTES(OMAP_DSS_VIDEO1));
+	RR(OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO1));
+	RR(OVL_ROW_INC(OMAP_DSS_VIDEO1));
+	RR(OVL_PIXEL_INC(OMAP_DSS_VIDEO1));
+	RR(OVL_FIR(OMAP_DSS_VIDEO1));
+	RR(OVL_PICTURE_SIZE(OMAP_DSS_VIDEO1));
+	RR(OVL_ACCU0(OMAP_DSS_VIDEO1));
+	RR(OVL_ACCU1(OMAP_DSS_VIDEO1));
 
-	RR(VID_FIR_COEF_H(0, 0));
-	RR(VID_FIR_COEF_H(0, 1));
-	RR(VID_FIR_COEF_H(0, 2));
-	RR(VID_FIR_COEF_H(0, 3));
-	RR(VID_FIR_COEF_H(0, 4));
-	RR(VID_FIR_COEF_H(0, 5));
-	RR(VID_FIR_COEF_H(0, 6));
-	RR(VID_FIR_COEF_H(0, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, i));
 
-	RR(VID_FIR_COEF_HV(0, 0));
-	RR(VID_FIR_COEF_HV(0, 1));
-	RR(VID_FIR_COEF_HV(0, 2));
-	RR(VID_FIR_COEF_HV(0, 3));
-	RR(VID_FIR_COEF_HV(0, 4));
-	RR(VID_FIR_COEF_HV(0, 5));
-	RR(VID_FIR_COEF_HV(0, 6));
-	RR(VID_FIR_COEF_HV(0, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, i));
 
-	RR(VID_CONV_COEF(0, 0));
-	RR(VID_CONV_COEF(0, 1));
-	RR(VID_CONV_COEF(0, 2));
-	RR(VID_CONV_COEF(0, 3));
-	RR(VID_CONV_COEF(0, 4));
+	for (i = 0; i < 5; i++)
+		RR(OVL_CONV_COEF(OMAP_DSS_VIDEO1, i));
 
-	RR(VID_FIR_COEF_V(0, 0));
-	RR(VID_FIR_COEF_V(0, 1));
-	RR(VID_FIR_COEF_V(0, 2));
-	RR(VID_FIR_COEF_V(0, 3));
-	RR(VID_FIR_COEF_V(0, 4));
-	RR(VID_FIR_COEF_V(0, 5));
-	RR(VID_FIR_COEF_V(0, 6));
-	RR(VID_FIR_COEF_V(0, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, i));
 
-	RR(VID_PRELOAD(0));
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		RR(OVL_BA0_UV(OMAP_DSS_VIDEO1));
+		RR(OVL_BA1_UV(OMAP_DSS_VIDEO1));
+		RR(OVL_FIR2(OMAP_DSS_VIDEO1));
+		RR(OVL_ACCU2_0(OMAP_DSS_VIDEO1));
+		RR(OVL_ACCU2_1(OMAP_DSS_VIDEO1));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, i));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, i));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, i));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		RR(OVL_ATTRIBUTES2(OMAP_DSS_VIDEO1));
+
+	RR(OVL_PRELOAD(OMAP_DSS_VIDEO1));
 
 	/* VID2 */
-	RR(VID_BA0(1));
-	RR(VID_BA1(1));
-	RR(VID_POSITION(1));
-	RR(VID_SIZE(1));
-	RR(VID_ATTRIBUTES(1));
-	RR(VID_FIFO_THRESHOLD(1));
-	RR(VID_ROW_INC(1));
-	RR(VID_PIXEL_INC(1));
-	RR(VID_FIR(1));
-	RR(VID_PICTURE_SIZE(1));
-	RR(VID_ACCU0(1));
-	RR(VID_ACCU1(1));
+	RR(OVL_BA0(OMAP_DSS_VIDEO2));
+	RR(OVL_BA1(OMAP_DSS_VIDEO2));
+	RR(OVL_POSITION(OMAP_DSS_VIDEO2));
+	RR(OVL_SIZE(OMAP_DSS_VIDEO2));
+	RR(OVL_ATTRIBUTES(OMAP_DSS_VIDEO2));
+	RR(OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO2));
+	RR(OVL_ROW_INC(OMAP_DSS_VIDEO2));
+	RR(OVL_PIXEL_INC(OMAP_DSS_VIDEO2));
+	RR(OVL_FIR(OMAP_DSS_VIDEO2));
+	RR(OVL_PICTURE_SIZE(OMAP_DSS_VIDEO2));
+	RR(OVL_ACCU0(OMAP_DSS_VIDEO2));
+	RR(OVL_ACCU1(OMAP_DSS_VIDEO2));
 
-	RR(VID_FIR_COEF_H(1, 0));
-	RR(VID_FIR_COEF_H(1, 1));
-	RR(VID_FIR_COEF_H(1, 2));
-	RR(VID_FIR_COEF_H(1, 3));
-	RR(VID_FIR_COEF_H(1, 4));
-	RR(VID_FIR_COEF_H(1, 5));
-	RR(VID_FIR_COEF_H(1, 6));
-	RR(VID_FIR_COEF_H(1, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, i));
 
-	RR(VID_FIR_COEF_HV(1, 0));
-	RR(VID_FIR_COEF_HV(1, 1));
-	RR(VID_FIR_COEF_HV(1, 2));
-	RR(VID_FIR_COEF_HV(1, 3));
-	RR(VID_FIR_COEF_HV(1, 4));
-	RR(VID_FIR_COEF_HV(1, 5));
-	RR(VID_FIR_COEF_HV(1, 6));
-	RR(VID_FIR_COEF_HV(1, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, i));
 
-	RR(VID_CONV_COEF(1, 0));
-	RR(VID_CONV_COEF(1, 1));
-	RR(VID_CONV_COEF(1, 2));
-	RR(VID_CONV_COEF(1, 3));
-	RR(VID_CONV_COEF(1, 4));
+	for (i = 0; i < 5; i++)
+		RR(OVL_CONV_COEF(OMAP_DSS_VIDEO2, i));
 
-	RR(VID_FIR_COEF_V(1, 0));
-	RR(VID_FIR_COEF_V(1, 1));
-	RR(VID_FIR_COEF_V(1, 2));
-	RR(VID_FIR_COEF_V(1, 3));
-	RR(VID_FIR_COEF_V(1, 4));
-	RR(VID_FIR_COEF_V(1, 5));
-	RR(VID_FIR_COEF_V(1, 6));
-	RR(VID_FIR_COEF_V(1, 7));
+	for (i = 0; i < 8; i++)
+		RR(OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, i));
 
-	RR(VID_PRELOAD(1));
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		RR(OVL_BA0_UV(OMAP_DSS_VIDEO2));
+		RR(OVL_BA1_UV(OMAP_DSS_VIDEO2));
+		RR(OVL_FIR2(OMAP_DSS_VIDEO2));
+		RR(OVL_ACCU2_0(OMAP_DSS_VIDEO2));
+		RR(OVL_ACCU2_1(OMAP_DSS_VIDEO2));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, i));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, i));
+
+		for (i = 0; i < 8; i++)
+			RR(OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, i));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		RR(OVL_ATTRIBUTES2(OMAP_DSS_VIDEO2));
+
+	RR(OVL_PRELOAD(OMAP_DSS_VIDEO2));
 
 	if (dss_has_feature(FEAT_CORE_CLK_DIV))
 		RR(DIVISOR);
@@ -632,27 +551,43 @@
 
 static void _dispc_write_firh_reg(enum omap_plane plane, int reg, u32 value)
 {
-	BUG_ON(plane == OMAP_DSS_GFX);
-
-	dispc_write_reg(DISPC_VID_FIR_COEF_H(plane-1, reg), value);
+	dispc_write_reg(DISPC_OVL_FIR_COEF_H(plane, reg), value);
 }
 
 static void _dispc_write_firhv_reg(enum omap_plane plane, int reg, u32 value)
 {
-	BUG_ON(plane == OMAP_DSS_GFX);
-
-	dispc_write_reg(DISPC_VID_FIR_COEF_HV(plane-1, reg), value);
+	dispc_write_reg(DISPC_OVL_FIR_COEF_HV(plane, reg), value);
 }
 
 static void _dispc_write_firv_reg(enum omap_plane plane, int reg, u32 value)
 {
+	dispc_write_reg(DISPC_OVL_FIR_COEF_V(plane, reg), value);
+}
+
+static void _dispc_write_firh2_reg(enum omap_plane plane, int reg, u32 value)
+{
 	BUG_ON(plane == OMAP_DSS_GFX);
 
-	dispc_write_reg(DISPC_VID_FIR_COEF_V(plane-1, reg), value);
+	dispc_write_reg(DISPC_OVL_FIR_COEF_H2(plane, reg), value);
+}
+
+static void _dispc_write_firhv2_reg(enum omap_plane plane, int reg, u32 value)
+{
+	BUG_ON(plane == OMAP_DSS_GFX);
+
+	dispc_write_reg(DISPC_OVL_FIR_COEF_HV2(plane, reg), value);
+}
+
+static void _dispc_write_firv2_reg(enum omap_plane plane, int reg, u32 value)
+{
+	BUG_ON(plane == OMAP_DSS_GFX);
+
+	dispc_write_reg(DISPC_OVL_FIR_COEF_V2(plane, reg), value);
 }
 
 static void _dispc_set_scale_coef(enum omap_plane plane, int hscaleup,
-		int vscaleup, int five_taps)
+				  int vscaleup, int five_taps,
+				  enum omap_color_component color_comp)
 {
 	/* Coefficients for horizontal up-sampling */
 	static const struct dispc_h_coef coef_hup[8] = {
@@ -750,8 +685,14 @@
 			| FLD_VAL(v_coef[i].vc1, 23, 16)
 			| FLD_VAL(v_coef[i].vc2, 31, 24);
 
-		_dispc_write_firh_reg(plane, i, h);
-		_dispc_write_firhv_reg(plane, i, hv);
+		if (color_comp == DISPC_COLOR_COMPONENT_RGB_Y) {
+			_dispc_write_firh_reg(plane, i, h);
+			_dispc_write_firhv_reg(plane, i, hv);
+		} else {
+			_dispc_write_firh2_reg(plane, i, h);
+			_dispc_write_firhv2_reg(plane, i, hv);
+		}
+
 	}
 
 	if (five_taps) {
@@ -759,7 +700,10 @@
 			u32 v;
 			v = FLD_VAL(v_coef[i].vc00, 7, 0)
 				| FLD_VAL(v_coef[i].vc22, 15, 8);
-			_dispc_write_firv_reg(plane, i, v);
+			if (color_comp == DISPC_COLOR_COMPONENT_RGB_Y)
+				_dispc_write_firv_reg(plane, i, v);
+			else
+				_dispc_write_firv2_reg(plane, i, v);
 		}
 	}
 }
@@ -779,72 +723,83 @@
 
 	ct = &ctbl_bt601_5;
 
-	dispc_write_reg(DISPC_VID_CONV_COEF(0, 0), CVAL(ct->rcr, ct->ry));
-	dispc_write_reg(DISPC_VID_CONV_COEF(0, 1), CVAL(ct->gy,	 ct->rcb));
-	dispc_write_reg(DISPC_VID_CONV_COEF(0, 2), CVAL(ct->gcb, ct->gcr));
-	dispc_write_reg(DISPC_VID_CONV_COEF(0, 3), CVAL(ct->bcr, ct->by));
-	dispc_write_reg(DISPC_VID_CONV_COEF(0, 4), CVAL(0,       ct->bcb));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 0),
+		CVAL(ct->rcr, ct->ry));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 1),
+		CVAL(ct->gy,  ct->rcb));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 2),
+		CVAL(ct->gcb, ct->gcr));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 3),
+		CVAL(ct->bcr, ct->by));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 4),
+		CVAL(0, ct->bcb));
 
-	dispc_write_reg(DISPC_VID_CONV_COEF(1, 0), CVAL(ct->rcr, ct->ry));
-	dispc_write_reg(DISPC_VID_CONV_COEF(1, 1), CVAL(ct->gy,	 ct->rcb));
-	dispc_write_reg(DISPC_VID_CONV_COEF(1, 2), CVAL(ct->gcb, ct->gcr));
-	dispc_write_reg(DISPC_VID_CONV_COEF(1, 3), CVAL(ct->bcr, ct->by));
-	dispc_write_reg(DISPC_VID_CONV_COEF(1, 4), CVAL(0,       ct->bcb));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 0),
+		CVAL(ct->rcr, ct->ry));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 1),
+		CVAL(ct->gy, ct->rcb));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 2),
+		CVAL(ct->gcb, ct->gcr));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 3),
+		CVAL(ct->bcr, ct->by));
+	dispc_write_reg(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 4),
+		CVAL(0, ct->bcb));
 
 #undef CVAL
 
-	REG_FLD_MOD(DISPC_VID_ATTRIBUTES(0), ct->full_range, 11, 11);
-	REG_FLD_MOD(DISPC_VID_ATTRIBUTES(1), ct->full_range, 11, 11);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(OMAP_DSS_VIDEO1),
+		ct->full_range, 11, 11);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(OMAP_DSS_VIDEO2),
+		ct->full_range, 11, 11);
 }
 
 
 static void _dispc_set_plane_ba0(enum omap_plane plane, u32 paddr)
 {
-	const struct dispc_reg ba0_reg[] = { DISPC_GFX_BA0,
-		DISPC_VID_BA0(0),
-		DISPC_VID_BA0(1) };
-
-	dispc_write_reg(ba0_reg[plane], paddr);
+	dispc_write_reg(DISPC_OVL_BA0(plane), paddr);
 }
 
 static void _dispc_set_plane_ba1(enum omap_plane plane, u32 paddr)
 {
-	const struct dispc_reg ba1_reg[] = { DISPC_GFX_BA1,
-				      DISPC_VID_BA1(0),
-				      DISPC_VID_BA1(1) };
+	dispc_write_reg(DISPC_OVL_BA1(plane), paddr);
+}
 
-	dispc_write_reg(ba1_reg[plane], paddr);
+static void _dispc_set_plane_ba0_uv(enum omap_plane plane, u32 paddr)
+{
+	dispc_write_reg(DISPC_OVL_BA0_UV(plane), paddr);
+}
+
+static void _dispc_set_plane_ba1_uv(enum omap_plane plane, u32 paddr)
+{
+	dispc_write_reg(DISPC_OVL_BA1_UV(plane), paddr);
 }
 
 static void _dispc_set_plane_pos(enum omap_plane plane, int x, int y)
 {
-	const struct dispc_reg pos_reg[] = { DISPC_GFX_POSITION,
-				      DISPC_VID_POSITION(0),
-				      DISPC_VID_POSITION(1) };
-
 	u32 val = FLD_VAL(y, 26, 16) | FLD_VAL(x, 10, 0);
-	dispc_write_reg(pos_reg[plane], val);
+
+	dispc_write_reg(DISPC_OVL_POSITION(plane), val);
 }
 
 static void _dispc_set_pic_size(enum omap_plane plane, int width, int height)
 {
-	const struct dispc_reg siz_reg[] = { DISPC_GFX_SIZE,
-				      DISPC_VID_PICTURE_SIZE(0),
-				      DISPC_VID_PICTURE_SIZE(1) };
 	u32 val = FLD_VAL(height - 1, 26, 16) | FLD_VAL(width - 1, 10, 0);
-	dispc_write_reg(siz_reg[plane], val);
+
+	if (plane == OMAP_DSS_GFX)
+		dispc_write_reg(DISPC_OVL_SIZE(plane), val);
+	else
+		dispc_write_reg(DISPC_OVL_PICTURE_SIZE(plane), val);
 }
 
 static void _dispc_set_vid_size(enum omap_plane plane, int width, int height)
 {
 	u32 val;
-	const struct dispc_reg vsi_reg[] = { DISPC_VID_SIZE(0),
-				      DISPC_VID_SIZE(1) };
 
 	BUG_ON(plane == OMAP_DSS_GFX);
 
 	val = FLD_VAL(height - 1, 26, 16) | FLD_VAL(width - 1, 10, 0);
-	dispc_write_reg(vsi_reg[plane-1], val);
+
+	dispc_write_reg(DISPC_OVL_SIZE(plane), val);
 }
 
 static void _dispc_set_pre_mult_alpha(enum omap_plane plane, bool enable)
@@ -856,7 +811,7 @@
 		plane == OMAP_DSS_VIDEO1)
 		return;
 
-	REG_FLD_MOD(dispc_reg_att[plane], enable ? 1 : 0, 28, 28);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), enable ? 1 : 0, 28, 28);
 }
 
 static void _dispc_setup_global_alpha(enum omap_plane plane, u8 global_alpha)
@@ -876,61 +831,93 @@
 
 static void _dispc_set_pix_inc(enum omap_plane plane, s32 inc)
 {
-	const struct dispc_reg ri_reg[] = { DISPC_GFX_PIXEL_INC,
-				     DISPC_VID_PIXEL_INC(0),
-				     DISPC_VID_PIXEL_INC(1) };
-
-	dispc_write_reg(ri_reg[plane], inc);
+	dispc_write_reg(DISPC_OVL_PIXEL_INC(plane), inc);
 }
 
 static void _dispc_set_row_inc(enum omap_plane plane, s32 inc)
 {
-	const struct dispc_reg ri_reg[] = { DISPC_GFX_ROW_INC,
-				     DISPC_VID_ROW_INC(0),
-				     DISPC_VID_ROW_INC(1) };
-
-	dispc_write_reg(ri_reg[plane], inc);
+	dispc_write_reg(DISPC_OVL_ROW_INC(plane), inc);
 }
 
 static void _dispc_set_color_mode(enum omap_plane plane,
 		enum omap_color_mode color_mode)
 {
 	u32 m = 0;
-
-	switch (color_mode) {
-	case OMAP_DSS_COLOR_CLUT1:
-		m = 0x0; break;
-	case OMAP_DSS_COLOR_CLUT2:
-		m = 0x1; break;
-	case OMAP_DSS_COLOR_CLUT4:
-		m = 0x2; break;
-	case OMAP_DSS_COLOR_CLUT8:
-		m = 0x3; break;
-	case OMAP_DSS_COLOR_RGB12U:
-		m = 0x4; break;
-	case OMAP_DSS_COLOR_ARGB16:
-		m = 0x5; break;
-	case OMAP_DSS_COLOR_RGB16:
-		m = 0x6; break;
-	case OMAP_DSS_COLOR_RGB24U:
-		m = 0x8; break;
-	case OMAP_DSS_COLOR_RGB24P:
-		m = 0x9; break;
-	case OMAP_DSS_COLOR_YUV2:
-		m = 0xa; break;
-	case OMAP_DSS_COLOR_UYVY:
-		m = 0xb; break;
-	case OMAP_DSS_COLOR_ARGB32:
-		m = 0xc; break;
-	case OMAP_DSS_COLOR_RGBA32:
-		m = 0xd; break;
-	case OMAP_DSS_COLOR_RGBX32:
-		m = 0xe; break;
-	default:
-		BUG(); break;
+	if (plane != OMAP_DSS_GFX) {
+		switch (color_mode) {
+		case OMAP_DSS_COLOR_NV12:
+			m = 0x0; break;
+		case OMAP_DSS_COLOR_RGB12U:
+			m = 0x1; break;
+		case OMAP_DSS_COLOR_RGBA16:
+			m = 0x2; break;
+		case OMAP_DSS_COLOR_RGBX16:
+			m = 0x4; break;
+		case OMAP_DSS_COLOR_ARGB16:
+			m = 0x5; break;
+		case OMAP_DSS_COLOR_RGB16:
+			m = 0x6; break;
+		case OMAP_DSS_COLOR_ARGB16_1555:
+			m = 0x7; break;
+		case OMAP_DSS_COLOR_RGB24U:
+			m = 0x8; break;
+		case OMAP_DSS_COLOR_RGB24P:
+			m = 0x9; break;
+		case OMAP_DSS_COLOR_YUV2:
+			m = 0xa; break;
+		case OMAP_DSS_COLOR_UYVY:
+			m = 0xb; break;
+		case OMAP_DSS_COLOR_ARGB32:
+			m = 0xc; break;
+		case OMAP_DSS_COLOR_RGBA32:
+			m = 0xd; break;
+		case OMAP_DSS_COLOR_RGBX32:
+			m = 0xe; break;
+		case OMAP_DSS_COLOR_XRGB16_1555:
+			m = 0xf; break;
+		default:
+			BUG(); break;
+		}
+	} else {
+		switch (color_mode) {
+		case OMAP_DSS_COLOR_CLUT1:
+			m = 0x0; break;
+		case OMAP_DSS_COLOR_CLUT2:
+			m = 0x1; break;
+		case OMAP_DSS_COLOR_CLUT4:
+			m = 0x2; break;
+		case OMAP_DSS_COLOR_CLUT8:
+			m = 0x3; break;
+		case OMAP_DSS_COLOR_RGB12U:
+			m = 0x4; break;
+		case OMAP_DSS_COLOR_ARGB16:
+			m = 0x5; break;
+		case OMAP_DSS_COLOR_RGB16:
+			m = 0x6; break;
+		case OMAP_DSS_COLOR_ARGB16_1555:
+			m = 0x7; break;
+		case OMAP_DSS_COLOR_RGB24U:
+			m = 0x8; break;
+		case OMAP_DSS_COLOR_RGB24P:
+			m = 0x9; break;
+		case OMAP_DSS_COLOR_YUV2:
+			m = 0xa; break;
+		case OMAP_DSS_COLOR_UYVY:
+			m = 0xb; break;
+		case OMAP_DSS_COLOR_ARGB32:
+			m = 0xc; break;
+		case OMAP_DSS_COLOR_RGBA32:
+			m = 0xd; break;
+		case OMAP_DSS_COLOR_RGBX32:
+			m = 0xe; break;
+		case OMAP_DSS_COLOR_XRGB16_1555:
+			m = 0xf; break;
+		default:
+			BUG(); break;
+		}
 	}
 
-	REG_FLD_MOD(dispc_reg_att[plane], m, 4, 1);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), m, 4, 1);
 }
 
 static void _dispc_set_channel_out(enum omap_plane plane,
@@ -953,7 +940,7 @@
 		return;
 	}
 
-	val = dispc_read_reg(dispc_reg_att[plane]);
+	val = dispc_read_reg(DISPC_OVL_ATTRIBUTES(plane));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
 		switch (channel) {
 		case OMAP_DSS_CHANNEL_LCD:
@@ -977,7 +964,7 @@
 	} else {
 		val = FLD_MOD(val, channel, shift, shift);
 	}
-	dispc_write_reg(dispc_reg_att[plane], val);
+	dispc_write_reg(DISPC_OVL_ATTRIBUTES(plane), val);
 }
 
 void dispc_set_burst_size(enum omap_plane plane,
@@ -1001,9 +988,9 @@
 		return;
 	}
 
-	val = dispc_read_reg(dispc_reg_att[plane]);
+	val = dispc_read_reg(DISPC_OVL_ATTRIBUTES(plane));
 	val = FLD_MOD(val, burst_size, shift+1, shift);
-	dispc_write_reg(dispc_reg_att[plane], val);
+	dispc_write_reg(DISPC_OVL_ATTRIBUTES(plane), val);
 
 	enable_clocks(0);
 }
@@ -1028,9 +1015,9 @@
 
 	BUG_ON(plane == OMAP_DSS_GFX);
 
-	val = dispc_read_reg(dispc_reg_att[plane]);
+	val = dispc_read_reg(DISPC_OVL_ATTRIBUTES(plane));
 	val = FLD_MOD(val, enable, 9, 9);
-	dispc_write_reg(dispc_reg_att[plane], val);
+	dispc_write_reg(DISPC_OVL_ATTRIBUTES(plane), val);
 }
 
 void dispc_enable_replication(enum omap_plane plane, bool enable)
@@ -1043,7 +1030,7 @@
 		bit = 10;
 
 	enable_clocks(1);
-	REG_FLD_MOD(dispc_reg_att[plane], enable, bit, bit);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), enable, bit, bit);
 	enable_clocks(0);
 }
 
@@ -1053,7 +1040,7 @@
 	BUG_ON((width > (1 << 11)) || (height > (1 << 11)));
 	val = FLD_VAL(height - 1, 26, 16) | FLD_VAL(width - 1, 10, 0);
 	enable_clocks(1);
-	dispc_write_reg(DISPC_SIZE_LCD(channel), val);
+	dispc_write_reg(DISPC_SIZE_MGR(channel), val);
 	enable_clocks(0);
 }
 
@@ -1063,15 +1050,12 @@
 	BUG_ON((width > (1 << 11)) || (height > (1 << 11)));
 	val = FLD_VAL(height - 1, 26, 16) | FLD_VAL(width - 1, 10, 0);
 	enable_clocks(1);
-	dispc_write_reg(DISPC_SIZE_DIG, val);
+	dispc_write_reg(DISPC_SIZE_MGR(OMAP_DSS_CHANNEL_DIGIT), val);
 	enable_clocks(0);
 }
 
 static void dispc_read_plane_fifo_sizes(void)
 {
-	const struct dispc_reg fsz_reg[] = { DISPC_GFX_FIFO_SIZE_STATUS,
-				      DISPC_VID_FIFO_SIZE_STATUS(0),
-				      DISPC_VID_FIFO_SIZE_STATUS(1) };
 	u32 size;
 	int plane;
 	u8 start, end;
@@ -1081,7 +1065,8 @@
 	dss_feat_get_reg_field(FEAT_REG_FIFOSIZE, &start, &end);
 
 	for (plane = 0; plane < ARRAY_SIZE(dispc.fifo_size); ++plane) {
-		size = FLD_GET(dispc_read_reg(fsz_reg[plane]), start, end);
+		size = FLD_GET(dispc_read_reg(DISPC_OVL_FIFO_SIZE_STATUS(plane)),
+			start, end);
 		dispc.fifo_size[plane] = size;
 	}
 
@@ -1095,23 +1080,22 @@
 
 void dispc_setup_plane_fifo(enum omap_plane plane, u32 low, u32 high)
 {
-	const struct dispc_reg ftrs_reg[] = { DISPC_GFX_FIFO_THRESHOLD,
-				       DISPC_VID_FIFO_THRESHOLD(0),
-				       DISPC_VID_FIFO_THRESHOLD(1) };
 	u8 hi_start, hi_end, lo_start, lo_end;
 
+	dss_feat_get_reg_field(FEAT_REG_FIFOHIGHTHRESHOLD, &hi_start, &hi_end);
+	dss_feat_get_reg_field(FEAT_REG_FIFOLOWTHRESHOLD, &lo_start, &lo_end);
+
 	enable_clocks(1);
 
 	DSSDBG("fifo(%d) low/high old %u/%u, new %u/%u\n",
 			plane,
-			REG_GET(ftrs_reg[plane], 11, 0),
-			REG_GET(ftrs_reg[plane], 27, 16),
+			REG_GET(DISPC_OVL_FIFO_THRESHOLD(plane),
+				lo_start, lo_end),
+			REG_GET(DISPC_OVL_FIFO_THRESHOLD(plane),
+				hi_start, hi_end),
 			low, high);
 
-	dss_feat_get_reg_field(FEAT_REG_FIFOHIGHTHRESHOLD, &hi_start, &hi_end);
-	dss_feat_get_reg_field(FEAT_REG_FIFOLOWTHRESHOLD, &lo_start, &lo_end);
-
-	dispc_write_reg(ftrs_reg[plane],
+	dispc_write_reg(DISPC_OVL_FIFO_THRESHOLD(plane),
 			FLD_VAL(high, hi_start, hi_end) |
 			FLD_VAL(low, lo_start, lo_end));
 
@@ -1128,106 +1112,120 @@
 	enable_clocks(0);
 }
 
-static void _dispc_set_fir(enum omap_plane plane, int hinc, int vinc)
+static void _dispc_set_fir(enum omap_plane plane,
+				int hinc, int vinc,
+				enum omap_color_component color_comp)
 {
 	u32 val;
-	const struct dispc_reg fir_reg[] = { DISPC_VID_FIR(0),
-				      DISPC_VID_FIR(1) };
-	u8 hinc_start, hinc_end, vinc_start, vinc_end;
 
-	BUG_ON(plane == OMAP_DSS_GFX);
+	if (color_comp == DISPC_COLOR_COMPONENT_RGB_Y) {
+		u8 hinc_start, hinc_end, vinc_start, vinc_end;
 
-	dss_feat_get_reg_field(FEAT_REG_FIRHINC, &hinc_start, &hinc_end);
-	dss_feat_get_reg_field(FEAT_REG_FIRVINC, &vinc_start, &vinc_end);
+		dss_feat_get_reg_field(FEAT_REG_FIRHINC,
+					&hinc_start, &hinc_end);
+		dss_feat_get_reg_field(FEAT_REG_FIRVINC,
+					&vinc_start, &vinc_end);
+		val = FLD_VAL(vinc, vinc_start, vinc_end) |
+				FLD_VAL(hinc, hinc_start, hinc_end);
 
-	val = FLD_VAL(vinc, vinc_start, vinc_end) |
-			FLD_VAL(hinc, hinc_start, hinc_end);
-
-	dispc_write_reg(fir_reg[plane-1], val);
+		dispc_write_reg(DISPC_OVL_FIR(plane), val);
+	} else {
+		val = FLD_VAL(vinc, 28, 16) | FLD_VAL(hinc, 12, 0);
+		dispc_write_reg(DISPC_OVL_FIR2(plane), val);
+	}
 }
 
 static void _dispc_set_vid_accu0(enum omap_plane plane, int haccu, int vaccu)
 {
 	u32 val;
-	const struct dispc_reg ac0_reg[] = { DISPC_VID_ACCU0(0),
-				      DISPC_VID_ACCU0(1) };
 	u8 hor_start, hor_end, vert_start, vert_end;
 
-	BUG_ON(plane == OMAP_DSS_GFX);
-
 	dss_feat_get_reg_field(FEAT_REG_HORIZONTALACCU, &hor_start, &hor_end);
 	dss_feat_get_reg_field(FEAT_REG_VERTICALACCU, &vert_start, &vert_end);
 
 	val = FLD_VAL(vaccu, vert_start, vert_end) |
 			FLD_VAL(haccu, hor_start, hor_end);
 
-	dispc_write_reg(ac0_reg[plane-1], val);
+	dispc_write_reg(DISPC_OVL_ACCU0(plane), val);
 }
 
 static void _dispc_set_vid_accu1(enum omap_plane plane, int haccu, int vaccu)
 {
 	u32 val;
-	const struct dispc_reg ac1_reg[] = { DISPC_VID_ACCU1(0),
-				      DISPC_VID_ACCU1(1) };
 	u8 hor_start, hor_end, vert_start, vert_end;
 
-	BUG_ON(plane == OMAP_DSS_GFX);
-
 	dss_feat_get_reg_field(FEAT_REG_HORIZONTALACCU, &hor_start, &hor_end);
 	dss_feat_get_reg_field(FEAT_REG_VERTICALACCU, &vert_start, &vert_end);
 
 	val = FLD_VAL(vaccu, vert_start, vert_end) |
 			FLD_VAL(haccu, hor_start, hor_end);
 
-	dispc_write_reg(ac1_reg[plane-1], val);
+	dispc_write_reg(DISPC_OVL_ACCU1(plane), val);
 }
 
+static void _dispc_set_vid_accu2_0(enum omap_plane plane, int haccu, int vaccu)
+{
+	u32 val;
 
-static void _dispc_set_scaling(enum omap_plane plane,
+	val = FLD_VAL(vaccu, 26, 16) | FLD_VAL(haccu, 10, 0);
+	dispc_write_reg(DISPC_OVL_ACCU2_0(plane), val);
+}
+
+static void _dispc_set_vid_accu2_1(enum omap_plane plane, int haccu, int vaccu)
+{
+	u32 val;
+
+	val = FLD_VAL(vaccu, 26, 16) | FLD_VAL(haccu, 10, 0);
+	dispc_write_reg(DISPC_OVL_ACCU2_1(plane), val);
+}
+
+static void _dispc_set_scale_param(enum omap_plane plane,
 		u16 orig_width, u16 orig_height,
 		u16 out_width, u16 out_height,
-		bool ilace, bool five_taps,
-		bool fieldmode)
+		bool five_taps, u8 rotation,
+		enum omap_color_component color_comp)
 {
-	int fir_hinc;
-	int fir_vinc;
+	int fir_hinc, fir_vinc;
 	int hscaleup, vscaleup;
-	int accu0 = 0;
-	int accu1 = 0;
-	u32 l;
-
-	BUG_ON(plane == OMAP_DSS_GFX);
 
 	hscaleup = orig_width <= out_width;
 	vscaleup = orig_height <= out_height;
 
-	_dispc_set_scale_coef(plane, hscaleup, vscaleup, five_taps);
+	_dispc_set_scale_coef(plane, hscaleup, vscaleup, five_taps, color_comp);
 
-	if (!orig_width || orig_width == out_width)
-		fir_hinc = 0;
-	else
-		fir_hinc = 1024 * orig_width / out_width;
+	fir_hinc = 1024 * orig_width / out_width;
+	fir_vinc = 1024 * orig_height / out_height;
 
-	if (!orig_height || orig_height == out_height)
-		fir_vinc = 0;
-	else
-		fir_vinc = 1024 * orig_height / out_height;
+	_dispc_set_fir(plane, fir_hinc, fir_vinc, color_comp);
+}
 
-	_dispc_set_fir(plane, fir_hinc, fir_vinc);
+static void _dispc_set_scaling_common(enum omap_plane plane,
+		u16 orig_width, u16 orig_height,
+		u16 out_width, u16 out_height,
+		bool ilace, bool five_taps,
+		bool fieldmode, enum omap_color_mode color_mode,
+		u8 rotation)
+{
+	int accu0 = 0;
+	int accu1 = 0;
+	u32 l;
 
-	l = dispc_read_reg(dispc_reg_att[plane]);
+	_dispc_set_scale_param(plane, orig_width, orig_height,
+				out_width, out_height, five_taps,
+				rotation, DISPC_COLOR_COMPONENT_RGB_Y);
+	l = dispc_read_reg(DISPC_OVL_ATTRIBUTES(plane));
 
 	/* RESIZEENABLE and VERTICALTAPS */
 	l &= ~((0x3 << 5) | (0x1 << 21));
-	l |= fir_hinc ? (1 << 5) : 0;
-	l |= fir_vinc ? (1 << 6) : 0;
+	l |= (orig_width != out_width) ? (1 << 5) : 0;
+	l |= (orig_height != out_height) ? (1 << 6) : 0;
 	l |= five_taps ? (1 << 21) : 0;
 
 	/* VRESIZECONF and HRESIZECONF */
 	if (dss_has_feature(FEAT_RESIZECONF)) {
 		l &= ~(0x3 << 7);
-		l |= hscaleup ? 0 : (1 << 7);
-		l |= vscaleup ? 0 : (1 << 8);
+		l |= (orig_width <= out_width) ? 0 : (1 << 7);
+		l |= (orig_height <= out_height) ? 0 : (1 << 8);
 	}
 
 	/* LINEBUFFERSPLIT */
@@ -1236,7 +1234,7 @@
 		l |= five_taps ? (1 << 22) : 0;
 	}
 
-	dispc_write_reg(dispc_reg_att[plane], l);
+	dispc_write_reg(DISPC_OVL_ATTRIBUTES(plane), l);
 
 	/*
 	 * field 0 = even field = bottom field
@@ -1244,7 +1242,7 @@
 	 */
 	if (ilace && !fieldmode) {
 		accu1 = 0;
-		accu0 = (fir_vinc / 2) & 0x3ff;
+		accu0 = ((1024 * orig_height / out_height) / 2) & 0x3ff;
 		if (accu0 >= 1024/2) {
 			accu1 = 1024/2;
 			accu0 -= accu1;
@@ -1255,6 +1253,93 @@
 	_dispc_set_vid_accu1(plane, 0, accu1);
 }
 
+static void _dispc_set_scaling_uv(enum omap_plane plane,
+		u16 orig_width, u16 orig_height,
+		u16 out_width, u16 out_height,
+		bool ilace, bool five_taps,
+		bool fieldmode, enum omap_color_mode color_mode,
+		u8 rotation)
+{
+	int scale_x = out_width != orig_width;
+	int scale_y = out_height != orig_height;
+
+	if (!dss_has_feature(FEAT_HANDLE_UV_SEPARATE))
+		return;
+	if ((color_mode != OMAP_DSS_COLOR_YUV2 &&
+			color_mode != OMAP_DSS_COLOR_UYVY &&
+			color_mode != OMAP_DSS_COLOR_NV12)) {
+		/* reset chroma resampling for RGB formats  */
+		REG_FLD_MOD(DISPC_OVL_ATTRIBUTES2(plane), 0, 8, 8);
+		return;
+	}
+	switch (color_mode) {
+	case OMAP_DSS_COLOR_NV12:
+		/* UV is subsampled by 2 vertically*/
+		orig_height >>= 1;
+		/* UV is subsampled by 2 horz.*/
+		orig_width >>= 1;
+		break;
+	case OMAP_DSS_COLOR_YUV2:
+	case OMAP_DSS_COLOR_UYVY:
+		/*For YUV422 with 90/270 rotation,
+		 *we don't upsample chroma
+		 */
+		if (rotation == OMAP_DSS_ROT_0 ||
+			rotation == OMAP_DSS_ROT_180)
+			/* UV is subsampled by 2 hrz*/
+			orig_width >>= 1;
+		/* must use FIR for YUV422 if rotated */
+		if (rotation != OMAP_DSS_ROT_0)
+			scale_x = scale_y = true;
+		break;
+	default:
+		BUG();
+	}
+
+	if (out_width != orig_width)
+		scale_x = true;
+	if (out_height != orig_height)
+		scale_y = true;
+
+	_dispc_set_scale_param(plane, orig_width, orig_height,
+			out_width, out_height, five_taps,
+				rotation, DISPC_COLOR_COMPONENT_UV);
+
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES2(plane),
+		(scale_x || scale_y) ? 1 : 0, 8, 8);
+	/* set H scaling */
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), scale_x ? 1 : 0, 5, 5);
+	/* set V scaling */
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), scale_y ? 1 : 0, 6, 6);
+
+	_dispc_set_vid_accu2_0(plane, 0x80, 0);
+	_dispc_set_vid_accu2_1(plane, 0x80, 0);
+}
+
+static void _dispc_set_scaling(enum omap_plane plane,
+		u16 orig_width, u16 orig_height,
+		u16 out_width, u16 out_height,
+		bool ilace, bool five_taps,
+		bool fieldmode, enum omap_color_mode color_mode,
+		u8 rotation)
+{
+	BUG_ON(plane == OMAP_DSS_GFX);
+
+	_dispc_set_scaling_common(plane,
+			orig_width, orig_height,
+			out_width, out_height,
+			ilace, five_taps,
+			fieldmode, color_mode,
+			rotation);
+
+	_dispc_set_scaling_uv(plane,
+		orig_width, orig_height,
+		out_width, out_height,
+		ilace, five_taps,
+		fieldmode, color_mode,
+		rotation);
+}
+
 static void _dispc_set_rotation_attrs(enum omap_plane plane, u8 rotation,
 		bool mirroring, enum omap_color_mode color_mode)
 {
@@ -1302,9 +1387,10 @@
 			row_repeat = false;
 	}
 
-	REG_FLD_MOD(dispc_reg_att[plane], vidrot, 13, 12);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), vidrot, 13, 12);
 	if (dss_has_feature(FEAT_ROWREPEATENABLE))
-		REG_FLD_MOD(dispc_reg_att[plane], row_repeat ? 1 : 0, 18, 18);
+		REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane),
+			row_repeat ? 1 : 0, 18, 18);
 }
 
 static int color_mode_to_bpp(enum omap_color_mode color_mode)
@@ -1317,12 +1403,17 @@
 	case OMAP_DSS_COLOR_CLUT4:
 		return 4;
 	case OMAP_DSS_COLOR_CLUT8:
+	case OMAP_DSS_COLOR_NV12:
 		return 8;
 	case OMAP_DSS_COLOR_RGB12U:
 	case OMAP_DSS_COLOR_RGB16:
 	case OMAP_DSS_COLOR_ARGB16:
 	case OMAP_DSS_COLOR_YUV2:
 	case OMAP_DSS_COLOR_UYVY:
+	case OMAP_DSS_COLOR_RGBA16:
+	case OMAP_DSS_COLOR_RGBX16:
+	case OMAP_DSS_COLOR_ARGB16_1555:
+	case OMAP_DSS_COLOR_XRGB16_1555:
 		return 16;
 	case OMAP_DSS_COLOR_RGB24P:
 		return 24;
@@ -1655,7 +1746,7 @@
 		enum omap_dss_rotation_type rotation_type,
 		u8 rotation, int mirror,
 		u8 global_alpha, u8 pre_mult_alpha,
-		enum omap_channel channel)
+		enum omap_channel channel, u32 puv_addr)
 {
 	const int maxdownscale = cpu_is_omap34xx() ? 4 : 2;
 	bool five_taps = 0;
@@ -1704,7 +1795,8 @@
 			return -EINVAL;
 
 		if (color_mode == OMAP_DSS_COLOR_YUV2 ||
-			color_mode == OMAP_DSS_COLOR_UYVY)
+			color_mode == OMAP_DSS_COLOR_UYVY ||
+			color_mode == OMAP_DSS_COLOR_NV12)
 			cconv = 1;
 
 		/* Must use 5-tap filter? */
@@ -1778,6 +1870,12 @@
 	_dispc_set_plane_ba0(plane, paddr + offset0);
 	_dispc_set_plane_ba1(plane, paddr + offset1);
 
+	if (OMAP_DSS_COLOR_NV12 == color_mode) {
+		_dispc_set_plane_ba0_uv(plane, puv_addr + offset0);
+		_dispc_set_plane_ba1_uv(plane, puv_addr + offset1);
+	}
+
+
 	_dispc_set_row_inc(plane, row_inc);
 	_dispc_set_pix_inc(plane, pix_inc);
 
@@ -1791,7 +1889,8 @@
 	if (plane != OMAP_DSS_GFX) {
 		_dispc_set_scaling(plane, width, height,
 				   out_width, out_height,
-				   ilace, five_taps, fieldmode);
+				   ilace, five_taps, fieldmode,
+				   color_mode, rotation);
 		_dispc_set_vid_size(plane, out_width, out_height);
 		_dispc_set_vid_color_conv(plane, cconv);
 	}
@@ -1806,7 +1905,7 @@
 
 static void _dispc_enable_plane(enum omap_plane plane, bool enable)
 {
-	REG_FLD_MOD(dispc_reg_att[plane], enable ? 1 : 0, 0, 0);
+	REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), enable ? 1 : 0, 0, 0);
 }
 
 static void dispc_disable_isr(void *data, u32 mask)
@@ -2353,14 +2452,20 @@
 
 unsigned long dispc_fclk_rate(void)
 {
+	struct platform_device *dsidev;
 	unsigned long r = 0;
 
 	switch (dss_get_dispc_clk_source()) {
-	case DSS_CLK_SRC_FCK:
+	case OMAP_DSS_CLK_SRC_FCK:
 		r = dss_clk_get_rate(DSS_CLK_FCK);
 		break;
-	case DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
-		r = dsi_get_pll_hsdiv_dispc_rate();
+	case OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
+		dsidev = dsi_get_dsidev_from_id(0);
+		r = dsi_get_pll_hsdiv_dispc_rate(dsidev);
+		break;
+	case OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC:
+		dsidev = dsi_get_dsidev_from_id(1);
+		r = dsi_get_pll_hsdiv_dispc_rate(dsidev);
 		break;
 	default:
 		BUG();
@@ -2371,6 +2476,7 @@
 
 unsigned long dispc_lclk_rate(enum omap_channel channel)
 {
+	struct platform_device *dsidev;
 	int lcd;
 	unsigned long r;
 	u32 l;
@@ -2380,11 +2486,16 @@
 	lcd = FLD_GET(l, 23, 16);
 
 	switch (dss_get_lcd_clk_source(channel)) {
-	case DSS_CLK_SRC_FCK:
+	case OMAP_DSS_CLK_SRC_FCK:
 		r = dss_clk_get_rate(DSS_CLK_FCK);
 		break;
-	case DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
-		r = dsi_get_pll_hsdiv_dispc_rate();
+	case OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
+		dsidev = dsi_get_dsidev_from_id(0);
+		r = dsi_get_pll_hsdiv_dispc_rate(dsidev);
+		break;
+	case OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC:
+		dsidev = dsi_get_dsidev_from_id(1);
+		r = dsi_get_pll_hsdiv_dispc_rate(dsidev);
 		break;
 	default:
 		BUG();
@@ -2412,8 +2523,8 @@
 {
 	int lcd, pcd;
 	u32 l;
-	enum dss_clk_source dispc_clk_src = dss_get_dispc_clk_source();
-	enum dss_clk_source lcd_clk_src;
+	enum omap_dss_clk_source dispc_clk_src = dss_get_dispc_clk_source();
+	enum omap_dss_clk_source lcd_clk_src;
 
 	enable_clocks(1);
 
@@ -2516,7 +2627,7 @@
 
 void dispc_dump_regs(struct seq_file *s)
 {
-#define DUMPREG(r) seq_printf(s, "%-35s %08x\n", #r, dispc_read_reg(r))
+#define DUMPREG(r) seq_printf(s, "%-50s %08x\n", #r, dispc_read_reg(r))
 
 	dss_clk_enable(DSS_CLK_ICK | DSS_CLK_FCK);
 
@@ -2528,152 +2639,227 @@
 	DUMPREG(DISPC_CONTROL);
 	DUMPREG(DISPC_CONFIG);
 	DUMPREG(DISPC_CAPABLE);
-	DUMPREG(DISPC_DEFAULT_COLOR(0));
-	DUMPREG(DISPC_DEFAULT_COLOR(1));
-	DUMPREG(DISPC_TRANS_COLOR(0));
-	DUMPREG(DISPC_TRANS_COLOR(1));
+	DUMPREG(DISPC_DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_DEFAULT_COLOR(OMAP_DSS_CHANNEL_DIGIT));
+	DUMPREG(DISPC_TRANS_COLOR(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_TRANS_COLOR(OMAP_DSS_CHANNEL_DIGIT));
 	DUMPREG(DISPC_LINE_STATUS);
 	DUMPREG(DISPC_LINE_NUMBER);
-	DUMPREG(DISPC_TIMING_H(0));
-	DUMPREG(DISPC_TIMING_V(0));
-	DUMPREG(DISPC_POL_FREQ(0));
-	DUMPREG(DISPC_DIVISORo(0));
+	DUMPREG(DISPC_TIMING_H(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_TIMING_V(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_POL_FREQ(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_DIVISORo(OMAP_DSS_CHANNEL_LCD));
 	DUMPREG(DISPC_GLOBAL_ALPHA);
-	DUMPREG(DISPC_SIZE_DIG);
-	DUMPREG(DISPC_SIZE_LCD(0));
+	DUMPREG(DISPC_SIZE_MGR(OMAP_DSS_CHANNEL_DIGIT));
+	DUMPREG(DISPC_SIZE_MGR(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
 		DUMPREG(DISPC_CONTROL2);
 		DUMPREG(DISPC_CONFIG2);
-		DUMPREG(DISPC_DEFAULT_COLOR(2));
-		DUMPREG(DISPC_TRANS_COLOR(2));
-		DUMPREG(DISPC_TIMING_H(2));
-		DUMPREG(DISPC_TIMING_V(2));
-		DUMPREG(DISPC_POL_FREQ(2));
-		DUMPREG(DISPC_DIVISORo(2));
-		DUMPREG(DISPC_SIZE_LCD(2));
+		DUMPREG(DISPC_DEFAULT_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_TRANS_COLOR(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_TIMING_H(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_TIMING_V(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_POL_FREQ(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_DIVISORo(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_SIZE_MGR(OMAP_DSS_CHANNEL_LCD2));
 	}
 
-	DUMPREG(DISPC_GFX_BA0);
-	DUMPREG(DISPC_GFX_BA1);
-	DUMPREG(DISPC_GFX_POSITION);
-	DUMPREG(DISPC_GFX_SIZE);
-	DUMPREG(DISPC_GFX_ATTRIBUTES);
-	DUMPREG(DISPC_GFX_FIFO_THRESHOLD);
-	DUMPREG(DISPC_GFX_FIFO_SIZE_STATUS);
-	DUMPREG(DISPC_GFX_ROW_INC);
-	DUMPREG(DISPC_GFX_PIXEL_INC);
-	DUMPREG(DISPC_GFX_WINDOW_SKIP);
-	DUMPREG(DISPC_GFX_TABLE_BA);
+	DUMPREG(DISPC_OVL_BA0(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_BA1(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_POSITION(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_SIZE(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_ATTRIBUTES(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_FIFO_THRESHOLD(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_FIFO_SIZE_STATUS(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_ROW_INC(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_PIXEL_INC(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_WINDOW_SKIP(OMAP_DSS_GFX));
+	DUMPREG(DISPC_OVL_TABLE_BA(OMAP_DSS_GFX));
 
-	DUMPREG(DISPC_DATA_CYCLE1(0));
-	DUMPREG(DISPC_DATA_CYCLE2(0));
-	DUMPREG(DISPC_DATA_CYCLE3(0));
+	DUMPREG(DISPC_DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD));
 
-	DUMPREG(DISPC_CPR_COEF_R(0));
-	DUMPREG(DISPC_CPR_COEF_G(0));
-	DUMPREG(DISPC_CPR_COEF_B(0));
+	DUMPREG(DISPC_CPR_COEF_R(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_CPR_COEF_G(OMAP_DSS_CHANNEL_LCD));
+	DUMPREG(DISPC_CPR_COEF_B(OMAP_DSS_CHANNEL_LCD));
 	if (dss_has_feature(FEAT_MGR_LCD2)) {
-		DUMPREG(DISPC_DATA_CYCLE1(2));
-		DUMPREG(DISPC_DATA_CYCLE2(2));
-		DUMPREG(DISPC_DATA_CYCLE3(2));
+		DUMPREG(DISPC_DATA_CYCLE1(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_DATA_CYCLE2(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_DATA_CYCLE3(OMAP_DSS_CHANNEL_LCD2));
 
-		DUMPREG(DISPC_CPR_COEF_R(2));
-		DUMPREG(DISPC_CPR_COEF_G(2));
-		DUMPREG(DISPC_CPR_COEF_B(2));
+		DUMPREG(DISPC_CPR_COEF_R(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_CPR_COEF_G(OMAP_DSS_CHANNEL_LCD2));
+		DUMPREG(DISPC_CPR_COEF_B(OMAP_DSS_CHANNEL_LCD2));
 	}
 
-	DUMPREG(DISPC_GFX_PRELOAD);
+	DUMPREG(DISPC_OVL_PRELOAD(OMAP_DSS_GFX));
 
-	DUMPREG(DISPC_VID_BA0(0));
-	DUMPREG(DISPC_VID_BA1(0));
-	DUMPREG(DISPC_VID_POSITION(0));
-	DUMPREG(DISPC_VID_SIZE(0));
-	DUMPREG(DISPC_VID_ATTRIBUTES(0));
-	DUMPREG(DISPC_VID_FIFO_THRESHOLD(0));
-	DUMPREG(DISPC_VID_FIFO_SIZE_STATUS(0));
-	DUMPREG(DISPC_VID_ROW_INC(0));
-	DUMPREG(DISPC_VID_PIXEL_INC(0));
-	DUMPREG(DISPC_VID_FIR(0));
-	DUMPREG(DISPC_VID_PICTURE_SIZE(0));
-	DUMPREG(DISPC_VID_ACCU0(0));
-	DUMPREG(DISPC_VID_ACCU1(0));
+	DUMPREG(DISPC_OVL_BA0(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_BA1(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_POSITION(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_SIZE(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_ATTRIBUTES(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_FIFO_SIZE_STATUS(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_ROW_INC(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_PIXEL_INC(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_FIR(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_PICTURE_SIZE(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_ACCU0(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_ACCU1(OMAP_DSS_VIDEO1));
 
-	DUMPREG(DISPC_VID_BA0(1));
-	DUMPREG(DISPC_VID_BA1(1));
-	DUMPREG(DISPC_VID_POSITION(1));
-	DUMPREG(DISPC_VID_SIZE(1));
-	DUMPREG(DISPC_VID_ATTRIBUTES(1));
-	DUMPREG(DISPC_VID_FIFO_THRESHOLD(1));
-	DUMPREG(DISPC_VID_FIFO_SIZE_STATUS(1));
-	DUMPREG(DISPC_VID_ROW_INC(1));
-	DUMPREG(DISPC_VID_PIXEL_INC(1));
-	DUMPREG(DISPC_VID_FIR(1));
-	DUMPREG(DISPC_VID_PICTURE_SIZE(1));
-	DUMPREG(DISPC_VID_ACCU0(1));
-	DUMPREG(DISPC_VID_ACCU1(1));
+	DUMPREG(DISPC_OVL_BA0(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_BA1(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_POSITION(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_SIZE(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_ATTRIBUTES(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_FIFO_THRESHOLD(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_FIFO_SIZE_STATUS(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_ROW_INC(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_PIXEL_INC(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_FIR(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_PICTURE_SIZE(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_ACCU0(OMAP_DSS_VIDEO2));
+	DUMPREG(DISPC_OVL_ACCU1(OMAP_DSS_VIDEO2));
 
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_H(0, 7));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(0, 7));
-	DUMPREG(DISPC_VID_CONV_COEF(0, 0));
-	DUMPREG(DISPC_VID_CONV_COEF(0, 1));
-	DUMPREG(DISPC_VID_CONV_COEF(0, 2));
-	DUMPREG(DISPC_VID_CONV_COEF(0, 3));
-	DUMPREG(DISPC_VID_CONV_COEF(0, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_V(0, 7));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO1, 7));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO1, 7));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 0));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 1));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 2));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 3));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO1, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO1, 7));
 
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_H(1, 7));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_HV(1, 7));
-	DUMPREG(DISPC_VID_CONV_COEF(1, 0));
-	DUMPREG(DISPC_VID_CONV_COEF(1, 1));
-	DUMPREG(DISPC_VID_CONV_COEF(1, 2));
-	DUMPREG(DISPC_VID_CONV_COEF(1, 3));
-	DUMPREG(DISPC_VID_CONV_COEF(1, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 0));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 1));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 2));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 3));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 4));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 5));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 6));
-	DUMPREG(DISPC_VID_FIR_COEF_V(1, 7));
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		DUMPREG(DISPC_OVL_BA0_UV(OMAP_DSS_VIDEO1));
+		DUMPREG(DISPC_OVL_BA1_UV(OMAP_DSS_VIDEO1));
+		DUMPREG(DISPC_OVL_FIR2(OMAP_DSS_VIDEO1));
+		DUMPREG(DISPC_OVL_ACCU2_0(OMAP_DSS_VIDEO1));
+		DUMPREG(DISPC_OVL_ACCU2_1(OMAP_DSS_VIDEO1));
 
-	DUMPREG(DISPC_VID_PRELOAD(0));
-	DUMPREG(DISPC_VID_PRELOAD(1));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO1, 7));
+
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO1, 7));
+
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO1, 7));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		DUMPREG(DISPC_OVL_ATTRIBUTES2(OMAP_DSS_VIDEO1));
+
+
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_H(OMAP_DSS_VIDEO2, 7));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_HV(OMAP_DSS_VIDEO2, 7));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 0));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 1));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 2));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 3));
+	DUMPREG(DISPC_OVL_CONV_COEF(OMAP_DSS_VIDEO2, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 0));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 1));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 2));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 3));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 4));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 5));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 6));
+	DUMPREG(DISPC_OVL_FIR_COEF_V(OMAP_DSS_VIDEO2, 7));
+
+	if (dss_has_feature(FEAT_HANDLE_UV_SEPARATE)) {
+		DUMPREG(DISPC_OVL_BA0_UV(OMAP_DSS_VIDEO2));
+		DUMPREG(DISPC_OVL_BA1_UV(OMAP_DSS_VIDEO2));
+		DUMPREG(DISPC_OVL_FIR2(OMAP_DSS_VIDEO2));
+		DUMPREG(DISPC_OVL_ACCU2_0(OMAP_DSS_VIDEO2));
+		DUMPREG(DISPC_OVL_ACCU2_1(OMAP_DSS_VIDEO2));
+
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_H2(OMAP_DSS_VIDEO2, 7));
+
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_HV2(OMAP_DSS_VIDEO2, 7));
+
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 0));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 1));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 2));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 3));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 4));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 5));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 6));
+		DUMPREG(DISPC_OVL_FIR_COEF_V2(OMAP_DSS_VIDEO2, 7));
+	}
+	if (dss_has_feature(FEAT_ATTR2))
+		DUMPREG(DISPC_OVL_ATTRIBUTES2(OMAP_DSS_VIDEO2));
+
+	DUMPREG(DISPC_OVL_PRELOAD(OMAP_DSS_VIDEO1));
+	DUMPREG(DISPC_OVL_PRELOAD(OMAP_DSS_VIDEO2));
 
 	dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 #undef DUMPREG
@@ -3388,11 +3574,12 @@
 		       bool ilace,
 		       enum omap_dss_rotation_type rotation_type,
 		       u8 rotation, bool mirror, u8 global_alpha,
-		       u8 pre_mult_alpha, enum omap_channel channel)
+		       u8 pre_mult_alpha, enum omap_channel channel,
+		       u32 puv_addr)
 {
 	int r = 0;
 
-	DSSDBG("dispc_setup_plane %d, pa %x, sw %d, %d,%d, %dx%d -> "
+	DSSDBG("dispc_setup_plane %d, pa %x, sw %d, %d, %d, %dx%d -> "
 	       "%dx%d, ilace %d, cmode %x, rot %d, mir %d chan %d\n",
 	       plane, paddr, screen_width, pos_x, pos_y,
 	       width, height,
@@ -3411,7 +3598,8 @@
 			   rotation_type,
 			   rotation, mirror,
 			   global_alpha,
-			   pre_mult_alpha, channel);
+			   pre_mult_alpha,
+			   channel, puv_addr);
 
 	enable_clocks(0);
 
diff --git a/drivers/video/omap2/dss/dispc.h b/drivers/video/omap2/dss/dispc.h
new file mode 100644
index 0000000..6c9ee0a
--- /dev/null
+++ b/drivers/video/omap2/dss/dispc.h
@@ -0,0 +1,691 @@
+/*
+ * linux/drivers/video/omap2/dss/dispc.h
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Archit Taneja <archit@ti.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAP2_DISPC_REG_H
+#define __OMAP2_DISPC_REG_H
+
+/* DISPC common registers */
+#define DISPC_REVISION			0x0000
+#define DISPC_SYSCONFIG			0x0010
+#define DISPC_SYSSTATUS			0x0014
+#define DISPC_IRQSTATUS			0x0018
+#define DISPC_IRQENABLE			0x001C
+#define DISPC_CONTROL			0x0040
+#define DISPC_CONFIG			0x0044
+#define DISPC_CAPABLE			0x0048
+#define DISPC_LINE_STATUS		0x005C
+#define DISPC_LINE_NUMBER		0x0060
+#define DISPC_GLOBAL_ALPHA		0x0074
+#define DISPC_CONTROL2			0x0238
+#define DISPC_CONFIG2			0x0620
+#define DISPC_DIVISOR			0x0804
+
+/* DISPC overlay registers */
+#define DISPC_OVL_BA0(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_BA0_OFFSET(n))
+#define DISPC_OVL_BA1(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_BA1_OFFSET(n))
+#define DISPC_OVL_BA0_UV(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_BA0_UV_OFFSET(n))
+#define DISPC_OVL_BA1_UV(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_BA1_UV_OFFSET(n))
+#define DISPC_OVL_POSITION(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_POS_OFFSET(n))
+#define DISPC_OVL_SIZE(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_SIZE_OFFSET(n))
+#define DISPC_OVL_ATTRIBUTES(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ATTR_OFFSET(n))
+#define DISPC_OVL_ATTRIBUTES2(n)	(DISPC_OVL_BASE(n) + \
+					DISPC_ATTR2_OFFSET(n))
+#define DISPC_OVL_FIFO_THRESHOLD(n)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIFO_THRESH_OFFSET(n))
+#define DISPC_OVL_FIFO_SIZE_STATUS(n)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIFO_SIZE_STATUS_OFFSET(n))
+#define DISPC_OVL_ROW_INC(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ROW_INC_OFFSET(n))
+#define DISPC_OVL_PIXEL_INC(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_PIX_INC_OFFSET(n))
+#define DISPC_OVL_WINDOW_SKIP(n)	(DISPC_OVL_BASE(n) + \
+					DISPC_WINDOW_SKIP_OFFSET(n))
+#define DISPC_OVL_TABLE_BA(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_TABLE_BA_OFFSET(n))
+#define DISPC_OVL_FIR(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_OFFSET(n))
+#define DISPC_OVL_FIR2(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_FIR2_OFFSET(n))
+#define DISPC_OVL_PICTURE_SIZE(n)	(DISPC_OVL_BASE(n) + \
+					DISPC_PIC_SIZE_OFFSET(n))
+#define DISPC_OVL_ACCU0(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ACCU0_OFFSET(n))
+#define DISPC_OVL_ACCU1(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ACCU1_OFFSET(n))
+#define DISPC_OVL_ACCU2_0(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ACCU2_0_OFFSET(n))
+#define DISPC_OVL_ACCU2_1(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_ACCU2_1_OFFSET(n))
+#define DISPC_OVL_FIR_COEF_H(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_H_OFFSET(n, i))
+#define DISPC_OVL_FIR_COEF_HV(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_HV_OFFSET(n, i))
+#define DISPC_OVL_FIR_COEF_H2(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_H2_OFFSET(n, i))
+#define DISPC_OVL_FIR_COEF_HV2(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_HV2_OFFSET(n, i))
+#define DISPC_OVL_CONV_COEF(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_CONV_COEF_OFFSET(n, i))
+#define DISPC_OVL_FIR_COEF_V(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_V_OFFSET(n, i))
+#define DISPC_OVL_FIR_COEF_V2(n, i)	(DISPC_OVL_BASE(n) + \
+					DISPC_FIR_COEF_V2_OFFSET(n, i))
+#define DISPC_OVL_PRELOAD(n)		(DISPC_OVL_BASE(n) + \
+					DISPC_PRELOAD_OFFSET(n))
+
+/* DISPC manager/channel specific registers */
+static inline u16 DISPC_DEFAULT_COLOR(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x004C;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		return 0x0050;
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03AC;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_TRANS_COLOR(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0054;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		return 0x0058;
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03B0;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_TIMING_H(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0064;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x0400;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_TIMING_V(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0068;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x0404;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_POL_FREQ(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x006C;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x0408;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_DIVISORo(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0070;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x040C;
+	default:
+		BUG();
+	}
+}
+
+/* Named as DISPC_SIZE_LCD, DISPC_SIZE_DIGIT and DISPC_SIZE_LCD2 in TRM */
+static inline u16 DISPC_SIZE_MGR(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x007C;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		return 0x0078;
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03CC;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_DATA_CYCLE1(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x01D4;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03C0;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_DATA_CYCLE2(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x01D8;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03C4;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_DATA_CYCLE3(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x01DC;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03C8;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_CPR_COEF_R(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0220;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03BC;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_CPR_COEF_G(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0224;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03B8;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_CPR_COEF_B(enum omap_channel channel)
+{
+	switch (channel) {
+	case OMAP_DSS_CHANNEL_LCD:
+		return 0x0228;
+	case OMAP_DSS_CHANNEL_DIGIT:
+		BUG();
+	case OMAP_DSS_CHANNEL_LCD2:
+		return 0x03B4;
+	default:
+		BUG();
+	}
+}
+
+/* DISPC overlay register base addresses */
+static inline u16 DISPC_OVL_BASE(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0080;
+	case OMAP_DSS_VIDEO1:
+		return 0x00BC;
+	case OMAP_DSS_VIDEO2:
+		return 0x014C;
+	default:
+		BUG();
+	}
+}
+
+/* DISPC overlay register offsets */
+static inline u16 DISPC_BA0_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0000;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_BA1_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0004;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_BA0_UV_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0544;
+	case OMAP_DSS_VIDEO2:
+		return 0x04BC;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_BA1_UV_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0548;
+	case OMAP_DSS_VIDEO2:
+		return 0x04C0;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_POS_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0008;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_SIZE_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x000C;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ATTR_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0020;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0010;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ATTR2_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0568;
+	case OMAP_DSS_VIDEO2:
+		return 0x04DC;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_FIFO_THRESH_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0024;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0014;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_FIFO_SIZE_STATUS_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0028;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0018;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ROW_INC_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x002C;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x001C;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_PIX_INC_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0030;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0020;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_WINDOW_SKIP_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0034;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		BUG();
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_TABLE_BA_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x0038;
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		BUG();
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_FIR_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0024;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_FIR2_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0580;
+	case OMAP_DSS_VIDEO2:
+		return 0x055C;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_PIC_SIZE_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0028;
+	default:
+		BUG();
+	}
+}
+
+
+static inline u16 DISPC_ACCU0_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x002C;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ACCU2_0_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0584;
+	case OMAP_DSS_VIDEO2:
+		return 0x0560;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ACCU1_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0030;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_ACCU2_1_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0588;
+	case OMAP_DSS_VIDEO2:
+		return 0x0564;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_H_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0034 + i * 0x8;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_H2_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x058C + i * 0x8;
+	case OMAP_DSS_VIDEO2:
+		return 0x0568 + i * 0x8;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_HV_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0038 + i * 0x8;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_HV2_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0590 + i * 8;
+	case OMAP_DSS_VIDEO2:
+		return 0x056C + i * 0x8;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4,} */
+static inline u16 DISPC_CONV_COEF_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+	case OMAP_DSS_VIDEO2:
+		return 0x0074 + i * 0x4;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_V_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x0124 + i * 0x4;
+	case OMAP_DSS_VIDEO2:
+		return 0x00B4 + i * 0x4;
+	default:
+		BUG();
+	}
+}
+
+/* coef index i = {0, 1, 2, 3, 4, 5, 6, 7} */
+static inline u16 DISPC_FIR_COEF_V2_OFFSET(enum omap_plane plane, u16 i)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		BUG();
+	case OMAP_DSS_VIDEO1:
+		return 0x05CC + i * 0x4;
+	case OMAP_DSS_VIDEO2:
+		return 0x05A8 + i * 0x4;
+	default:
+		BUG();
+	}
+}
+
+static inline u16 DISPC_PRELOAD_OFFSET(enum omap_plane plane)
+{
+	switch (plane) {
+	case OMAP_DSS_GFX:
+		return 0x01AC;
+	case OMAP_DSS_VIDEO1:
+		return 0x0174;
+	case OMAP_DSS_VIDEO2:
+		return 0x00E8;
+	default:
+		BUG();
+	}
+}
+#endif
diff --git a/drivers/video/omap2/dss/display.c b/drivers/video/omap2/dss/display.c
index a85a6f3..c2dfc8c 100644
--- a/drivers/video/omap2/dss/display.c
+++ b/drivers/video/omap2/dss/display.c
@@ -27,7 +27,7 @@
 #include <linux/jiffies.h>
 #include <linux/platform_device.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include "dss.h"
 
 static ssize_t display_enabled_show(struct device *dev,
@@ -44,9 +44,13 @@
 		const char *buf, size_t size)
 {
 	struct omap_dss_device *dssdev = to_dss_device(dev);
-	bool enabled, r;
+	int r, enabled;
 
-	enabled = simple_strtoul(buf, NULL, 10);
+	r = kstrtoint(buf, 0, &enabled);
+	if (r)
+		return r;
+
+	enabled = !!enabled;
 
 	if (enabled != (dssdev->state != OMAP_DSS_DISPLAY_DISABLED)) {
 		if (enabled) {
@@ -82,7 +86,9 @@
 	if (!dssdev->driver->set_update_mode)
 		return -EINVAL;
 
-	val = simple_strtoul(buf, NULL, 10);
+	r = kstrtoint(buf, 0, &val);
+	if (r)
+		return r;
 
 	switch (val) {
 	case OMAP_DSS_UPDATE_DISABLED:
@@ -114,13 +120,16 @@
 		struct device_attribute *attr, const char *buf, size_t size)
 {
 	struct omap_dss_device *dssdev = to_dss_device(dev);
-	unsigned long te;
-	int r;
+	int te, r;
 
 	if (!dssdev->driver->enable_te || !dssdev->driver->get_te)
 		return -ENOENT;
 
-	te = simple_strtoul(buf, NULL, 0);
+	r = kstrtoint(buf, 0, &te);
+	if (r)
+		return r;
+
+	te = !!te;
 
 	r = dssdev->driver->enable_te(dssdev, te);
 	if (r)
@@ -196,13 +205,14 @@
 		struct device_attribute *attr, const char *buf, size_t size)
 {
 	struct omap_dss_device *dssdev = to_dss_device(dev);
-	unsigned long rot;
-	int r;
+	int rot, r;
 
 	if (!dssdev->driver->set_rotate || !dssdev->driver->get_rotate)
 		return -ENOENT;
 
-	rot = simple_strtoul(buf, NULL, 0);
+	r = kstrtoint(buf, 0, &rot);
+	if (r)
+		return r;
 
 	r = dssdev->driver->set_rotate(dssdev, rot);
 	if (r)
@@ -226,13 +236,16 @@
 		struct device_attribute *attr, const char *buf, size_t size)
 {
 	struct omap_dss_device *dssdev = to_dss_device(dev);
-	unsigned long mirror;
-	int r;
+	int mirror, r;
 
 	if (!dssdev->driver->set_mirror || !dssdev->driver->get_mirror)
 		return -ENOENT;
 
-	mirror = simple_strtoul(buf, NULL, 0);
+	r = kstrtoint(buf, 0, &mirror);
+	if (r)
+		return r;
+
+	mirror = !!mirror;
 
 	r = dssdev->driver->set_mirror(dssdev, mirror);
 	if (r)
@@ -259,14 +272,15 @@
 		struct device_attribute *attr, const char *buf, size_t size)
 {
 	struct omap_dss_device *dssdev = to_dss_device(dev);
-	unsigned long wss;
+	u32 wss;
 	int r;
 
 	if (!dssdev->driver->get_wss || !dssdev->driver->set_wss)
 		return -ENOENT;
 
-	if (strict_strtoul(buf, 0, &wss))
-		return -EINVAL;
+	r = kstrtou32(buf, 0, &wss);
+	if (r)
+		return r;
 
 	if (wss > 0xfffff)
 		return -EINVAL;
diff --git a/drivers/video/omap2/dss/dpi.c b/drivers/video/omap2/dss/dpi.c
index 2d3ca4c..ff6bd30 100644
--- a/drivers/video/omap2/dss/dpi.c
+++ b/drivers/video/omap2/dss/dpi.c
@@ -30,16 +30,40 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 
 #include "dss.h"
 
 static struct {
 	struct regulator *vdds_dsi_reg;
+	struct platform_device *dsidev;
 } dpi;
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
+static struct platform_device *dpi_get_dsidev(enum omap_dss_clk_source clk)
+{
+	int dsi_module;
+
+	dsi_module = clk == OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC ? 0 : 1;
+
+	return dsi_get_dsidev_from_id(dsi_module);
+}
+
+static bool dpi_use_dsi_pll(struct omap_dss_device *dssdev)
+{
+	if (dssdev->clocks.dispc.dispc_fclk_src ==
+			OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC ||
+			dssdev->clocks.dispc.dispc_fclk_src ==
+			OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC ||
+			dssdev->clocks.dispc.channel.lcd_clk_src ==
+			OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC ||
+			dssdev->clocks.dispc.channel.lcd_clk_src ==
+			OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC)
+		return true;
+	else
+		return false;
+}
+
 static int dpi_set_dsi_clk(struct omap_dss_device *dssdev, bool is_tft,
 		unsigned long pck_req, unsigned long *fck, int *lck_div,
 		int *pck_div)
@@ -48,16 +72,16 @@
 	struct dispc_clock_info dispc_cinfo;
 	int r;
 
-	r = dsi_pll_calc_clock_div_pck(is_tft, pck_req, &dsi_cinfo,
-			&dispc_cinfo);
+	r = dsi_pll_calc_clock_div_pck(dpi.dsidev, is_tft, pck_req,
+			&dsi_cinfo, &dispc_cinfo);
 	if (r)
 		return r;
 
-	r = dsi_pll_set_clock_div(&dsi_cinfo);
+	r = dsi_pll_set_clock_div(dpi.dsidev, &dsi_cinfo);
 	if (r)
 		return r;
 
-	dss_select_dispc_clk_source(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC);
+	dss_select_dispc_clk_source(dssdev->clocks.dispc.dispc_fclk_src);
 
 	r = dispc_set_clock_div(dssdev->manager->id, &dispc_cinfo);
 	if (r)
@@ -69,7 +93,7 @@
 
 	return 0;
 }
-#else
+
 static int dpi_set_dispc_clk(struct omap_dss_device *dssdev, bool is_tft,
 		unsigned long pck_req, unsigned long *fck, int *lck_div,
 		int *pck_div)
@@ -96,13 +120,12 @@
 
 	return 0;
 }
-#endif
 
 static int dpi_set_mode(struct omap_dss_device *dssdev)
 {
 	struct omap_video_timings *t = &dssdev->panel.timings;
-	int lck_div, pck_div;
-	unsigned long fck;
+	int lck_div = 0, pck_div = 0;
+	unsigned long fck = 0;
 	unsigned long pck;
 	bool is_tft;
 	int r = 0;
@@ -114,13 +137,12 @@
 
 	is_tft = (dssdev->panel.config & OMAP_DSS_LCD_TFT) != 0;
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	r = dpi_set_dsi_clk(dssdev, is_tft, t->pixel_clock * 1000, &fck,
-			&lck_div, &pck_div);
-#else
-	r = dpi_set_dispc_clk(dssdev, is_tft, t->pixel_clock * 1000, &fck,
-			&lck_div, &pck_div);
-#endif
+	if (dpi_use_dsi_pll(dssdev))
+		r = dpi_set_dsi_clk(dssdev, is_tft, t->pixel_clock * 1000,
+				&fck, &lck_div, &pck_div);
+	else
+		r = dpi_set_dispc_clk(dssdev, is_tft, t->pixel_clock * 1000,
+				&fck, &lck_div, &pck_div);
 	if (r)
 		goto err0;
 
@@ -179,12 +201,13 @@
 	if (r)
 		goto err2;
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	dss_clk_enable(DSS_CLK_SYSCK);
-	r = dsi_pll_init(dssdev, 0, 1);
-	if (r)
-		goto err3;
-#endif
+	if (dpi_use_dsi_pll(dssdev)) {
+		dss_clk_enable(DSS_CLK_SYSCK);
+		r = dsi_pll_init(dpi.dsidev, 0, 1);
+		if (r)
+			goto err3;
+	}
+
 	r = dpi_set_mode(dssdev);
 	if (r)
 		goto err4;
@@ -196,11 +219,11 @@
 	return 0;
 
 err4:
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	dsi_pll_uninit();
+	if (dpi_use_dsi_pll(dssdev))
+		dsi_pll_uninit(dpi.dsidev, true);
 err3:
-	dss_clk_disable(DSS_CLK_SYSCK);
-#endif
+	if (dpi_use_dsi_pll(dssdev))
+		dss_clk_disable(DSS_CLK_SYSCK);
 err2:
 	dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 	if (cpu_is_omap34xx())
@@ -216,11 +239,11 @@
 {
 	dssdev->manager->disable(dssdev->manager);
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	dss_select_dispc_clk_source(DSS_CLK_SRC_FCK);
-	dsi_pll_uninit();
-	dss_clk_disable(DSS_CLK_SYSCK);
-#endif
+	if (dpi_use_dsi_pll(dssdev)) {
+		dss_select_dispc_clk_source(OMAP_DSS_CLK_SRC_FCK);
+		dsi_pll_uninit(dpi.dsidev, true);
+		dss_clk_disable(DSS_CLK_SYSCK);
+	}
 
 	dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 
@@ -251,6 +274,7 @@
 	int lck_div, pck_div;
 	unsigned long fck;
 	unsigned long pck;
+	struct dispc_clock_info dispc_cinfo;
 
 	if (!dispc_lcd_timings_ok(timings))
 		return -EINVAL;
@@ -260,11 +284,9 @@
 
 	is_tft = (dssdev->panel.config & OMAP_DSS_LCD_TFT) != 0;
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	{
+	if (dpi_use_dsi_pll(dssdev)) {
 		struct dsi_clock_info dsi_cinfo;
-		struct dispc_clock_info dispc_cinfo;
-		r = dsi_pll_calc_clock_div_pck(is_tft,
+		r = dsi_pll_calc_clock_div_pck(dpi.dsidev, is_tft,
 				timings->pixel_clock * 1000,
 				&dsi_cinfo, &dispc_cinfo);
 
@@ -272,13 +294,8 @@
 			return r;
 
 		fck = dsi_cinfo.dsi_pll_hsdiv_dispc_clk;
-		lck_div = dispc_cinfo.lck_div;
-		pck_div = dispc_cinfo.pck_div;
-	}
-#else
-	{
+	} else {
 		struct dss_clock_info dss_cinfo;
-		struct dispc_clock_info dispc_cinfo;
 		r = dss_calc_clock_div(is_tft, timings->pixel_clock * 1000,
 				&dss_cinfo, &dispc_cinfo);
 
@@ -286,10 +303,10 @@
 			return r;
 
 		fck = dss_cinfo.fck;
-		lck_div = dispc_cinfo.lck_div;
-		pck_div = dispc_cinfo.pck_div;
 	}
-#endif
+
+	lck_div = dispc_cinfo.lck_div;
+	pck_div = dispc_cinfo.pck_div;
 
 	pck = fck / lck_div / pck_div / 1000;
 
@@ -316,6 +333,12 @@
 		dpi.vdds_dsi_reg = vdds_dsi;
 	}
 
+	if (dpi_use_dsi_pll(dssdev)) {
+		enum omap_dss_clk_source dispc_fclk_src =
+			dssdev->clocks.dispc.dispc_fclk_src;
+		dpi.dsidev = dpi_get_dsidev(dispc_fclk_src);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index 0a7f1a4..345757c 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -33,8 +33,11 @@
 #include <linux/regulator/consumer.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/clock.h>
 
 #include "dss.h"
@@ -56,6 +59,7 @@
 #define DSI_IRQSTATUS			DSI_REG(0x0018)
 #define DSI_IRQENABLE			DSI_REG(0x001C)
 #define DSI_CTRL			DSI_REG(0x0040)
+#define DSI_GNQ				DSI_REG(0x0044)
 #define DSI_COMPLEXIO_CFG1		DSI_REG(0x0048)
 #define DSI_COMPLEXIO_IRQ_STATUS	DSI_REG(0x004C)
 #define DSI_COMPLEXIO_IRQ_ENABLE	DSI_REG(0x0050)
@@ -90,6 +94,7 @@
 #define DSI_DSIPHY_CFG1			DSI_REG(0x200 + 0x0004)
 #define DSI_DSIPHY_CFG2			DSI_REG(0x200 + 0x0008)
 #define DSI_DSIPHY_CFG5			DSI_REG(0x200 + 0x0014)
+#define DSI_DSIPHY_CFG10		DSI_REG(0x200 + 0x0028)
 
 /* DSI_PLL_CTRL_SCP */
 
@@ -99,11 +104,11 @@
 #define DSI_PLL_CONFIGURATION1		DSI_REG(0x300 + 0x000C)
 #define DSI_PLL_CONFIGURATION2		DSI_REG(0x300 + 0x0010)
 
-#define REG_GET(idx, start, end) \
-	FLD_GET(dsi_read_reg(idx), start, end)
+#define REG_GET(dsidev, idx, start, end) \
+	FLD_GET(dsi_read_reg(dsidev, idx), start, end)
 
-#define REG_FLD_MOD(idx, val, start, end) \
-	dsi_write_reg(idx, FLD_MOD(dsi_read_reg(idx), val, start, end))
+#define REG_FLD_MOD(dsidev, idx, val, start, end) \
+	dsi_write_reg(dsidev, idx, FLD_MOD(dsi_read_reg(dsidev, idx), val, start, end))
 
 /* Global interrupts */
 #define DSI_IRQ_VC0		(1 << 0)
@@ -147,31 +152,50 @@
 #define DSI_CIO_IRQ_ERRSYNCESC1		(1 << 0)
 #define DSI_CIO_IRQ_ERRSYNCESC2		(1 << 1)
 #define DSI_CIO_IRQ_ERRSYNCESC3		(1 << 2)
+#define DSI_CIO_IRQ_ERRSYNCESC4		(1 << 3)
+#define DSI_CIO_IRQ_ERRSYNCESC5		(1 << 4)
 #define DSI_CIO_IRQ_ERRESC1		(1 << 5)
 #define DSI_CIO_IRQ_ERRESC2		(1 << 6)
 #define DSI_CIO_IRQ_ERRESC3		(1 << 7)
+#define DSI_CIO_IRQ_ERRESC4		(1 << 8)
+#define DSI_CIO_IRQ_ERRESC5		(1 << 9)
 #define DSI_CIO_IRQ_ERRCONTROL1		(1 << 10)
 #define DSI_CIO_IRQ_ERRCONTROL2		(1 << 11)
 #define DSI_CIO_IRQ_ERRCONTROL3		(1 << 12)
+#define DSI_CIO_IRQ_ERRCONTROL4		(1 << 13)
+#define DSI_CIO_IRQ_ERRCONTROL5		(1 << 14)
 #define DSI_CIO_IRQ_STATEULPS1		(1 << 15)
 #define DSI_CIO_IRQ_STATEULPS2		(1 << 16)
 #define DSI_CIO_IRQ_STATEULPS3		(1 << 17)
+#define DSI_CIO_IRQ_STATEULPS4		(1 << 18)
+#define DSI_CIO_IRQ_STATEULPS5		(1 << 19)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP0_1	(1 << 20)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP1_1	(1 << 21)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP0_2	(1 << 22)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP1_2	(1 << 23)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP0_3	(1 << 24)
 #define DSI_CIO_IRQ_ERRCONTENTIONLP1_3	(1 << 25)
+#define DSI_CIO_IRQ_ERRCONTENTIONLP0_4	(1 << 26)
+#define DSI_CIO_IRQ_ERRCONTENTIONLP1_4	(1 << 27)
+#define DSI_CIO_IRQ_ERRCONTENTIONLP0_5	(1 << 28)
+#define DSI_CIO_IRQ_ERRCONTENTIONLP1_5	(1 << 29)
 #define DSI_CIO_IRQ_ULPSACTIVENOT_ALL0	(1 << 30)
 #define DSI_CIO_IRQ_ULPSACTIVENOT_ALL1	(1 << 31)
 #define DSI_CIO_IRQ_ERROR_MASK \
 	(DSI_CIO_IRQ_ERRSYNCESC1 | DSI_CIO_IRQ_ERRSYNCESC2 | \
-	 DSI_CIO_IRQ_ERRSYNCESC3 | DSI_CIO_IRQ_ERRESC1 | DSI_CIO_IRQ_ERRESC2 | \
-	 DSI_CIO_IRQ_ERRESC3 | DSI_CIO_IRQ_ERRCONTROL1 | \
-	 DSI_CIO_IRQ_ERRCONTROL2 | DSI_CIO_IRQ_ERRCONTROL3 | \
+	 DSI_CIO_IRQ_ERRSYNCESC3 | DSI_CIO_IRQ_ERRSYNCESC4 | \
+	 DSI_CIO_IRQ_ERRSYNCESC5 | \
+	 DSI_CIO_IRQ_ERRESC1 | DSI_CIO_IRQ_ERRESC2 | \
+	 DSI_CIO_IRQ_ERRESC3 | DSI_CIO_IRQ_ERRESC4 | \
+	 DSI_CIO_IRQ_ERRESC5 | \
+	 DSI_CIO_IRQ_ERRCONTROL1 | DSI_CIO_IRQ_ERRCONTROL2 | \
+	 DSI_CIO_IRQ_ERRCONTROL3 | DSI_CIO_IRQ_ERRCONTROL4 | \
+	 DSI_CIO_IRQ_ERRCONTROL5 | \
 	 DSI_CIO_IRQ_ERRCONTENTIONLP0_1 | DSI_CIO_IRQ_ERRCONTENTIONLP1_1 | \
 	 DSI_CIO_IRQ_ERRCONTENTIONLP0_2 | DSI_CIO_IRQ_ERRCONTENTIONLP1_2 | \
-	 DSI_CIO_IRQ_ERRCONTENTIONLP0_3 | DSI_CIO_IRQ_ERRCONTENTIONLP1_3)
+	 DSI_CIO_IRQ_ERRCONTENTIONLP0_3 | DSI_CIO_IRQ_ERRCONTENTIONLP1_3 | \
+	 DSI_CIO_IRQ_ERRCONTENTIONLP0_4 | DSI_CIO_IRQ_ERRCONTENTIONLP1_4 | \
+	 DSI_CIO_IRQ_ERRCONTENTIONLP0_5 | DSI_CIO_IRQ_ERRCONTENTIONLP1_5)
 
 #define DSI_DT_DCS_SHORT_WRITE_0	0x05
 #define DSI_DT_DCS_SHORT_WRITE_1	0x15
@@ -208,6 +232,19 @@
 	DSI_VC_MODE_VP,
 };
 
+enum dsi_lane {
+	DSI_CLK_P	= 1 << 0,
+	DSI_CLK_N	= 1 << 1,
+	DSI_DATA1_P	= 1 << 2,
+	DSI_DATA1_N	= 1 << 3,
+	DSI_DATA2_P	= 1 << 4,
+	DSI_DATA2_N	= 1 << 5,
+	DSI_DATA3_P	= 1 << 6,
+	DSI_DATA3_N	= 1 << 7,
+	DSI_DATA4_P	= 1 << 8,
+	DSI_DATA4_N	= 1 << 9,
+};
+
 struct dsi_update_region {
 	u16 x, y, w, h;
 	struct omap_dss_device *device;
@@ -227,14 +264,16 @@
 	struct dsi_isr_data isr_table_cio[DSI_MAX_NR_ISRS];
 };
 
-static struct
-{
+struct dsi_data {
 	struct platform_device *pdev;
 	void __iomem	*base;
 	int irq;
 
+	void (*dsi_mux_pads)(bool enable);
+
 	struct dsi_clock_info current_cinfo;
 
+	bool vdds_dsi_enabled;
 	struct regulator *vdds_dsi_reg;
 
 	struct {
@@ -258,8 +297,7 @@
 	struct dsi_update_region update_region;
 
 	bool te_enabled;
-
-	struct workqueue_struct *workqueue;
+	bool ulps_enabled;
 
 	void (*framedone_callback)(int, void *);
 	void *framedone_data;
@@ -292,21 +330,63 @@
 	unsigned long  regm_dispc_max, regm_dsi_max;
 	unsigned long  fint_min, fint_max;
 	unsigned long lpdiv_max;
-} dsi;
+
+	int num_data_lanes;
+
+	unsigned scp_clk_refcount;
+};
+
+struct dsi_packet_sent_handler_data {
+	struct platform_device *dsidev;
+	struct completion *completion;
+};
+
+static struct platform_device *dsi_pdev_map[MAX_NUM_DSI];
 
 #ifdef DEBUG
 static unsigned int dsi_perf;
 module_param_named(dsi_perf, dsi_perf, bool, 0644);
 #endif
 
-static inline void dsi_write_reg(const struct dsi_reg idx, u32 val)
+static inline struct dsi_data *dsi_get_dsidrv_data(struct platform_device *dsidev)
 {
-	__raw_writel(val, dsi.base + idx.idx);
+	return dev_get_drvdata(&dsidev->dev);
 }
 
-static inline u32 dsi_read_reg(const struct dsi_reg idx)
+static inline struct platform_device *dsi_get_dsidev_from_dssdev(struct omap_dss_device *dssdev)
 {
-	return __raw_readl(dsi.base + idx.idx);
+	return dsi_pdev_map[dssdev->phy.dsi.module];
+}
+
+struct platform_device *dsi_get_dsidev_from_id(int module)
+{
+	return dsi_pdev_map[module];
+}
+
+static int dsi_get_dsidev_id(struct platform_device *dsidev)
+{
+	/* TEMP: Pass 0 as the dsi module index till the time the dsi platform
+	 * device names aren't changed to the form "omapdss_dsi.0",
+	 * "omapdss_dsi.1" and so on */
+	BUG_ON(dsidev->id != -1);
+
+	return 0;
+}
+
+static inline void dsi_write_reg(struct platform_device *dsidev,
+		const struct dsi_reg idx, u32 val)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	__raw_writel(val, dsi->base + idx.idx);
+}
+
+static inline u32 dsi_read_reg(struct platform_device *dsidev,
+		const struct dsi_reg idx)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	return __raw_readl(dsi->base + idx.idx);
 }
 
 
@@ -318,21 +398,29 @@
 {
 }
 
-void dsi_bus_lock(void)
+void dsi_bus_lock(struct omap_dss_device *dssdev)
 {
-	down(&dsi.bus_lock);
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	down(&dsi->bus_lock);
 }
 EXPORT_SYMBOL(dsi_bus_lock);
 
-void dsi_bus_unlock(void)
+void dsi_bus_unlock(struct omap_dss_device *dssdev)
 {
-	up(&dsi.bus_lock);
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	up(&dsi->bus_lock);
 }
 EXPORT_SYMBOL(dsi_bus_unlock);
 
-static bool dsi_bus_is_locked(void)
+static bool dsi_bus_is_locked(struct platform_device *dsidev)
 {
-	return dsi.bus_lock.count == 0;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	return dsi->bus_lock.count == 0;
 }
 
 static void dsi_completion_handler(void *data, u32 mask)
@@ -340,12 +428,12 @@
 	complete((struct completion *)data);
 }
 
-static inline int wait_for_bit_change(const struct dsi_reg idx, int bitnum,
-		int value)
+static inline int wait_for_bit_change(struct platform_device *dsidev,
+		const struct dsi_reg idx, int bitnum, int value)
 {
 	int t = 100000;
 
-	while (REG_GET(idx, bitnum, bitnum) != value) {
+	while (REG_GET(dsidev, idx, bitnum, bitnum) != value) {
 		if (--t == 0)
 			return !value;
 	}
@@ -354,18 +442,21 @@
 }
 
 #ifdef DEBUG
-static void dsi_perf_mark_setup(void)
+static void dsi_perf_mark_setup(struct platform_device *dsidev)
 {
-	dsi.perf_setup_time = ktime_get();
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	dsi->perf_setup_time = ktime_get();
 }
 
-static void dsi_perf_mark_start(void)
+static void dsi_perf_mark_start(struct platform_device *dsidev)
 {
-	dsi.perf_start_time = ktime_get();
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	dsi->perf_start_time = ktime_get();
 }
 
-static void dsi_perf_show(const char *name)
+static void dsi_perf_show(struct platform_device *dsidev, const char *name)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	ktime_t t, setup_time, trans_time;
 	u32 total_bytes;
 	u32 setup_us, trans_us, total_us;
@@ -375,21 +466,21 @@
 
 	t = ktime_get();
 
-	setup_time = ktime_sub(dsi.perf_start_time, dsi.perf_setup_time);
+	setup_time = ktime_sub(dsi->perf_start_time, dsi->perf_setup_time);
 	setup_us = (u32)ktime_to_us(setup_time);
 	if (setup_us == 0)
 		setup_us = 1;
 
-	trans_time = ktime_sub(t, dsi.perf_start_time);
+	trans_time = ktime_sub(t, dsi->perf_start_time);
 	trans_us = (u32)ktime_to_us(trans_time);
 	if (trans_us == 0)
 		trans_us = 1;
 
 	total_us = setup_us + trans_us;
 
-	total_bytes = dsi.update_region.w *
-		dsi.update_region.h *
-		dsi.update_region.device->ctrl.pixel_size / 8;
+	total_bytes = dsi->update_region.w *
+		dsi->update_region.h *
+		dsi->update_region.device->ctrl.pixel_size / 8;
 
 	printk(KERN_INFO "DSI(%s): %u us + %u us = %u us (%uHz), "
 			"%u bytes, %u kbytes/sec\n",
@@ -402,9 +493,9 @@
 			total_bytes * 1000 / total_us);
 }
 #else
-#define dsi_perf_mark_setup()
-#define dsi_perf_mark_start()
-#define dsi_perf_show(x)
+#define dsi_perf_mark_setup(x)
+#define dsi_perf_mark_start(x)
+#define dsi_perf_show(x, y)
 #endif
 
 static void print_irq_status(u32 status)
@@ -510,38 +601,42 @@
 }
 
 #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-static void dsi_collect_irq_stats(u32 irqstatus, u32 *vcstatus, u32 ciostatus)
+static void dsi_collect_irq_stats(struct platform_device *dsidev, u32 irqstatus,
+		u32 *vcstatus, u32 ciostatus)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int i;
 
-	spin_lock(&dsi.irq_stats_lock);
+	spin_lock(&dsi->irq_stats_lock);
 
-	dsi.irq_stats.irq_count++;
-	dss_collect_irq_stats(irqstatus, dsi.irq_stats.dsi_irqs);
+	dsi->irq_stats.irq_count++;
+	dss_collect_irq_stats(irqstatus, dsi->irq_stats.dsi_irqs);
 
 	for (i = 0; i < 4; ++i)
-		dss_collect_irq_stats(vcstatus[i], dsi.irq_stats.vc_irqs[i]);
+		dss_collect_irq_stats(vcstatus[i], dsi->irq_stats.vc_irqs[i]);
 
-	dss_collect_irq_stats(ciostatus, dsi.irq_stats.cio_irqs);
+	dss_collect_irq_stats(ciostatus, dsi->irq_stats.cio_irqs);
 
-	spin_unlock(&dsi.irq_stats_lock);
+	spin_unlock(&dsi->irq_stats_lock);
 }
 #else
-#define dsi_collect_irq_stats(irqstatus, vcstatus, ciostatus)
+#define dsi_collect_irq_stats(dsidev, irqstatus, vcstatus, ciostatus)
 #endif
 
 static int debug_irq;
 
-static void dsi_handle_irq_errors(u32 irqstatus, u32 *vcstatus, u32 ciostatus)
+static void dsi_handle_irq_errors(struct platform_device *dsidev, u32 irqstatus,
+		u32 *vcstatus, u32 ciostatus)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int i;
 
 	if (irqstatus & DSI_IRQ_ERROR_MASK) {
 		DSSERR("DSI error, irqstatus %x\n", irqstatus);
 		print_irq_status(irqstatus);
-		spin_lock(&dsi.errors_lock);
-		dsi.errors |= irqstatus & DSI_IRQ_ERROR_MASK;
-		spin_unlock(&dsi.errors_lock);
+		spin_lock(&dsi->errors_lock);
+		dsi->errors |= irqstatus & DSI_IRQ_ERROR_MASK;
+		spin_unlock(&dsi->errors_lock);
 	} else if (debug_irq) {
 		print_irq_status(irqstatus);
 	}
@@ -602,22 +697,27 @@
 
 static irqreturn_t omap_dsi_irq_handler(int irq, void *arg)
 {
+	struct platform_device *dsidev;
+	struct dsi_data *dsi;
 	u32 irqstatus, vcstatus[4], ciostatus;
 	int i;
 
-	spin_lock(&dsi.irq_lock);
+	dsidev = (struct platform_device *) arg;
+	dsi = dsi_get_dsidrv_data(dsidev);
 
-	irqstatus = dsi_read_reg(DSI_IRQSTATUS);
+	spin_lock(&dsi->irq_lock);
+
+	irqstatus = dsi_read_reg(dsidev, DSI_IRQSTATUS);
 
 	/* IRQ is not for us */
 	if (!irqstatus) {
-		spin_unlock(&dsi.irq_lock);
+		spin_unlock(&dsi->irq_lock);
 		return IRQ_NONE;
 	}
 
-	dsi_write_reg(DSI_IRQSTATUS, irqstatus & ~DSI_IRQ_CHANNEL_MASK);
+	dsi_write_reg(dsidev, DSI_IRQSTATUS, irqstatus & ~DSI_IRQ_CHANNEL_MASK);
 	/* flush posted write */
-	dsi_read_reg(DSI_IRQSTATUS);
+	dsi_read_reg(dsidev, DSI_IRQSTATUS);
 
 	for (i = 0; i < 4; ++i) {
 		if ((irqstatus & (1 << i)) == 0) {
@@ -625,45 +725,47 @@
 			continue;
 		}
 
-		vcstatus[i] = dsi_read_reg(DSI_VC_IRQSTATUS(i));
+		vcstatus[i] = dsi_read_reg(dsidev, DSI_VC_IRQSTATUS(i));
 
-		dsi_write_reg(DSI_VC_IRQSTATUS(i), vcstatus[i]);
+		dsi_write_reg(dsidev, DSI_VC_IRQSTATUS(i), vcstatus[i]);
 		/* flush posted write */
-		dsi_read_reg(DSI_VC_IRQSTATUS(i));
+		dsi_read_reg(dsidev, DSI_VC_IRQSTATUS(i));
 	}
 
 	if (irqstatus & DSI_IRQ_COMPLEXIO_ERR) {
-		ciostatus = dsi_read_reg(DSI_COMPLEXIO_IRQ_STATUS);
+		ciostatus = dsi_read_reg(dsidev, DSI_COMPLEXIO_IRQ_STATUS);
 
-		dsi_write_reg(DSI_COMPLEXIO_IRQ_STATUS, ciostatus);
+		dsi_write_reg(dsidev, DSI_COMPLEXIO_IRQ_STATUS, ciostatus);
 		/* flush posted write */
-		dsi_read_reg(DSI_COMPLEXIO_IRQ_STATUS);
+		dsi_read_reg(dsidev, DSI_COMPLEXIO_IRQ_STATUS);
 	} else {
 		ciostatus = 0;
 	}
 
 #ifdef DSI_CATCH_MISSING_TE
 	if (irqstatus & DSI_IRQ_TE_TRIGGER)
-		del_timer(&dsi.te_timer);
+		del_timer(&dsi->te_timer);
 #endif
 
 	/* make a copy and unlock, so that isrs can unregister
 	 * themselves */
-	memcpy(&dsi.isr_tables_copy, &dsi.isr_tables, sizeof(dsi.isr_tables));
+	memcpy(&dsi->isr_tables_copy, &dsi->isr_tables,
+		sizeof(dsi->isr_tables));
 
-	spin_unlock(&dsi.irq_lock);
+	spin_unlock(&dsi->irq_lock);
 
-	dsi_handle_isrs(&dsi.isr_tables_copy, irqstatus, vcstatus, ciostatus);
+	dsi_handle_isrs(&dsi->isr_tables_copy, irqstatus, vcstatus, ciostatus);
 
-	dsi_handle_irq_errors(irqstatus, vcstatus, ciostatus);
+	dsi_handle_irq_errors(dsidev, irqstatus, vcstatus, ciostatus);
 
-	dsi_collect_irq_stats(irqstatus, vcstatus, ciostatus);
+	dsi_collect_irq_stats(dsidev, irqstatus, vcstatus, ciostatus);
 
 	return IRQ_HANDLED;
 }
 
-/* dsi.irq_lock has to be locked by the caller */
-static void _omap_dsi_configure_irqs(struct dsi_isr_data *isr_array,
+/* dsi->irq_lock has to be locked by the caller */
+static void _omap_dsi_configure_irqs(struct platform_device *dsidev,
+		struct dsi_isr_data *isr_array,
 		unsigned isr_array_size, u32 default_mask,
 		const struct dsi_reg enable_reg,
 		const struct dsi_reg status_reg)
@@ -684,61 +786,67 @@
 		mask |= isr_data->mask;
 	}
 
-	old_mask = dsi_read_reg(enable_reg);
+	old_mask = dsi_read_reg(dsidev, enable_reg);
 	/* clear the irqstatus for newly enabled irqs */
-	dsi_write_reg(status_reg, (mask ^ old_mask) & mask);
-	dsi_write_reg(enable_reg, mask);
+	dsi_write_reg(dsidev, status_reg, (mask ^ old_mask) & mask);
+	dsi_write_reg(dsidev, enable_reg, mask);
 
 	/* flush posted writes */
-	dsi_read_reg(enable_reg);
-	dsi_read_reg(status_reg);
+	dsi_read_reg(dsidev, enable_reg);
+	dsi_read_reg(dsidev, status_reg);
 }
 
-/* dsi.irq_lock has to be locked by the caller */
-static void _omap_dsi_set_irqs(void)
+/* dsi->irq_lock has to be locked by the caller */
+static void _omap_dsi_set_irqs(struct platform_device *dsidev)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 mask = DSI_IRQ_ERROR_MASK;
 #ifdef DSI_CATCH_MISSING_TE
 	mask |= DSI_IRQ_TE_TRIGGER;
 #endif
-	_omap_dsi_configure_irqs(dsi.isr_tables.isr_table,
-			ARRAY_SIZE(dsi.isr_tables.isr_table), mask,
+	_omap_dsi_configure_irqs(dsidev, dsi->isr_tables.isr_table,
+			ARRAY_SIZE(dsi->isr_tables.isr_table), mask,
 			DSI_IRQENABLE, DSI_IRQSTATUS);
 }
 
-/* dsi.irq_lock has to be locked by the caller */
-static void _omap_dsi_set_irqs_vc(int vc)
+/* dsi->irq_lock has to be locked by the caller */
+static void _omap_dsi_set_irqs_vc(struct platform_device *dsidev, int vc)
 {
-	_omap_dsi_configure_irqs(dsi.isr_tables.isr_table_vc[vc],
-			ARRAY_SIZE(dsi.isr_tables.isr_table_vc[vc]),
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	_omap_dsi_configure_irqs(dsidev, dsi->isr_tables.isr_table_vc[vc],
+			ARRAY_SIZE(dsi->isr_tables.isr_table_vc[vc]),
 			DSI_VC_IRQ_ERROR_MASK,
 			DSI_VC_IRQENABLE(vc), DSI_VC_IRQSTATUS(vc));
 }
 
-/* dsi.irq_lock has to be locked by the caller */
-static void _omap_dsi_set_irqs_cio(void)
+/* dsi->irq_lock has to be locked by the caller */
+static void _omap_dsi_set_irqs_cio(struct platform_device *dsidev)
 {
-	_omap_dsi_configure_irqs(dsi.isr_tables.isr_table_cio,
-			ARRAY_SIZE(dsi.isr_tables.isr_table_cio),
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	_omap_dsi_configure_irqs(dsidev, dsi->isr_tables.isr_table_cio,
+			ARRAY_SIZE(dsi->isr_tables.isr_table_cio),
 			DSI_CIO_IRQ_ERROR_MASK,
 			DSI_COMPLEXIO_IRQ_ENABLE, DSI_COMPLEXIO_IRQ_STATUS);
 }
 
-static void _dsi_initialize_irq(void)
+static void _dsi_initialize_irq(struct platform_device *dsidev)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int vc;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
-	memset(&dsi.isr_tables, 0, sizeof(dsi.isr_tables));
+	memset(&dsi->isr_tables, 0, sizeof(dsi->isr_tables));
 
-	_omap_dsi_set_irqs();
+	_omap_dsi_set_irqs(dsidev);
 	for (vc = 0; vc < 4; ++vc)
-		_omap_dsi_set_irqs_vc(vc);
-	_omap_dsi_set_irqs_cio();
+		_omap_dsi_set_irqs_vc(dsidev, vc);
+	_omap_dsi_set_irqs_cio(dsidev);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 }
 
 static int _dsi_register_isr(omap_dsi_isr_t isr, void *arg, u32 mask,
@@ -797,126 +905,137 @@
 	return -EINVAL;
 }
 
-static int dsi_register_isr(omap_dsi_isr_t isr, void *arg, u32 mask)
+static int dsi_register_isr(struct platform_device *dsidev, omap_dsi_isr_t isr,
+		void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
-	r = _dsi_register_isr(isr, arg, mask, dsi.isr_tables.isr_table,
-			ARRAY_SIZE(dsi.isr_tables.isr_table));
+	r = _dsi_register_isr(isr, arg, mask, dsi->isr_tables.isr_table,
+			ARRAY_SIZE(dsi->isr_tables.isr_table));
 
 	if (r == 0)
-		_omap_dsi_set_irqs();
+		_omap_dsi_set_irqs(dsidev);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static int dsi_unregister_isr(omap_dsi_isr_t isr, void *arg, u32 mask)
+static int dsi_unregister_isr(struct platform_device *dsidev,
+		omap_dsi_isr_t isr, void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
-	r = _dsi_unregister_isr(isr, arg, mask, dsi.isr_tables.isr_table,
-			ARRAY_SIZE(dsi.isr_tables.isr_table));
+	r = _dsi_unregister_isr(isr, arg, mask, dsi->isr_tables.isr_table,
+			ARRAY_SIZE(dsi->isr_tables.isr_table));
 
 	if (r == 0)
-		_omap_dsi_set_irqs();
+		_omap_dsi_set_irqs(dsidev);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static int dsi_register_isr_vc(int channel, omap_dsi_isr_t isr, void *arg,
-		u32 mask)
+static int dsi_register_isr_vc(struct platform_device *dsidev, int channel,
+		omap_dsi_isr_t isr, void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
 	r = _dsi_register_isr(isr, arg, mask,
-			dsi.isr_tables.isr_table_vc[channel],
-			ARRAY_SIZE(dsi.isr_tables.isr_table_vc[channel]));
+			dsi->isr_tables.isr_table_vc[channel],
+			ARRAY_SIZE(dsi->isr_tables.isr_table_vc[channel]));
 
 	if (r == 0)
-		_omap_dsi_set_irqs_vc(channel);
+		_omap_dsi_set_irqs_vc(dsidev, channel);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static int dsi_unregister_isr_vc(int channel, omap_dsi_isr_t isr, void *arg,
-		u32 mask)
+static int dsi_unregister_isr_vc(struct platform_device *dsidev, int channel,
+		omap_dsi_isr_t isr, void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
 	r = _dsi_unregister_isr(isr, arg, mask,
-			dsi.isr_tables.isr_table_vc[channel],
-			ARRAY_SIZE(dsi.isr_tables.isr_table_vc[channel]));
+			dsi->isr_tables.isr_table_vc[channel],
+			ARRAY_SIZE(dsi->isr_tables.isr_table_vc[channel]));
 
 	if (r == 0)
-		_omap_dsi_set_irqs_vc(channel);
+		_omap_dsi_set_irqs_vc(dsidev, channel);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static int dsi_register_isr_cio(omap_dsi_isr_t isr, void *arg, u32 mask)
+static int dsi_register_isr_cio(struct platform_device *dsidev,
+		omap_dsi_isr_t isr, void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
-	r = _dsi_register_isr(isr, arg, mask, dsi.isr_tables.isr_table_cio,
-			ARRAY_SIZE(dsi.isr_tables.isr_table_cio));
+	r = _dsi_register_isr(isr, arg, mask, dsi->isr_tables.isr_table_cio,
+			ARRAY_SIZE(dsi->isr_tables.isr_table_cio));
 
 	if (r == 0)
-		_omap_dsi_set_irqs_cio();
+		_omap_dsi_set_irqs_cio(dsidev);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static int dsi_unregister_isr_cio(omap_dsi_isr_t isr, void *arg, u32 mask)
+static int dsi_unregister_isr_cio(struct platform_device *dsidev,
+		omap_dsi_isr_t isr, void *arg, u32 mask)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	int r;
 
-	spin_lock_irqsave(&dsi.irq_lock, flags);
+	spin_lock_irqsave(&dsi->irq_lock, flags);
 
-	r = _dsi_unregister_isr(isr, arg, mask, dsi.isr_tables.isr_table_cio,
-			ARRAY_SIZE(dsi.isr_tables.isr_table_cio));
+	r = _dsi_unregister_isr(isr, arg, mask, dsi->isr_tables.isr_table_cio,
+			ARRAY_SIZE(dsi->isr_tables.isr_table_cio));
 
 	if (r == 0)
-		_omap_dsi_set_irqs_cio();
+		_omap_dsi_set_irqs_cio(dsidev);
 
-	spin_unlock_irqrestore(&dsi.irq_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_lock, flags);
 
 	return r;
 }
 
-static u32 dsi_get_errors(void)
+static u32 dsi_get_errors(struct platform_device *dsidev)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	u32 e;
-	spin_lock_irqsave(&dsi.errors_lock, flags);
-	e = dsi.errors;
-	dsi.errors = 0;
-	spin_unlock_irqrestore(&dsi.errors_lock, flags);
+	spin_lock_irqsave(&dsi->errors_lock, flags);
+	e = dsi->errors;
+	dsi->errors = 0;
+	spin_unlock_irqrestore(&dsi->errors_lock, flags);
 	return e;
 }
 
@@ -930,23 +1049,27 @@
 }
 
 /* source clock for DSI PLL. this could also be PCLKFREE */
-static inline void dsi_enable_pll_clock(bool enable)
+static inline void dsi_enable_pll_clock(struct platform_device *dsidev,
+		bool enable)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
 	if (enable)
 		dss_clk_enable(DSS_CLK_SYSCK);
 	else
 		dss_clk_disable(DSS_CLK_SYSCK);
 
-	if (enable && dsi.pll_locked) {
-		if (wait_for_bit_change(DSI_PLL_STATUS, 1, 1) != 1)
+	if (enable && dsi->pll_locked) {
+		if (wait_for_bit_change(dsidev, DSI_PLL_STATUS, 1, 1) != 1)
 			DSSERR("cannot lock PLL when enabling clocks\n");
 	}
 }
 
 #ifdef DEBUG
-static void _dsi_print_reset_status(void)
+static void _dsi_print_reset_status(struct platform_device *dsidev)
 {
 	u32 l;
+	int b0, b1, b2;
 
 	if (!dss_debug)
 		return;
@@ -954,35 +1077,47 @@
 	/* A dummy read using the SCP interface to any DSIPHY register is
 	 * required after DSIPHY reset to complete the reset of the DSI complex
 	 * I/O. */
-	l = dsi_read_reg(DSI_DSIPHY_CFG5);
+	l = dsi_read_reg(dsidev, DSI_DSIPHY_CFG5);
 
 	printk(KERN_DEBUG "DSI resets: ");
 
-	l = dsi_read_reg(DSI_PLL_STATUS);
+	l = dsi_read_reg(dsidev, DSI_PLL_STATUS);
 	printk("PLL (%d) ", FLD_GET(l, 0, 0));
 
-	l = dsi_read_reg(DSI_COMPLEXIO_CFG1);
+	l = dsi_read_reg(dsidev, DSI_COMPLEXIO_CFG1);
 	printk("CIO (%d) ", FLD_GET(l, 29, 29));
 
-	l = dsi_read_reg(DSI_DSIPHY_CFG5);
-	printk("PHY (%x, %d, %d, %d)\n",
-			FLD_GET(l, 28, 26),
+	if (dss_has_feature(FEAT_DSI_REVERSE_TXCLKESC)) {
+		b0 = 28;
+		b1 = 27;
+		b2 = 26;
+	} else {
+		b0 = 24;
+		b1 = 25;
+		b2 = 26;
+	}
+
+	l = dsi_read_reg(dsidev, DSI_DSIPHY_CFG5);
+	printk("PHY (%x%x%x, %d, %d, %d)\n",
+			FLD_GET(l, b0, b0),
+			FLD_GET(l, b1, b1),
+			FLD_GET(l, b2, b2),
 			FLD_GET(l, 29, 29),
 			FLD_GET(l, 30, 30),
 			FLD_GET(l, 31, 31));
 }
 #else
-#define _dsi_print_reset_status()
+#define _dsi_print_reset_status(x)
 #endif
 
-static inline int dsi_if_enable(bool enable)
+static inline int dsi_if_enable(struct platform_device *dsidev, bool enable)
 {
 	DSSDBG("dsi_if_enable(%d)\n", enable);
 
 	enable = enable ? 1 : 0;
-	REG_FLD_MOD(DSI_CTRL, enable, 0, 0); /* IF_EN */
+	REG_FLD_MOD(dsidev, DSI_CTRL, enable, 0, 0); /* IF_EN */
 
-	if (wait_for_bit_change(DSI_CTRL, 0, enable) != enable) {
+	if (wait_for_bit_change(dsidev, DSI_CTRL, 0, enable) != enable) {
 			DSSERR("Failed to set dsi_if_enable to %d\n", enable);
 			return -EIO;
 	}
@@ -990,31 +1125,38 @@
 	return 0;
 }
 
-unsigned long dsi_get_pll_hsdiv_dispc_rate(void)
+unsigned long dsi_get_pll_hsdiv_dispc_rate(struct platform_device *dsidev)
 {
-	return dsi.current_cinfo.dsi_pll_hsdiv_dispc_clk;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	return dsi->current_cinfo.dsi_pll_hsdiv_dispc_clk;
 }
 
-static unsigned long dsi_get_pll_hsdiv_dsi_rate(void)
+static unsigned long dsi_get_pll_hsdiv_dsi_rate(struct platform_device *dsidev)
 {
-	return dsi.current_cinfo.dsi_pll_hsdiv_dsi_clk;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	return dsi->current_cinfo.dsi_pll_hsdiv_dsi_clk;
 }
 
-static unsigned long dsi_get_txbyteclkhs(void)
+static unsigned long dsi_get_txbyteclkhs(struct platform_device *dsidev)
 {
-	return dsi.current_cinfo.clkin4ddr / 16;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	return dsi->current_cinfo.clkin4ddr / 16;
 }
 
-static unsigned long dsi_fclk_rate(void)
+static unsigned long dsi_fclk_rate(struct platform_device *dsidev)
 {
 	unsigned long r;
+	int dsi_module = dsi_get_dsidev_id(dsidev);
 
-	if (dss_get_dsi_clk_source() == DSS_CLK_SRC_FCK) {
+	if (dss_get_dsi_clk_source(dsi_module) == OMAP_DSS_CLK_SRC_FCK) {
 		/* DSI FCLK source is DSS_CLK_FCK */
 		r = dss_clk_get_rate(DSS_CLK_FCK);
 	} else {
 		/* DSI FCLK source is dsi_pll_hsdiv_dsi_clk */
-		r = dsi_get_pll_hsdiv_dsi_rate();
+		r = dsi_get_pll_hsdiv_dsi_rate(dsidev);
 	}
 
 	return r;
@@ -1022,31 +1164,50 @@
 
 static int dsi_set_lp_clk_divisor(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long dsi_fclk;
 	unsigned lp_clk_div;
 	unsigned long lp_clk;
 
-	lp_clk_div = dssdev->phy.dsi.div.lp_clk_div;
+	lp_clk_div = dssdev->clocks.dsi.lp_clk_div;
 
-	if (lp_clk_div == 0 || lp_clk_div > dsi.lpdiv_max)
+	if (lp_clk_div == 0 || lp_clk_div > dsi->lpdiv_max)
 		return -EINVAL;
 
-	dsi_fclk = dsi_fclk_rate();
+	dsi_fclk = dsi_fclk_rate(dsidev);
 
 	lp_clk = dsi_fclk / 2 / lp_clk_div;
 
 	DSSDBG("LP_CLK_DIV %u, LP_CLK %lu\n", lp_clk_div, lp_clk);
-	dsi.current_cinfo.lp_clk = lp_clk;
-	dsi.current_cinfo.lp_clk_div = lp_clk_div;
+	dsi->current_cinfo.lp_clk = lp_clk;
+	dsi->current_cinfo.lp_clk_div = lp_clk_div;
 
-	REG_FLD_MOD(DSI_CLK_CTRL, lp_clk_div, 12, 0);   /* LP_CLK_DIVISOR */
+	/* LP_CLK_DIVISOR */
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, lp_clk_div, 12, 0);
 
-	REG_FLD_MOD(DSI_CLK_CTRL, dsi_fclk > 30000000 ? 1 : 0,
-			21, 21);		/* LP_RX_SYNCHRO_ENABLE */
+	/* LP_RX_SYNCHRO_ENABLE */
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, dsi_fclk > 30000000 ? 1 : 0, 21, 21);
 
 	return 0;
 }
 
+static void dsi_enable_scp_clk(struct platform_device *dsidev)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (dsi->scp_clk_refcount++ == 0)
+		REG_FLD_MOD(dsidev, DSI_CLK_CTRL, 1, 14, 14); /* CIO_CLK_ICG */
+}
+
+static void dsi_disable_scp_clk(struct platform_device *dsidev)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	WARN_ON(dsi->scp_clk_refcount == 0);
+	if (--dsi->scp_clk_refcount == 0)
+		REG_FLD_MOD(dsidev, DSI_CLK_CTRL, 0, 14, 14); /* CIO_CLK_ICG */
+}
 
 enum dsi_pll_power_state {
 	DSI_PLL_POWER_OFF	= 0x0,
@@ -1055,14 +1216,21 @@
 	DSI_PLL_POWER_ON_DIV	= 0x3,
 };
 
-static int dsi_pll_power(enum dsi_pll_power_state state)
+static int dsi_pll_power(struct platform_device *dsidev,
+		enum dsi_pll_power_state state)
 {
 	int t = 0;
 
-	REG_FLD_MOD(DSI_CLK_CTRL, state, 31, 30);	/* PLL_PWR_CMD */
+	/* DSI-PLL power command 0x3 is not working */
+	if (dss_has_feature(FEAT_DSI_PLL_PWR_BUG) &&
+			state == DSI_PLL_POWER_ON_DIV)
+		state = DSI_PLL_POWER_ON_ALL;
+
+	/* PLL_PWR_CMD */
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, state, 31, 30);
 
 	/* PLL_PWR_STATUS */
-	while (FLD_GET(dsi_read_reg(DSI_CLK_CTRL), 29, 28) != state) {
+	while (FLD_GET(dsi_read_reg(dsidev, DSI_CLK_CTRL), 29, 28) != state) {
 		if (++t > 1000) {
 			DSSERR("Failed to set DSI PLL power mode to %d\n",
 					state);
@@ -1078,16 +1246,19 @@
 static int dsi_calc_clock_rates(struct omap_dss_device *dssdev,
 		struct dsi_clock_info *cinfo)
 {
-	if (cinfo->regn == 0 || cinfo->regn > dsi.regn_max)
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (cinfo->regn == 0 || cinfo->regn > dsi->regn_max)
 		return -EINVAL;
 
-	if (cinfo->regm == 0 || cinfo->regm > dsi.regm_max)
+	if (cinfo->regm == 0 || cinfo->regm > dsi->regm_max)
 		return -EINVAL;
 
-	if (cinfo->regm_dispc > dsi.regm_dispc_max)
+	if (cinfo->regm_dispc > dsi->regm_dispc_max)
 		return -EINVAL;
 
-	if (cinfo->regm_dsi > dsi.regm_dsi_max)
+	if (cinfo->regm_dsi > dsi->regm_dsi_max)
 		return -EINVAL;
 
 	if (cinfo->use_sys_clk) {
@@ -1106,7 +1277,7 @@
 
 	cinfo->fint = cinfo->clkin / (cinfo->regn * (cinfo->highfreq ? 2 : 1));
 
-	if (cinfo->fint > dsi.fint_max || cinfo->fint < dsi.fint_min)
+	if (cinfo->fint > dsi->fint_max || cinfo->fint < dsi->fint_min)
 		return -EINVAL;
 
 	cinfo->clkin4ddr = 2 * cinfo->regm * cinfo->fint;
@@ -1129,10 +1300,11 @@
 	return 0;
 }
 
-int dsi_pll_calc_clock_div_pck(bool is_tft, unsigned long req_pck,
-		struct dsi_clock_info *dsi_cinfo,
+int dsi_pll_calc_clock_div_pck(struct platform_device *dsidev, bool is_tft,
+		unsigned long req_pck, struct dsi_clock_info *dsi_cinfo,
 		struct dispc_clock_info *dispc_cinfo)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	struct dsi_clock_info cur, best;
 	struct dispc_clock_info best_dispc;
 	int min_fck_per_pck;
@@ -1143,10 +1315,10 @@
 
 	max_dss_fck = dss_feat_get_param_max(FEAT_PARAM_DSS_FCK);
 
-	if (req_pck == dsi.cache_req_pck &&
-			dsi.cache_cinfo.clkin == dss_sys_clk) {
+	if (req_pck == dsi->cache_req_pck &&
+			dsi->cache_cinfo.clkin == dss_sys_clk) {
 		DSSDBG("DSI clock info found from cache\n");
-		*dsi_cinfo = dsi.cache_cinfo;
+		*dsi_cinfo = dsi->cache_cinfo;
 		dispc_find_clk_divs(is_tft, req_pck,
 			dsi_cinfo->dsi_pll_hsdiv_dispc_clk, dispc_cinfo);
 		return 0;
@@ -1176,17 +1348,17 @@
 	/* no highfreq: 0.75MHz < Fint = clkin / regn < 2.1MHz */
 	/* highfreq: 0.75MHz < Fint = clkin / (2*regn) < 2.1MHz */
 	/* To reduce PLL lock time, keep Fint high (around 2 MHz) */
-	for (cur.regn = 1; cur.regn < dsi.regn_max; ++cur.regn) {
+	for (cur.regn = 1; cur.regn < dsi->regn_max; ++cur.regn) {
 		if (cur.highfreq == 0)
 			cur.fint = cur.clkin / cur.regn;
 		else
 			cur.fint = cur.clkin / (2 * cur.regn);
 
-		if (cur.fint > dsi.fint_max || cur.fint < dsi.fint_min)
+		if (cur.fint > dsi->fint_max || cur.fint < dsi->fint_min)
 			continue;
 
 		/* DSIPHY(MHz) = (2 * regm / regn) * (clkin / (highfreq + 1)) */
-		for (cur.regm = 1; cur.regm < dsi.regm_max; ++cur.regm) {
+		for (cur.regm = 1; cur.regm < dsi->regm_max; ++cur.regm) {
 			unsigned long a, b;
 
 			a = 2 * cur.regm * (cur.clkin/1000);
@@ -1198,8 +1370,8 @@
 
 			/* dsi_pll_hsdiv_dispc_clk(MHz) =
 			 * DSIPHY(MHz) / regm_dispc  < 173MHz/186Mhz */
-			for (cur.regm_dispc = 1; cur.regm_dispc < dsi.regm_dispc_max;
-					++cur.regm_dispc) {
+			for (cur.regm_dispc = 1; cur.regm_dispc <
+					dsi->regm_dispc_max; ++cur.regm_dispc) {
 				struct dispc_clock_info cur_dispc;
 				cur.dsi_pll_hsdiv_dispc_clk =
 					cur.clkin4ddr / cur.regm_dispc;
@@ -1259,34 +1431,39 @@
 	if (dispc_cinfo)
 		*dispc_cinfo = best_dispc;
 
-	dsi.cache_req_pck = req_pck;
-	dsi.cache_clk_freq = 0;
-	dsi.cache_cinfo = best;
+	dsi->cache_req_pck = req_pck;
+	dsi->cache_clk_freq = 0;
+	dsi->cache_cinfo = best;
 
 	return 0;
 }
 
-int dsi_pll_set_clock_div(struct dsi_clock_info *cinfo)
+int dsi_pll_set_clock_div(struct platform_device *dsidev,
+		struct dsi_clock_info *cinfo)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int r = 0;
 	u32 l;
-	int f;
+	int f = 0;
 	u8 regn_start, regn_end, regm_start, regm_end;
 	u8 regm_dispc_start, regm_dispc_end, regm_dsi_start, regm_dsi_end;
 
 	DSSDBGF();
 
-	dsi.current_cinfo.fint = cinfo->fint;
-	dsi.current_cinfo.clkin4ddr = cinfo->clkin4ddr;
-	dsi.current_cinfo.dsi_pll_hsdiv_dispc_clk =
+	dsi->current_cinfo.use_sys_clk = cinfo->use_sys_clk;
+	dsi->current_cinfo.highfreq = cinfo->highfreq;
+
+	dsi->current_cinfo.fint = cinfo->fint;
+	dsi->current_cinfo.clkin4ddr = cinfo->clkin4ddr;
+	dsi->current_cinfo.dsi_pll_hsdiv_dispc_clk =
 			cinfo->dsi_pll_hsdiv_dispc_clk;
-	dsi.current_cinfo.dsi_pll_hsdiv_dsi_clk =
+	dsi->current_cinfo.dsi_pll_hsdiv_dsi_clk =
 			cinfo->dsi_pll_hsdiv_dsi_clk;
 
-	dsi.current_cinfo.regn = cinfo->regn;
-	dsi.current_cinfo.regm = cinfo->regm;
-	dsi.current_cinfo.regm_dispc = cinfo->regm_dispc;
-	dsi.current_cinfo.regm_dsi = cinfo->regm_dsi;
+	dsi->current_cinfo.regn = cinfo->regn;
+	dsi->current_cinfo.regm = cinfo->regm;
+	dsi->current_cinfo.regm_dispc = cinfo->regm_dispc;
+	dsi->current_cinfo.regm_dsi = cinfo->regm_dsi;
 
 	DSSDBG("DSI Fint %ld\n", cinfo->fint);
 
@@ -1309,12 +1486,12 @@
 	DSSDBG("Clock lane freq %ld Hz\n", cinfo->clkin4ddr / 4);
 
 	DSSDBG("regm_dispc = %d, %s (%s) = %lu\n", cinfo->regm_dispc,
-		dss_get_generic_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
-		dss_feat_get_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
+		dss_get_generic_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
+		dss_feat_get_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
 		cinfo->dsi_pll_hsdiv_dispc_clk);
 	DSSDBG("regm_dsi = %d, %s (%s) = %lu\n", cinfo->regm_dsi,
-		dss_get_generic_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
-		dss_feat_get_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
+		dss_get_generic_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
+		dss_feat_get_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
 		cinfo->dsi_pll_hsdiv_dsi_clk);
 
 	dss_feat_get_reg_field(FEAT_REG_DSIPLL_REGN, &regn_start, &regn_end);
@@ -1324,9 +1501,10 @@
 	dss_feat_get_reg_field(FEAT_REG_DSIPLL_REGM_DSI, &regm_dsi_start,
 			&regm_dsi_end);
 
-	REG_FLD_MOD(DSI_PLL_CONTROL, 0, 0, 0); /* DSI_PLL_AUTOMODE = manual */
+	/* DSI_PLL_AUTOMODE = manual */
+	REG_FLD_MOD(dsidev, DSI_PLL_CONTROL, 0, 0, 0);
 
-	l = dsi_read_reg(DSI_PLL_CONFIGURATION1);
+	l = dsi_read_reg(dsidev, DSI_PLL_CONFIGURATION1);
 	l = FLD_MOD(l, 1, 0, 0);		/* DSI_PLL_STOPMODE */
 	/* DSI_PLL_REGN */
 	l = FLD_MOD(l, cinfo->regn - 1, regn_start, regn_end);
@@ -1338,22 +1516,22 @@
 	/* DSIPROTO_CLOCK_DIV */
 	l = FLD_MOD(l, cinfo->regm_dsi > 0 ? cinfo->regm_dsi - 1 : 0,
 			regm_dsi_start, regm_dsi_end);
-	dsi_write_reg(DSI_PLL_CONFIGURATION1, l);
+	dsi_write_reg(dsidev, DSI_PLL_CONFIGURATION1, l);
 
-	BUG_ON(cinfo->fint < dsi.fint_min || cinfo->fint > dsi.fint_max);
-	if (cinfo->fint < 1000000)
-		f = 0x3;
-	else if (cinfo->fint < 1250000)
-		f = 0x4;
-	else if (cinfo->fint < 1500000)
-		f = 0x5;
-	else if (cinfo->fint < 1750000)
-		f = 0x6;
-	else
-		f = 0x7;
+	BUG_ON(cinfo->fint < dsi->fint_min || cinfo->fint > dsi->fint_max);
 
-	l = dsi_read_reg(DSI_PLL_CONFIGURATION2);
-	l = FLD_MOD(l, f, 4, 1);		/* DSI_PLL_FREQSEL */
+	if (dss_has_feature(FEAT_DSI_PLL_FREQSEL)) {
+		f = cinfo->fint < 1000000 ? 0x3 :
+			cinfo->fint < 1250000 ? 0x4 :
+			cinfo->fint < 1500000 ? 0x5 :
+			cinfo->fint < 1750000 ? 0x6 :
+			0x7;
+	}
+
+	l = dsi_read_reg(dsidev, DSI_PLL_CONFIGURATION2);
+
+	if (dss_has_feature(FEAT_DSI_PLL_FREQSEL))
+		l = FLD_MOD(l, f, 4, 1);	/* DSI_PLL_FREQSEL */
 	l = FLD_MOD(l, cinfo->use_sys_clk ? 0 : 1,
 			11, 11);		/* DSI_PLL_CLKSEL */
 	l = FLD_MOD(l, cinfo->highfreq,
@@ -1361,25 +1539,25 @@
 	l = FLD_MOD(l, 1, 13, 13);		/* DSI_PLL_REFEN */
 	l = FLD_MOD(l, 0, 14, 14);		/* DSIPHY_CLKINEN */
 	l = FLD_MOD(l, 1, 20, 20);		/* DSI_HSDIVBYPASS */
-	dsi_write_reg(DSI_PLL_CONFIGURATION2, l);
+	dsi_write_reg(dsidev, DSI_PLL_CONFIGURATION2, l);
 
-	REG_FLD_MOD(DSI_PLL_GO, 1, 0, 0);	/* DSI_PLL_GO */
+	REG_FLD_MOD(dsidev, DSI_PLL_GO, 1, 0, 0);	/* DSI_PLL_GO */
 
-	if (wait_for_bit_change(DSI_PLL_GO, 0, 0) != 0) {
+	if (wait_for_bit_change(dsidev, DSI_PLL_GO, 0, 0) != 0) {
 		DSSERR("dsi pll go bit not going down.\n");
 		r = -EIO;
 		goto err;
 	}
 
-	if (wait_for_bit_change(DSI_PLL_STATUS, 1, 1) != 1) {
+	if (wait_for_bit_change(dsidev, DSI_PLL_STATUS, 1, 1) != 1) {
 		DSSERR("cannot lock PLL\n");
 		r = -EIO;
 		goto err;
 	}
 
-	dsi.pll_locked = 1;
+	dsi->pll_locked = 1;
 
-	l = dsi_read_reg(DSI_PLL_CONFIGURATION2);
+	l = dsi_read_reg(dsidev, DSI_PLL_CONFIGURATION2);
 	l = FLD_MOD(l, 0, 0, 0);	/* DSI_PLL_IDLE */
 	l = FLD_MOD(l, 0, 5, 5);	/* DSI_PLL_PLLLPMODE */
 	l = FLD_MOD(l, 0, 6, 6);	/* DSI_PLL_LOWCURRSTBY */
@@ -1394,52 +1572,53 @@
 	l = FLD_MOD(l, 1, 18, 18);	/* DSI_PROTO_CLOCK_EN */
 	l = FLD_MOD(l, 0, 19, 19);	/* DSI_PROTO_CLOCK_PWDN */
 	l = FLD_MOD(l, 0, 20, 20);	/* DSI_HSDIVBYPASS */
-	dsi_write_reg(DSI_PLL_CONFIGURATION2, l);
+	dsi_write_reg(dsidev, DSI_PLL_CONFIGURATION2, l);
 
 	DSSDBG("PLL config done\n");
 err:
 	return r;
 }
 
-int dsi_pll_init(struct omap_dss_device *dssdev, bool enable_hsclk,
+int dsi_pll_init(struct platform_device *dsidev, bool enable_hsclk,
 		bool enable_hsdiv)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int r = 0;
 	enum dsi_pll_power_state pwstate;
 
 	DSSDBG("PLL init\n");
 
-#ifdef CONFIG_OMAP2_DSS_USE_DSI_PLL
-	/*
-	 * HACK: this is just a quick hack to get the USE_DSI_PLL
-	 * option working. USE_DSI_PLL is itself a big hack, and
-	 * should be removed.
-	 */
-	if (dsi.vdds_dsi_reg == NULL) {
+	if (dsi->vdds_dsi_reg == NULL) {
 		struct regulator *vdds_dsi;
 
-		vdds_dsi = regulator_get(&dsi.pdev->dev, "vdds_dsi");
+		vdds_dsi = regulator_get(&dsi->pdev->dev, "vdds_dsi");
 
 		if (IS_ERR(vdds_dsi)) {
 			DSSERR("can't get VDDS_DSI regulator\n");
 			return PTR_ERR(vdds_dsi);
 		}
 
-		dsi.vdds_dsi_reg = vdds_dsi;
+		dsi->vdds_dsi_reg = vdds_dsi;
 	}
-#endif
 
 	enable_clocks(1);
-	dsi_enable_pll_clock(1);
+	dsi_enable_pll_clock(dsidev, 1);
+	/*
+	 * Note: SCP CLK is not required on OMAP3, but it is required on OMAP4.
+	 */
+	dsi_enable_scp_clk(dsidev);
 
-	r = regulator_enable(dsi.vdds_dsi_reg);
-	if (r)
-		goto err0;
+	if (!dsi->vdds_dsi_enabled) {
+		r = regulator_enable(dsi->vdds_dsi_reg);
+		if (r)
+			goto err0;
+		dsi->vdds_dsi_enabled = true;
+	}
 
 	/* XXX PLL does not come out of reset without this... */
 	dispc_pck_free_enable(1);
 
-	if (wait_for_bit_change(DSI_PLL_STATUS, 0, 1) != 1) {
+	if (wait_for_bit_change(dsidev, DSI_PLL_STATUS, 0, 1) != 1) {
 		DSSERR("PLL not coming out of reset.\n");
 		r = -ENODEV;
 		dispc_pck_free_enable(0);
@@ -1459,7 +1638,7 @@
 	else
 		pwstate = DSI_PLL_POWER_OFF;
 
-	r = dsi_pll_power(pwstate);
+	r = dsi_pll_power(dsidev, pwstate);
 
 	if (r)
 		goto err1;
@@ -1468,42 +1647,53 @@
 
 	return 0;
 err1:
-	regulator_disable(dsi.vdds_dsi_reg);
+	if (dsi->vdds_dsi_enabled) {
+		regulator_disable(dsi->vdds_dsi_reg);
+		dsi->vdds_dsi_enabled = false;
+	}
 err0:
+	dsi_disable_scp_clk(dsidev);
 	enable_clocks(0);
-	dsi_enable_pll_clock(0);
+	dsi_enable_pll_clock(dsidev, 0);
 	return r;
 }
 
-void dsi_pll_uninit(void)
+void dsi_pll_uninit(struct platform_device *dsidev, bool disconnect_lanes)
 {
-	enable_clocks(0);
-	dsi_enable_pll_clock(0);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 
-	dsi.pll_locked = 0;
-	dsi_pll_power(DSI_PLL_POWER_OFF);
-	regulator_disable(dsi.vdds_dsi_reg);
+	dsi->pll_locked = 0;
+	dsi_pll_power(dsidev, DSI_PLL_POWER_OFF);
+	if (disconnect_lanes) {
+		WARN_ON(!dsi->vdds_dsi_enabled);
+		regulator_disable(dsi->vdds_dsi_reg);
+		dsi->vdds_dsi_enabled = false;
+	}
+
+	dsi_disable_scp_clk(dsidev);
+	enable_clocks(0);
+	dsi_enable_pll_clock(dsidev, 0);
+
 	DSSDBG("PLL uninit done\n");
 }
 
-void dsi_dump_clocks(struct seq_file *s)
+static void dsi_dump_dsidev_clocks(struct platform_device *dsidev,
+		struct seq_file *s)
 {
-	int clksel;
-	struct dsi_clock_info *cinfo = &dsi.current_cinfo;
-	enum dss_clk_source dispc_clk_src, dsi_clk_src;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	struct dsi_clock_info *cinfo = &dsi->current_cinfo;
+	enum omap_dss_clk_source dispc_clk_src, dsi_clk_src;
+	int dsi_module = dsi_get_dsidev_id(dsidev);
 
 	dispc_clk_src = dss_get_dispc_clk_source();
-	dsi_clk_src = dss_get_dsi_clk_source();
+	dsi_clk_src = dss_get_dsi_clk_source(dsi_module);
 
 	enable_clocks(1);
 
-	clksel = REG_GET(DSI_PLL_CONFIGURATION2, 11, 11);
-
-	seq_printf(s,	"- DSI PLL -\n");
+	seq_printf(s,	"- DSI%d PLL -\n", dsi_module + 1);
 
 	seq_printf(s,	"dsi pll source = %s\n",
-			clksel == 0 ?
-			"dss_sys_clk" : "pclkfree");
+			cinfo->use_sys_clk ? "dss_sys_clk" : "pclkfree");
 
 	seq_printf(s,	"Fint\t\t%-16luregn %u\n", cinfo->fint, cinfo->regn);
 
@@ -1515,7 +1705,7 @@
 			dss_feat_get_clk_source_name(dispc_clk_src),
 			cinfo->dsi_pll_hsdiv_dispc_clk,
 			cinfo->regm_dispc,
-			dispc_clk_src == DSS_CLK_SRC_FCK ?
+			dispc_clk_src == OMAP_DSS_CLK_SRC_FCK ?
 			"off" : "on");
 
 	seq_printf(s,	"%s (%s)\t%-16luregm_dsi %u\t(%s)\n",
@@ -1523,45 +1713,55 @@
 			dss_feat_get_clk_source_name(dsi_clk_src),
 			cinfo->dsi_pll_hsdiv_dsi_clk,
 			cinfo->regm_dsi,
-			dsi_clk_src == DSS_CLK_SRC_FCK ?
+			dsi_clk_src == OMAP_DSS_CLK_SRC_FCK ?
 			"off" : "on");
 
-	seq_printf(s,	"- DSI -\n");
+	seq_printf(s,	"- DSI%d -\n", dsi_module + 1);
 
 	seq_printf(s,	"dsi fclk source = %s (%s)\n",
 			dss_get_generic_clk_source_name(dsi_clk_src),
 			dss_feat_get_clk_source_name(dsi_clk_src));
 
-	seq_printf(s,	"DSI_FCLK\t%lu\n", dsi_fclk_rate());
+	seq_printf(s,	"DSI_FCLK\t%lu\n", dsi_fclk_rate(dsidev));
 
 	seq_printf(s,	"DDR_CLK\t\t%lu\n",
 			cinfo->clkin4ddr / 4);
 
-	seq_printf(s,	"TxByteClkHS\t%lu\n", dsi_get_txbyteclkhs());
+	seq_printf(s,	"TxByteClkHS\t%lu\n", dsi_get_txbyteclkhs(dsidev));
 
 	seq_printf(s,	"LP_CLK\t\t%lu\n", cinfo->lp_clk);
 
-	seq_printf(s,	"VP_CLK\t\t%lu\n"
-			"VP_PCLK\t\t%lu\n",
-			dispc_lclk_rate(OMAP_DSS_CHANNEL_LCD),
-			dispc_pclk_rate(OMAP_DSS_CHANNEL_LCD));
-
 	enable_clocks(0);
 }
 
-#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-void dsi_dump_irqs(struct seq_file *s)
+void dsi_dump_clocks(struct seq_file *s)
 {
+	struct platform_device *dsidev;
+	int i;
+
+	for  (i = 0; i < MAX_NUM_DSI; i++) {
+		dsidev = dsi_get_dsidev_from_id(i);
+		if (dsidev)
+			dsi_dump_dsidev_clocks(dsidev, s);
+	}
+}
+
+#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
+static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
+		struct seq_file *s)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned long flags;
 	struct dsi_irq_stats stats;
+	int dsi_module = dsi_get_dsidev_id(dsidev);
 
-	spin_lock_irqsave(&dsi.irq_stats_lock, flags);
+	spin_lock_irqsave(&dsi->irq_stats_lock, flags);
 
-	stats = dsi.irq_stats;
-	memset(&dsi.irq_stats, 0, sizeof(dsi.irq_stats));
-	dsi.irq_stats.last_reset = jiffies;
+	stats = dsi->irq_stats;
+	memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
+	dsi->irq_stats.last_reset = jiffies;
 
-	spin_unlock_irqrestore(&dsi.irq_stats_lock, flags);
+	spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
 
 	seq_printf(s, "period %u ms\n",
 			jiffies_to_msecs(jiffies - stats.last_reset));
@@ -1570,7 +1770,7 @@
 #define PIS(x) \
 	seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]);
 
-	seq_printf(s, "-- DSI interrupts --\n");
+	seq_printf(s, "-- DSI%d interrupts --\n", dsi_module + 1);
 	PIS(VC0);
 	PIS(VC1);
 	PIS(VC2);
@@ -1636,13 +1836,45 @@
 	PIS(ULPSACTIVENOT_ALL1);
 #undef PIS
 }
+
+static void dsi1_dump_irqs(struct seq_file *s)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_id(0);
+
+	dsi_dump_dsidev_irqs(dsidev, s);
+}
+
+static void dsi2_dump_irqs(struct seq_file *s)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_id(1);
+
+	dsi_dump_dsidev_irqs(dsidev, s);
+}
+
+void dsi_create_debugfs_files_irq(struct dentry *debugfs_dir,
+		const struct file_operations *debug_fops)
+{
+	struct platform_device *dsidev;
+
+	dsidev = dsi_get_dsidev_from_id(0);
+	if (dsidev)
+		debugfs_create_file("dsi1_irqs", S_IRUGO, debugfs_dir,
+			&dsi1_dump_irqs, debug_fops);
+
+	dsidev = dsi_get_dsidev_from_id(1);
+	if (dsidev)
+		debugfs_create_file("dsi2_irqs", S_IRUGO, debugfs_dir,
+			&dsi2_dump_irqs, debug_fops);
+}
 #endif
 
-void dsi_dump_regs(struct seq_file *s)
+static void dsi_dump_dsidev_regs(struct platform_device *dsidev,
+		struct seq_file *s)
 {
-#define DUMPREG(r) seq_printf(s, "%-35s %08x\n", #r, dsi_read_reg(r))
+#define DUMPREG(r) seq_printf(s, "%-35s %08x\n", #r, dsi_read_reg(dsidev, r))
 
 	dss_clk_enable(DSS_CLK_ICK | DSS_CLK_FCK);
+	dsi_enable_scp_clk(dsidev);
 
 	DUMPREG(DSI_REVISION);
 	DUMPREG(DSI_SYSCONFIG);
@@ -1714,25 +1946,57 @@
 	DUMPREG(DSI_PLL_CONFIGURATION1);
 	DUMPREG(DSI_PLL_CONFIGURATION2);
 
+	dsi_disable_scp_clk(dsidev);
 	dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 #undef DUMPREG
 }
 
-enum dsi_complexio_power_state {
+static void dsi1_dump_regs(struct seq_file *s)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_id(0);
+
+	dsi_dump_dsidev_regs(dsidev, s);
+}
+
+static void dsi2_dump_regs(struct seq_file *s)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_id(1);
+
+	dsi_dump_dsidev_regs(dsidev, s);
+}
+
+void dsi_create_debugfs_files_reg(struct dentry *debugfs_dir,
+		const struct file_operations *debug_fops)
+{
+	struct platform_device *dsidev;
+
+	dsidev = dsi_get_dsidev_from_id(0);
+	if (dsidev)
+		debugfs_create_file("dsi1_regs", S_IRUGO, debugfs_dir,
+			&dsi1_dump_regs, debug_fops);
+
+	dsidev = dsi_get_dsidev_from_id(1);
+	if (dsidev)
+		debugfs_create_file("dsi2_regs", S_IRUGO, debugfs_dir,
+			&dsi2_dump_regs, debug_fops);
+}
+enum dsi_cio_power_state {
 	DSI_COMPLEXIO_POWER_OFF		= 0x0,
 	DSI_COMPLEXIO_POWER_ON		= 0x1,
 	DSI_COMPLEXIO_POWER_ULPS	= 0x2,
 };
 
-static int dsi_complexio_power(enum dsi_complexio_power_state state)
+static int dsi_cio_power(struct platform_device *dsidev,
+		enum dsi_cio_power_state state)
 {
 	int t = 0;
 
 	/* PWR_CMD */
-	REG_FLD_MOD(DSI_COMPLEXIO_CFG1, state, 28, 27);
+	REG_FLD_MOD(dsidev, DSI_COMPLEXIO_CFG1, state, 28, 27);
 
 	/* PWR_STATUS */
-	while (FLD_GET(dsi_read_reg(DSI_COMPLEXIO_CFG1), 26, 25) != state) {
+	while (FLD_GET(dsi_read_reg(dsidev, DSI_COMPLEXIO_CFG1),
+			26, 25) != state) {
 		if (++t > 1000) {
 			DSSERR("failed to set complexio power state to "
 					"%d\n", state);
@@ -1744,9 +2008,70 @@
 	return 0;
 }
 
-static void dsi_complexio_config(struct omap_dss_device *dssdev)
+/* Number of data lanes present on DSI interface */
+static inline int dsi_get_num_data_lanes(struct platform_device *dsidev)
 {
+	/* DSI on OMAP3 doesn't have register DSI_GNQ, set number
+	 * of data lanes as 2 by default */
+	if (dss_has_feature(FEAT_DSI_GNQ))
+		return REG_GET(dsidev, DSI_GNQ, 11, 9);	/* NB_DATA_LANES */
+	else
+		return 2;
+}
+
+/* Number of data lanes used by the dss device */
+static inline int dsi_get_num_data_lanes_dssdev(struct omap_dss_device *dssdev)
+{
+	int num_data_lanes = 0;
+
+	if (dssdev->phy.dsi.data1_lane != 0)
+		num_data_lanes++;
+	if (dssdev->phy.dsi.data2_lane != 0)
+		num_data_lanes++;
+	if (dssdev->phy.dsi.data3_lane != 0)
+		num_data_lanes++;
+	if (dssdev->phy.dsi.data4_lane != 0)
+		num_data_lanes++;
+
+	return num_data_lanes;
+}
+
+static unsigned dsi_get_line_buf_size(struct platform_device *dsidev)
+{
+	int val;
+
+	/* line buffer on OMAP3 is 1024 x 24bits */
+	/* XXX: for some reason using full buffer size causes
+	 * considerable TX slowdown with update sizes that fill the
+	 * whole buffer */
+	if (!dss_has_feature(FEAT_DSI_GNQ))
+		return 1023 * 3;
+
+	val = REG_GET(dsidev, DSI_GNQ, 14, 12); /* VP1_LINE_BUFFER_SIZE */
+
+	switch (val) {
+	case 1:
+		return 512 * 3;		/* 512x24 bits */
+	case 2:
+		return 682 * 3;		/* 682x24 bits */
+	case 3:
+		return 853 * 3;		/* 853x24 bits */
+	case 4:
+		return 1024 * 3;	/* 1024x24 bits */
+	case 5:
+		return 1194 * 3;	/* 1194x24 bits */
+	case 6:
+		return 1365 * 3;	/* 1365x24 bits */
+	default:
+		BUG();
+	}
+}
+
+static void dsi_set_lane_config(struct omap_dss_device *dssdev)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	u32 r;
+	int num_data_lanes_dssdev = dsi_get_num_data_lanes_dssdev(dssdev);
 
 	int clk_lane   = dssdev->phy.dsi.clk_lane;
 	int data1_lane = dssdev->phy.dsi.data1_lane;
@@ -1755,14 +2080,28 @@
 	int data1_pol  = dssdev->phy.dsi.data1_pol;
 	int data2_pol  = dssdev->phy.dsi.data2_pol;
 
-	r = dsi_read_reg(DSI_COMPLEXIO_CFG1);
+	r = dsi_read_reg(dsidev, DSI_COMPLEXIO_CFG1);
 	r = FLD_MOD(r, clk_lane, 2, 0);
 	r = FLD_MOD(r, clk_pol, 3, 3);
 	r = FLD_MOD(r, data1_lane, 6, 4);
 	r = FLD_MOD(r, data1_pol, 7, 7);
 	r = FLD_MOD(r, data2_lane, 10, 8);
 	r = FLD_MOD(r, data2_pol, 11, 11);
-	dsi_write_reg(DSI_COMPLEXIO_CFG1, r);
+	if (num_data_lanes_dssdev > 2) {
+		int data3_lane  = dssdev->phy.dsi.data3_lane;
+		int data3_pol  = dssdev->phy.dsi.data3_pol;
+
+		r = FLD_MOD(r, data3_lane, 14, 12);
+		r = FLD_MOD(r, data3_pol, 15, 15);
+	}
+	if (num_data_lanes_dssdev > 3) {
+		int data4_lane  = dssdev->phy.dsi.data4_lane;
+		int data4_pol  = dssdev->phy.dsi.data4_pol;
+
+		r = FLD_MOD(r, data4_lane, 18, 16);
+		r = FLD_MOD(r, data4_pol, 19, 19);
+	}
+	dsi_write_reg(dsidev, DSI_COMPLEXIO_CFG1, r);
 
 	/* The configuration of the DSI complex I/O (number of data lanes,
 	   position, differential order) should not be changed while
@@ -1776,27 +2115,31 @@
 	   DSI complex I/O configuration is unknown. */
 
 	/*
-	REG_FLD_MOD(DSI_CTRL, 1, 0, 0);
-	REG_FLD_MOD(DSI_CTRL, 0, 0, 0);
-	REG_FLD_MOD(DSI_CLK_CTRL, 1, 20, 20);
-	REG_FLD_MOD(DSI_CTRL, 1, 0, 0);
+	REG_FLD_MOD(dsidev, DSI_CTRL, 1, 0, 0);
+	REG_FLD_MOD(dsidev, DSI_CTRL, 0, 0, 0);
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, 1, 20, 20);
+	REG_FLD_MOD(dsidev, DSI_CTRL, 1, 0, 0);
 	*/
 }
 
-static inline unsigned ns2ddr(unsigned ns)
+static inline unsigned ns2ddr(struct platform_device *dsidev, unsigned ns)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
 	/* convert time in ns to ddr ticks, rounding up */
-	unsigned long ddr_clk = dsi.current_cinfo.clkin4ddr / 4;
+	unsigned long ddr_clk = dsi->current_cinfo.clkin4ddr / 4;
 	return (ns * (ddr_clk / 1000 / 1000) + 999) / 1000;
 }
 
-static inline unsigned ddr2ns(unsigned ddr)
+static inline unsigned ddr2ns(struct platform_device *dsidev, unsigned ddr)
 {
-	unsigned long ddr_clk = dsi.current_cinfo.clkin4ddr / 4;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	unsigned long ddr_clk = dsi->current_cinfo.clkin4ddr / 4;
 	return ddr * 1000 * 1000 / (ddr_clk / 1000);
 }
 
-static void dsi_complexio_timings(void)
+static void dsi_cio_timings(struct platform_device *dsidev)
 {
 	u32 r;
 	u32 ths_prepare, ths_prepare_ths_zero, ths_trail, ths_exit;
@@ -1808,139 +2151,323 @@
 	/* 1 * DDR_CLK = 2 * UI */
 
 	/* min 40ns + 4*UI	max 85ns + 6*UI */
-	ths_prepare = ns2ddr(70) + 2;
+	ths_prepare = ns2ddr(dsidev, 70) + 2;
 
 	/* min 145ns + 10*UI */
-	ths_prepare_ths_zero = ns2ddr(175) + 2;
+	ths_prepare_ths_zero = ns2ddr(dsidev, 175) + 2;
 
 	/* min max(8*UI, 60ns+4*UI) */
-	ths_trail = ns2ddr(60) + 5;
+	ths_trail = ns2ddr(dsidev, 60) + 5;
 
 	/* min 100ns */
-	ths_exit = ns2ddr(145);
+	ths_exit = ns2ddr(dsidev, 145);
 
 	/* tlpx min 50n */
-	tlpx_half = ns2ddr(25);
+	tlpx_half = ns2ddr(dsidev, 25);
 
 	/* min 60ns */
-	tclk_trail = ns2ddr(60) + 2;
+	tclk_trail = ns2ddr(dsidev, 60) + 2;
 
 	/* min 38ns, max 95ns */
-	tclk_prepare = ns2ddr(65);
+	tclk_prepare = ns2ddr(dsidev, 65);
 
 	/* min tclk-prepare + tclk-zero = 300ns */
-	tclk_zero = ns2ddr(260);
+	tclk_zero = ns2ddr(dsidev, 260);
 
 	DSSDBG("ths_prepare %u (%uns), ths_prepare_ths_zero %u (%uns)\n",
-		ths_prepare, ddr2ns(ths_prepare),
-		ths_prepare_ths_zero, ddr2ns(ths_prepare_ths_zero));
+		ths_prepare, ddr2ns(dsidev, ths_prepare),
+		ths_prepare_ths_zero, ddr2ns(dsidev, ths_prepare_ths_zero));
 	DSSDBG("ths_trail %u (%uns), ths_exit %u (%uns)\n",
-			ths_trail, ddr2ns(ths_trail),
-			ths_exit, ddr2ns(ths_exit));
+			ths_trail, ddr2ns(dsidev, ths_trail),
+			ths_exit, ddr2ns(dsidev, ths_exit));
 
 	DSSDBG("tlpx_half %u (%uns), tclk_trail %u (%uns), "
 			"tclk_zero %u (%uns)\n",
-			tlpx_half, ddr2ns(tlpx_half),
-			tclk_trail, ddr2ns(tclk_trail),
-			tclk_zero, ddr2ns(tclk_zero));
+			tlpx_half, ddr2ns(dsidev, tlpx_half),
+			tclk_trail, ddr2ns(dsidev, tclk_trail),
+			tclk_zero, ddr2ns(dsidev, tclk_zero));
 	DSSDBG("tclk_prepare %u (%uns)\n",
-			tclk_prepare, ddr2ns(tclk_prepare));
+			tclk_prepare, ddr2ns(dsidev, tclk_prepare));
 
 	/* program timings */
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG0);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG0);
 	r = FLD_MOD(r, ths_prepare, 31, 24);
 	r = FLD_MOD(r, ths_prepare_ths_zero, 23, 16);
 	r = FLD_MOD(r, ths_trail, 15, 8);
 	r = FLD_MOD(r, ths_exit, 7, 0);
-	dsi_write_reg(DSI_DSIPHY_CFG0, r);
+	dsi_write_reg(dsidev, DSI_DSIPHY_CFG0, r);
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG1);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG1);
 	r = FLD_MOD(r, tlpx_half, 22, 16);
 	r = FLD_MOD(r, tclk_trail, 15, 8);
 	r = FLD_MOD(r, tclk_zero, 7, 0);
-	dsi_write_reg(DSI_DSIPHY_CFG1, r);
+	dsi_write_reg(dsidev, DSI_DSIPHY_CFG1, r);
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG2);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG2);
 	r = FLD_MOD(r, tclk_prepare, 7, 0);
-	dsi_write_reg(DSI_DSIPHY_CFG2, r);
+	dsi_write_reg(dsidev, DSI_DSIPHY_CFG2, r);
 }
 
-
-static int dsi_complexio_init(struct omap_dss_device *dssdev)
+static void dsi_cio_enable_lane_override(struct omap_dss_device *dssdev,
+		enum dsi_lane lanes)
 {
-	int r = 0;
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	int clk_lane   = dssdev->phy.dsi.clk_lane;
+	int data1_lane = dssdev->phy.dsi.data1_lane;
+	int data2_lane = dssdev->phy.dsi.data2_lane;
+	int data3_lane = dssdev->phy.dsi.data3_lane;
+	int data4_lane = dssdev->phy.dsi.data4_lane;
+	int clk_pol    = dssdev->phy.dsi.clk_pol;
+	int data1_pol  = dssdev->phy.dsi.data1_pol;
+	int data2_pol  = dssdev->phy.dsi.data2_pol;
+	int data3_pol  = dssdev->phy.dsi.data3_pol;
+	int data4_pol  = dssdev->phy.dsi.data4_pol;
 
-	DSSDBG("dsi_complexio_init\n");
+	u32 l = 0;
+	u8 lptxscp_start = dsi->num_data_lanes == 2 ? 22 : 26;
 
-	/* CIO_CLK_ICG, enable L3 clk to CIO */
-	REG_FLD_MOD(DSI_CLK_CTRL, 1, 14, 14);
+	if (lanes & DSI_CLK_P)
+		l |= 1 << ((clk_lane - 1) * 2 + (clk_pol ? 0 : 1));
+	if (lanes & DSI_CLK_N)
+		l |= 1 << ((clk_lane - 1) * 2 + (clk_pol ? 1 : 0));
+
+	if (lanes & DSI_DATA1_P)
+		l |= 1 << ((data1_lane - 1) * 2 + (data1_pol ? 0 : 1));
+	if (lanes & DSI_DATA1_N)
+		l |= 1 << ((data1_lane - 1) * 2 + (data1_pol ? 1 : 0));
+
+	if (lanes & DSI_DATA2_P)
+		l |= 1 << ((data2_lane - 1) * 2 + (data2_pol ? 0 : 1));
+	if (lanes & DSI_DATA2_N)
+		l |= 1 << ((data2_lane - 1) * 2 + (data2_pol ? 1 : 0));
+
+	if (lanes & DSI_DATA3_P)
+		l |= 1 << ((data3_lane - 1) * 2 + (data3_pol ? 0 : 1));
+	if (lanes & DSI_DATA3_N)
+		l |= 1 << ((data3_lane - 1) * 2 + (data3_pol ? 1 : 0));
+
+	if (lanes & DSI_DATA4_P)
+		l |= 1 << ((data4_lane - 1) * 2 + (data4_pol ? 0 : 1));
+	if (lanes & DSI_DATA4_N)
+		l |= 1 << ((data4_lane - 1) * 2 + (data4_pol ? 1 : 0));
+	/*
+	 * Bits in REGLPTXSCPDAT4TO0DXDY:
+	 * 17: DY0 18: DX0
+	 * 19: DY1 20: DX1
+	 * 21: DY2 22: DX2
+	 * 23: DY3 24: DX3
+	 * 25: DY4 26: DX4
+	 */
+
+	/* Set the lane override configuration */
+
+	/* REGLPTXSCPDAT4TO0DXDY */
+	REG_FLD_MOD(dsidev, DSI_DSIPHY_CFG10, l, lptxscp_start, 17);
+
+	/* Enable lane override */
+
+	/* ENLPTXSCPDAT */
+	REG_FLD_MOD(dsidev, DSI_DSIPHY_CFG10, 1, 27, 27);
+}
+
+static void dsi_cio_disable_lane_override(struct platform_device *dsidev)
+{
+	/* Disable lane override */
+	REG_FLD_MOD(dsidev, DSI_DSIPHY_CFG10, 0, 27, 27); /* ENLPTXSCPDAT */
+	/* Reset the lane override configuration */
+	/* REGLPTXSCPDAT4TO0DXDY */
+	REG_FLD_MOD(dsidev, DSI_DSIPHY_CFG10, 0, 22, 17);
+}
+
+static int dsi_cio_wait_tx_clk_esc_reset(struct omap_dss_device *dssdev)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	int t;
+	int bits[3];
+	bool in_use[3];
+
+	if (dss_has_feature(FEAT_DSI_REVERSE_TXCLKESC)) {
+		bits[0] = 28;
+		bits[1] = 27;
+		bits[2] = 26;
+	} else {
+		bits[0] = 24;
+		bits[1] = 25;
+		bits[2] = 26;
+	}
+
+	in_use[0] = false;
+	in_use[1] = false;
+	in_use[2] = false;
+
+	if (dssdev->phy.dsi.clk_lane != 0)
+		in_use[dssdev->phy.dsi.clk_lane - 1] = true;
+	if (dssdev->phy.dsi.data1_lane != 0)
+		in_use[dssdev->phy.dsi.data1_lane - 1] = true;
+	if (dssdev->phy.dsi.data2_lane != 0)
+		in_use[dssdev->phy.dsi.data2_lane - 1] = true;
+
+	t = 100000;
+	while (true) {
+		u32 l;
+		int i;
+		int ok;
+
+		l = dsi_read_reg(dsidev, DSI_DSIPHY_CFG5);
+
+		ok = 0;
+		for (i = 0; i < 3; ++i) {
+			if (!in_use[i] || (l & (1 << bits[i])))
+				ok++;
+		}
+
+		if (ok == 3)
+			break;
+
+		if (--t == 0) {
+			for (i = 0; i < 3; ++i) {
+				if (!in_use[i] || (l & (1 << bits[i])))
+					continue;
+
+				DSSERR("CIO TXCLKESC%d domain not coming " \
+						"out of reset\n", i);
+			}
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int dsi_cio_init(struct omap_dss_device *dssdev)
+{
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	int r;
+	int num_data_lanes_dssdev = dsi_get_num_data_lanes_dssdev(dssdev);
+	u32 l;
+
+	DSSDBGF();
+
+	if (dsi->dsi_mux_pads)
+		dsi->dsi_mux_pads(true);
+
+	dsi_enable_scp_clk(dsidev);
 
 	/* A dummy read using the SCP interface to any DSIPHY register is
 	 * required after DSIPHY reset to complete the reset of the DSI complex
 	 * I/O. */
-	dsi_read_reg(DSI_DSIPHY_CFG5);
+	dsi_read_reg(dsidev, DSI_DSIPHY_CFG5);
 
-	if (wait_for_bit_change(DSI_DSIPHY_CFG5, 30, 1) != 1) {
-		DSSERR("ComplexIO PHY not coming out of reset.\n");
-		r = -ENODEV;
-		goto err;
+	if (wait_for_bit_change(dsidev, DSI_DSIPHY_CFG5, 30, 1) != 1) {
+		DSSERR("CIO SCP Clock domain not coming out of reset.\n");
+		r = -EIO;
+		goto err_scp_clk_dom;
 	}
 
-	dsi_complexio_config(dssdev);
+	dsi_set_lane_config(dssdev);
 
-	r = dsi_complexio_power(DSI_COMPLEXIO_POWER_ON);
+	/* set TX STOP MODE timer to maximum for this operation */
+	l = dsi_read_reg(dsidev, DSI_TIMING1);
+	l = FLD_MOD(l, 1, 15, 15);	/* FORCE_TX_STOP_MODE_IO */
+	l = FLD_MOD(l, 1, 14, 14);	/* STOP_STATE_X16_IO */
+	l = FLD_MOD(l, 1, 13, 13);	/* STOP_STATE_X4_IO */
+	l = FLD_MOD(l, 0x1fff, 12, 0);	/* STOP_STATE_COUNTER_IO */
+	dsi_write_reg(dsidev, DSI_TIMING1, l);
 
+	if (dsi->ulps_enabled) {
+		u32 lane_mask = DSI_CLK_P | DSI_DATA1_P | DSI_DATA2_P;
+
+		DSSDBG("manual ulps exit\n");
+
+		/* ULPS is exited by Mark-1 state for 1ms, followed by
+		 * stop state. DSS HW cannot do this via the normal
+		 * ULPS exit sequence, as after reset the DSS HW thinks
+		 * that we are not in ULPS mode, and refuses to send the
+		 * sequence. So we need to send the ULPS exit sequence
+		 * manually.
+		 */
+
+		if (num_data_lanes_dssdev > 2)
+			lane_mask |= DSI_DATA3_P;
+
+		if (num_data_lanes_dssdev > 3)
+			lane_mask |= DSI_DATA4_P;
+
+		dsi_cio_enable_lane_override(dssdev, lane_mask);
+	}
+
+	r = dsi_cio_power(dsidev, DSI_COMPLEXIO_POWER_ON);
 	if (r)
-		goto err;
+		goto err_cio_pwr;
 
-	if (wait_for_bit_change(DSI_COMPLEXIO_CFG1, 29, 1) != 1) {
-		DSSERR("ComplexIO not coming out of reset.\n");
+	if (wait_for_bit_change(dsidev, DSI_COMPLEXIO_CFG1, 29, 1) != 1) {
+		DSSERR("CIO PWR clock domain not coming out of reset.\n");
 		r = -ENODEV;
-		goto err;
+		goto err_cio_pwr_dom;
 	}
 
-	if (wait_for_bit_change(DSI_COMPLEXIO_CFG1, 21, 1) != 1) {
-		DSSERR("ComplexIO LDO power down.\n");
-		r = -ENODEV;
-		goto err;
+	dsi_if_enable(dsidev, true);
+	dsi_if_enable(dsidev, false);
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, 1, 20, 20); /* LP_CLK_ENABLE */
+
+	r = dsi_cio_wait_tx_clk_esc_reset(dssdev);
+	if (r)
+		goto err_tx_clk_esc_rst;
+
+	if (dsi->ulps_enabled) {
+		/* Keep Mark-1 state for 1ms (as per DSI spec) */
+		ktime_t wait = ns_to_ktime(1000 * 1000);
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_hrtimeout(&wait, HRTIMER_MODE_REL);
+
+		/* Disable the override. The lanes should be set to Mark-11
+		 * state by the HW */
+		dsi_cio_disable_lane_override(dsidev);
 	}
 
-	dsi_complexio_timings();
+	/* FORCE_TX_STOP_MODE_IO */
+	REG_FLD_MOD(dsidev, DSI_TIMING1, 0, 15, 15);
 
-	/*
-	   The configuration of the DSI complex I/O (number of data lanes,
-	   position, differential order) should not be changed while
-	   DSS.DSI_CLK_CRTRL[20] LP_CLK_ENABLE bit is set to 1. For the
-	   hardware to recognize a new configuration of the complex I/O (done
-	   in DSS.DSI_COMPLEXIO_CFG1 register), it is recommended to follow
-	   this sequence: First set the DSS.DSI_CTRL[0] IF_EN bit to 1, next
-	   reset the DSS.DSI_CTRL[0] IF_EN to 0, then set DSS.DSI_CLK_CTRL[20]
-	   LP_CLK_ENABLE to 1, and finally, set again the DSS.DSI_CTRL[0] IF_EN
-	   bit to 1. If the sequence is not followed, the DSi complex I/O
-	   configuration is undetermined.
-	   */
-	dsi_if_enable(1);
-	dsi_if_enable(0);
-	REG_FLD_MOD(DSI_CLK_CTRL, 1, 20, 20); /* LP_CLK_ENABLE */
-	dsi_if_enable(1);
-	dsi_if_enable(0);
+	dsi_cio_timings(dsidev);
+
+	dsi->ulps_enabled = false;
 
 	DSSDBG("CIO init done\n");
-err:
+
+	return 0;
+
+err_tx_clk_esc_rst:
+	REG_FLD_MOD(dsidev, DSI_CLK_CTRL, 0, 20, 20); /* LP_CLK_ENABLE */
+err_cio_pwr_dom:
+	dsi_cio_power(dsidev, DSI_COMPLEXIO_POWER_OFF);
+err_cio_pwr:
+	if (dsi->ulps_enabled)
+		dsi_cio_disable_lane_override(dsidev);
+err_scp_clk_dom:
+	dsi_disable_scp_clk(dsidev);
+	if (dsi->dsi_mux_pads)
+		dsi->dsi_mux_pads(false);
 	return r;
 }
 
-static void dsi_complexio_uninit(void)
+static void dsi_cio_uninit(struct platform_device *dsidev)
 {
-	dsi_complexio_power(DSI_COMPLEXIO_POWER_OFF);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	dsi_cio_power(dsidev, DSI_COMPLEXIO_POWER_OFF);
+	dsi_disable_scp_clk(dsidev);
+	if (dsi->dsi_mux_pads)
+		dsi->dsi_mux_pads(false);
 }
 
-static int _dsi_wait_reset(void)
+static int _dsi_wait_reset(struct platform_device *dsidev)
 {
 	int t = 0;
 
-	while (REG_GET(DSI_SYSSTATUS, 0, 0) == 0) {
+	while (REG_GET(dsidev, DSI_SYSSTATUS, 0, 0) == 0) {
 		if (++t > 5) {
 			DSSERR("soft reset failed\n");
 			return -ENODEV;
@@ -1951,28 +2478,30 @@
 	return 0;
 }
 
-static int _dsi_reset(void)
+static int _dsi_reset(struct platform_device *dsidev)
 {
 	/* Soft reset */
-	REG_FLD_MOD(DSI_SYSCONFIG, 1, 1, 1);
-	return _dsi_wait_reset();
+	REG_FLD_MOD(dsidev, DSI_SYSCONFIG, 1, 1, 1);
+	return _dsi_wait_reset(dsidev);
 }
 
-static void dsi_config_tx_fifo(enum fifo_size size1, enum fifo_size size2,
+static void dsi_config_tx_fifo(struct platform_device *dsidev,
+		enum fifo_size size1, enum fifo_size size2,
 		enum fifo_size size3, enum fifo_size size4)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 r = 0;
 	int add = 0;
 	int i;
 
-	dsi.vc[0].fifo_size = size1;
-	dsi.vc[1].fifo_size = size2;
-	dsi.vc[2].fifo_size = size3;
-	dsi.vc[3].fifo_size = size4;
+	dsi->vc[0].fifo_size = size1;
+	dsi->vc[1].fifo_size = size2;
+	dsi->vc[2].fifo_size = size3;
+	dsi->vc[3].fifo_size = size4;
 
 	for (i = 0; i < 4; i++) {
 		u8 v;
-		int size = dsi.vc[i].fifo_size;
+		int size = dsi->vc[i].fifo_size;
 
 		if (add + size > 4) {
 			DSSERR("Illegal FIFO configuration\n");
@@ -1985,24 +2514,26 @@
 		add += size;
 	}
 
-	dsi_write_reg(DSI_TX_FIFO_VC_SIZE, r);
+	dsi_write_reg(dsidev, DSI_TX_FIFO_VC_SIZE, r);
 }
 
-static void dsi_config_rx_fifo(enum fifo_size size1, enum fifo_size size2,
+static void dsi_config_rx_fifo(struct platform_device *dsidev,
+		enum fifo_size size1, enum fifo_size size2,
 		enum fifo_size size3, enum fifo_size size4)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 r = 0;
 	int add = 0;
 	int i;
 
-	dsi.vc[0].fifo_size = size1;
-	dsi.vc[1].fifo_size = size2;
-	dsi.vc[2].fifo_size = size3;
-	dsi.vc[3].fifo_size = size4;
+	dsi->vc[0].fifo_size = size1;
+	dsi->vc[1].fifo_size = size2;
+	dsi->vc[2].fifo_size = size3;
+	dsi->vc[3].fifo_size = size4;
 
 	for (i = 0; i < 4; i++) {
 		u8 v;
-		int size = dsi.vc[i].fifo_size;
+		int size = dsi->vc[i].fifo_size;
 
 		if (add + size > 4) {
 			DSSERR("Illegal FIFO configuration\n");
@@ -2015,18 +2546,18 @@
 		add += size;
 	}
 
-	dsi_write_reg(DSI_RX_FIFO_VC_SIZE, r);
+	dsi_write_reg(dsidev, DSI_RX_FIFO_VC_SIZE, r);
 }
 
-static int dsi_force_tx_stop_mode_io(void)
+static int dsi_force_tx_stop_mode_io(struct platform_device *dsidev)
 {
 	u32 r;
 
-	r = dsi_read_reg(DSI_TIMING1);
+	r = dsi_read_reg(dsidev, DSI_TIMING1);
 	r = FLD_MOD(r, 1, 15, 15);	/* FORCE_TX_STOP_MODE_IO */
-	dsi_write_reg(DSI_TIMING1, r);
+	dsi_write_reg(dsidev, DSI_TIMING1, r);
 
-	if (wait_for_bit_change(DSI_TIMING1, 15, 0) != 0) {
+	if (wait_for_bit_change(dsidev, DSI_TIMING1, 15, 0) != 0) {
 		DSSERR("TX_STOP bit not going down\n");
 		return -EIO;
 	}
@@ -2034,16 +2565,135 @@
 	return 0;
 }
 
-static int dsi_vc_enable(int channel, bool enable)
+static bool dsi_vc_is_enabled(struct platform_device *dsidev, int channel)
+{
+	return REG_GET(dsidev, DSI_VC_CTRL(channel), 0, 0);
+}
+
+static void dsi_packet_sent_handler_vp(void *data, u32 mask)
+{
+	struct dsi_packet_sent_handler_data *vp_data =
+		(struct dsi_packet_sent_handler_data *) data;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(vp_data->dsidev);
+	const int channel = dsi->update_channel;
+	u8 bit = dsi->te_enabled ? 30 : 31;
+
+	if (REG_GET(vp_data->dsidev, DSI_VC_TE(channel), bit, bit) == 0)
+		complete(vp_data->completion);
+}
+
+static int dsi_sync_vc_vp(struct platform_device *dsidev, int channel)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	DECLARE_COMPLETION_ONSTACK(completion);
+	struct dsi_packet_sent_handler_data vp_data = { dsidev, &completion };
+	int r = 0;
+	u8 bit;
+
+	bit = dsi->te_enabled ? 30 : 31;
+
+	r = dsi_register_isr_vc(dsidev, channel, dsi_packet_sent_handler_vp,
+		&vp_data, DSI_VC_IRQ_PACKET_SENT);
+	if (r)
+		goto err0;
+
+	/* Wait for completion only if TE_EN/TE_START is still set */
+	if (REG_GET(dsidev, DSI_VC_TE(channel), bit, bit)) {
+		if (wait_for_completion_timeout(&completion,
+				msecs_to_jiffies(10)) == 0) {
+			DSSERR("Failed to complete previous frame transfer\n");
+			r = -EIO;
+			goto err1;
+		}
+	}
+
+	dsi_unregister_isr_vc(dsidev, channel, dsi_packet_sent_handler_vp,
+		&vp_data, DSI_VC_IRQ_PACKET_SENT);
+
+	return 0;
+err1:
+	dsi_unregister_isr_vc(dsidev, channel, dsi_packet_sent_handler_vp,
+		&vp_data, DSI_VC_IRQ_PACKET_SENT);
+err0:
+	return r;
+}
+
+static void dsi_packet_sent_handler_l4(void *data, u32 mask)
+{
+	struct dsi_packet_sent_handler_data *l4_data =
+		(struct dsi_packet_sent_handler_data *) data;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(l4_data->dsidev);
+	const int channel = dsi->update_channel;
+
+	if (REG_GET(l4_data->dsidev, DSI_VC_CTRL(channel), 5, 5) == 0)
+		complete(l4_data->completion);
+}
+
+static int dsi_sync_vc_l4(struct platform_device *dsidev, int channel)
+{
+	DECLARE_COMPLETION_ONSTACK(completion);
+	struct dsi_packet_sent_handler_data l4_data = { dsidev, &completion };
+	int r = 0;
+
+	r = dsi_register_isr_vc(dsidev, channel, dsi_packet_sent_handler_l4,
+		&l4_data, DSI_VC_IRQ_PACKET_SENT);
+	if (r)
+		goto err0;
+
+	/* Wait for completion only if TX_FIFO_NOT_EMPTY is still set */
+	if (REG_GET(dsidev, DSI_VC_CTRL(channel), 5, 5)) {
+		if (wait_for_completion_timeout(&completion,
+				msecs_to_jiffies(10)) == 0) {
+			DSSERR("Failed to complete previous l4 transfer\n");
+			r = -EIO;
+			goto err1;
+		}
+	}
+
+	dsi_unregister_isr_vc(dsidev, channel, dsi_packet_sent_handler_l4,
+		&l4_data, DSI_VC_IRQ_PACKET_SENT);
+
+	return 0;
+err1:
+	dsi_unregister_isr_vc(dsidev, channel, dsi_packet_sent_handler_l4,
+		&l4_data, DSI_VC_IRQ_PACKET_SENT);
+err0:
+	return r;
+}
+
+static int dsi_sync_vc(struct platform_device *dsidev, int channel)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	WARN_ON(!dsi_bus_is_locked(dsidev));
+
+	WARN_ON(in_interrupt());
+
+	if (!dsi_vc_is_enabled(dsidev, channel))
+		return 0;
+
+	switch (dsi->vc[channel].mode) {
+	case DSI_VC_MODE_VP:
+		return dsi_sync_vc_vp(dsidev, channel);
+	case DSI_VC_MODE_L4:
+		return dsi_sync_vc_l4(dsidev, channel);
+	default:
+		BUG();
+	}
+}
+
+static int dsi_vc_enable(struct platform_device *dsidev, int channel,
+		bool enable)
 {
 	DSSDBG("dsi_vc_enable channel %d, enable %d\n",
 			channel, enable);
 
 	enable = enable ? 1 : 0;
 
-	REG_FLD_MOD(DSI_VC_CTRL(channel), enable, 0, 0);
+	REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), enable, 0, 0);
 
-	if (wait_for_bit_change(DSI_VC_CTRL(channel), 0, enable) != enable) {
+	if (wait_for_bit_change(dsidev, DSI_VC_CTRL(channel),
+		0, enable) != enable) {
 			DSSERR("Failed to set dsi_vc_enable to %d\n", enable);
 			return -EIO;
 	}
@@ -2051,13 +2701,13 @@
 	return 0;
 }
 
-static void dsi_vc_initial_config(int channel)
+static void dsi_vc_initial_config(struct platform_device *dsidev, int channel)
 {
 	u32 r;
 
 	DSSDBGF("%d", channel);
 
-	r = dsi_read_reg(DSI_VC_CTRL(channel));
+	r = dsi_read_reg(dsidev, DSI_VC_CTRL(channel));
 
 	if (FLD_GET(r, 15, 15)) /* VC_BUSY */
 		DSSERR("VC(%d) busy when trying to configure it!\n",
@@ -2070,85 +2720,107 @@
 	r = FLD_MOD(r, 1, 7, 7); /* CS_TX_EN */
 	r = FLD_MOD(r, 1, 8, 8); /* ECC_TX_EN */
 	r = FLD_MOD(r, 0, 9, 9); /* MODE_SPEED, high speed on/off */
+	if (dss_has_feature(FEAT_DSI_VC_OCP_WIDTH))
+		r = FLD_MOD(r, 3, 11, 10);	/* OCP_WIDTH = 32 bit */
 
 	r = FLD_MOD(r, 4, 29, 27); /* DMA_RX_REQ_NB = no dma */
 	r = FLD_MOD(r, 4, 23, 21); /* DMA_TX_REQ_NB = no dma */
 
-	dsi_write_reg(DSI_VC_CTRL(channel), r);
+	dsi_write_reg(dsidev, DSI_VC_CTRL(channel), r);
 }
 
-static int dsi_vc_config_l4(int channel)
+static int dsi_vc_config_l4(struct platform_device *dsidev, int channel)
 {
-	if (dsi.vc[channel].mode == DSI_VC_MODE_L4)
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (dsi->vc[channel].mode == DSI_VC_MODE_L4)
 		return 0;
 
 	DSSDBGF("%d", channel);
 
-	dsi_vc_enable(channel, 0);
+	dsi_sync_vc(dsidev, channel);
+
+	dsi_vc_enable(dsidev, channel, 0);
 
 	/* VC_BUSY */
-	if (wait_for_bit_change(DSI_VC_CTRL(channel), 15, 0) != 0) {
+	if (wait_for_bit_change(dsidev, DSI_VC_CTRL(channel), 15, 0) != 0) {
 		DSSERR("vc(%d) busy when trying to config for L4\n", channel);
 		return -EIO;
 	}
 
-	REG_FLD_MOD(DSI_VC_CTRL(channel), 0, 1, 1); /* SOURCE, 0 = L4 */
+	REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), 0, 1, 1); /* SOURCE, 0 = L4 */
 
-	dsi_vc_enable(channel, 1);
+	/* DCS_CMD_ENABLE */
+	if (dss_has_feature(FEAT_DSI_DCS_CMD_CONFIG_VC))
+		REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), 0, 30, 30);
 
-	dsi.vc[channel].mode = DSI_VC_MODE_L4;
+	dsi_vc_enable(dsidev, channel, 1);
+
+	dsi->vc[channel].mode = DSI_VC_MODE_L4;
 
 	return 0;
 }
 
-static int dsi_vc_config_vp(int channel)
+static int dsi_vc_config_vp(struct platform_device *dsidev, int channel)
 {
-	if (dsi.vc[channel].mode == DSI_VC_MODE_VP)
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (dsi->vc[channel].mode == DSI_VC_MODE_VP)
 		return 0;
 
 	DSSDBGF("%d", channel);
 
-	dsi_vc_enable(channel, 0);
+	dsi_sync_vc(dsidev, channel);
+
+	dsi_vc_enable(dsidev, channel, 0);
 
 	/* VC_BUSY */
-	if (wait_for_bit_change(DSI_VC_CTRL(channel), 15, 0) != 0) {
+	if (wait_for_bit_change(dsidev, DSI_VC_CTRL(channel), 15, 0) != 0) {
 		DSSERR("vc(%d) busy when trying to config for VP\n", channel);
 		return -EIO;
 	}
 
-	REG_FLD_MOD(DSI_VC_CTRL(channel), 1, 1, 1); /* SOURCE, 1 = video port */
+	/* SOURCE, 1 = video port */
+	REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), 1, 1, 1);
 
-	dsi_vc_enable(channel, 1);
+	/* DCS_CMD_ENABLE */
+	if (dss_has_feature(FEAT_DSI_DCS_CMD_CONFIG_VC))
+		REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), 1, 30, 30);
 
-	dsi.vc[channel].mode = DSI_VC_MODE_VP;
+	dsi_vc_enable(dsidev, channel, 1);
+
+	dsi->vc[channel].mode = DSI_VC_MODE_VP;
 
 	return 0;
 }
 
 
-void omapdss_dsi_vc_enable_hs(int channel, bool enable)
+void omapdss_dsi_vc_enable_hs(struct omap_dss_device *dssdev, int channel,
+		bool enable)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+
 	DSSDBG("dsi_vc_enable_hs(%d, %d)\n", channel, enable);
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	dsi_vc_enable(channel, 0);
-	dsi_if_enable(0);
+	dsi_vc_enable(dsidev, channel, 0);
+	dsi_if_enable(dsidev, 0);
 
-	REG_FLD_MOD(DSI_VC_CTRL(channel), enable, 9, 9);
+	REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), enable, 9, 9);
 
-	dsi_vc_enable(channel, 1);
-	dsi_if_enable(1);
+	dsi_vc_enable(dsidev, channel, 1);
+	dsi_if_enable(dsidev, 1);
 
-	dsi_force_tx_stop_mode_io();
+	dsi_force_tx_stop_mode_io(dsidev);
 }
 EXPORT_SYMBOL(omapdss_dsi_vc_enable_hs);
 
-static void dsi_vc_flush_long_data(int channel)
+static void dsi_vc_flush_long_data(struct platform_device *dsidev, int channel)
 {
-	while (REG_GET(DSI_VC_CTRL(channel), 20, 20)) {
+	while (REG_GET(dsidev, DSI_VC_CTRL(channel), 20, 20)) {
 		u32 val;
-		val = dsi_read_reg(DSI_VC_SHORT_PACKET_HEADER(channel));
+		val = dsi_read_reg(dsidev, DSI_VC_SHORT_PACKET_HEADER(channel));
 		DSSDBG("\t\tb1 %#02x b2 %#02x b3 %#02x b4 %#02x\n",
 				(val >> 0) & 0xff,
 				(val >> 8) & 0xff,
@@ -2194,13 +2866,14 @@
 		DSSERR("\t\tDSI Protocol Violation\n");
 }
 
-static u16 dsi_vc_flush_receive_data(int channel)
+static u16 dsi_vc_flush_receive_data(struct platform_device *dsidev,
+		int channel)
 {
 	/* RX_FIFO_NOT_EMPTY */
-	while (REG_GET(DSI_VC_CTRL(channel), 20, 20)) {
+	while (REG_GET(dsidev, DSI_VC_CTRL(channel), 20, 20)) {
 		u32 val;
 		u8 dt;
-		val = dsi_read_reg(DSI_VC_SHORT_PACKET_HEADER(channel));
+		val = dsi_read_reg(dsidev, DSI_VC_SHORT_PACKET_HEADER(channel));
 		DSSERR("\trawval %#08x\n", val);
 		dt = FLD_GET(val, 5, 0);
 		if (dt == DSI_DT_RX_ACK_WITH_ERR) {
@@ -2215,7 +2888,7 @@
 		} else if (dt == DSI_DT_RX_DCS_LONG_READ) {
 			DSSERR("\tDCS long response, len %d\n",
 					FLD_GET(val, 23, 8));
-			dsi_vc_flush_long_data(channel);
+			dsi_vc_flush_long_data(dsidev, channel);
 		} else {
 			DSSERR("\tunknown datatype 0x%02x\n", dt);
 		}
@@ -2223,40 +2896,44 @@
 	return 0;
 }
 
-static int dsi_vc_send_bta(int channel)
+static int dsi_vc_send_bta(struct platform_device *dsidev, int channel)
 {
-	if (dsi.debug_write || dsi.debug_read)
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (dsi->debug_write || dsi->debug_read)
 		DSSDBG("dsi_vc_send_bta %d\n", channel);
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	if (REG_GET(DSI_VC_CTRL(channel), 20, 20)) {	/* RX_FIFO_NOT_EMPTY */
+	/* RX_FIFO_NOT_EMPTY */
+	if (REG_GET(dsidev, DSI_VC_CTRL(channel), 20, 20)) {
 		DSSERR("rx fifo not empty when sending BTA, dumping data:\n");
-		dsi_vc_flush_receive_data(channel);
+		dsi_vc_flush_receive_data(dsidev, channel);
 	}
 
-	REG_FLD_MOD(DSI_VC_CTRL(channel), 1, 6, 6); /* BTA_EN */
+	REG_FLD_MOD(dsidev, DSI_VC_CTRL(channel), 1, 6, 6); /* BTA_EN */
 
 	return 0;
 }
 
-int dsi_vc_send_bta_sync(int channel)
+int dsi_vc_send_bta_sync(struct omap_dss_device *dssdev, int channel)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	DECLARE_COMPLETION_ONSTACK(completion);
 	int r = 0;
 	u32 err;
 
-	r = dsi_register_isr_vc(channel, dsi_completion_handler,
+	r = dsi_register_isr_vc(dsidev, channel, dsi_completion_handler,
 			&completion, DSI_VC_IRQ_BTA);
 	if (r)
 		goto err0;
 
-	r = dsi_register_isr(dsi_completion_handler, &completion,
+	r = dsi_register_isr(dsidev, dsi_completion_handler, &completion,
 			DSI_IRQ_ERROR_MASK);
 	if (r)
 		goto err1;
 
-	r = dsi_vc_send_bta(channel);
+	r = dsi_vc_send_bta(dsidev, channel);
 	if (r)
 		goto err2;
 
@@ -2267,41 +2944,42 @@
 		goto err2;
 	}
 
-	err = dsi_get_errors();
+	err = dsi_get_errors(dsidev);
 	if (err) {
 		DSSERR("Error while sending BTA: %x\n", err);
 		r = -EIO;
 		goto err2;
 	}
 err2:
-	dsi_unregister_isr(dsi_completion_handler, &completion,
+	dsi_unregister_isr(dsidev, dsi_completion_handler, &completion,
 			DSI_IRQ_ERROR_MASK);
 err1:
-	dsi_unregister_isr_vc(channel, dsi_completion_handler,
+	dsi_unregister_isr_vc(dsidev, channel, dsi_completion_handler,
 			&completion, DSI_VC_IRQ_BTA);
 err0:
 	return r;
 }
 EXPORT_SYMBOL(dsi_vc_send_bta_sync);
 
-static inline void dsi_vc_write_long_header(int channel, u8 data_type,
-		u16 len, u8 ecc)
+static inline void dsi_vc_write_long_header(struct platform_device *dsidev,
+		int channel, u8 data_type, u16 len, u8 ecc)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 val;
 	u8 data_id;
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	data_id = data_type | dsi.vc[channel].vc_id << 6;
+	data_id = data_type | dsi->vc[channel].vc_id << 6;
 
 	val = FLD_VAL(data_id, 7, 0) | FLD_VAL(len, 23, 8) |
 		FLD_VAL(ecc, 31, 24);
 
-	dsi_write_reg(DSI_VC_LONG_PACKET_HEADER(channel), val);
+	dsi_write_reg(dsidev, DSI_VC_LONG_PACKET_HEADER(channel), val);
 }
 
-static inline void dsi_vc_write_long_payload(int channel,
-		u8 b1, u8 b2, u8 b3, u8 b4)
+static inline void dsi_vc_write_long_payload(struct platform_device *dsidev,
+		int channel, u8 b1, u8 b2, u8 b3, u8 b4)
 {
 	u32 val;
 
@@ -2310,34 +2988,35 @@
 /*	DSSDBG("\twriting %02x, %02x, %02x, %02x (%#010x)\n",
 			b1, b2, b3, b4, val); */
 
-	dsi_write_reg(DSI_VC_LONG_PACKET_PAYLOAD(channel), val);
+	dsi_write_reg(dsidev, DSI_VC_LONG_PACKET_PAYLOAD(channel), val);
 }
 
-static int dsi_vc_send_long(int channel, u8 data_type, u8 *data, u16 len,
-		u8 ecc)
+static int dsi_vc_send_long(struct platform_device *dsidev, int channel,
+		u8 data_type, u8 *data, u16 len, u8 ecc)
 {
 	/*u32 val; */
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int i;
 	u8 *p;
 	int r = 0;
 	u8 b1, b2, b3, b4;
 
-	if (dsi.debug_write)
+	if (dsi->debug_write)
 		DSSDBG("dsi_vc_send_long, %d bytes\n", len);
 
 	/* len + header */
-	if (dsi.vc[channel].fifo_size * 32 * 4 < len + 4) {
+	if (dsi->vc[channel].fifo_size * 32 * 4 < len + 4) {
 		DSSERR("unable to send long packet: packet too long.\n");
 		return -EINVAL;
 	}
 
-	dsi_vc_config_l4(channel);
+	dsi_vc_config_l4(dsidev, channel);
 
-	dsi_vc_write_long_header(channel, data_type, len, ecc);
+	dsi_vc_write_long_header(dsidev, channel, data_type, len, ecc);
 
 	p = data;
 	for (i = 0; i < len >> 2; i++) {
-		if (dsi.debug_write)
+		if (dsi->debug_write)
 			DSSDBG("\tsending full packet %d\n", i);
 
 		b1 = *p++;
@@ -2345,14 +3024,14 @@
 		b3 = *p++;
 		b4 = *p++;
 
-		dsi_vc_write_long_payload(channel, b1, b2, b3, b4);
+		dsi_vc_write_long_payload(dsidev, channel, b1, b2, b3, b4);
 	}
 
 	i = len % 4;
 	if (i) {
 		b1 = 0; b2 = 0; b3 = 0;
 
-		if (dsi.debug_write)
+		if (dsi->debug_write)
 			DSSDBG("\tsending remainder bytes %d\n", i);
 
 		switch (i) {
@@ -2370,62 +3049,69 @@
 			break;
 		}
 
-		dsi_vc_write_long_payload(channel, b1, b2, b3, 0);
+		dsi_vc_write_long_payload(dsidev, channel, b1, b2, b3, 0);
 	}
 
 	return r;
 }
 
-static int dsi_vc_send_short(int channel, u8 data_type, u16 data, u8 ecc)
+static int dsi_vc_send_short(struct platform_device *dsidev, int channel,
+		u8 data_type, u16 data, u8 ecc)
 {
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 r;
 	u8 data_id;
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	if (dsi.debug_write)
+	if (dsi->debug_write)
 		DSSDBG("dsi_vc_send_short(ch%d, dt %#x, b1 %#x, b2 %#x)\n",
 				channel,
 				data_type, data & 0xff, (data >> 8) & 0xff);
 
-	dsi_vc_config_l4(channel);
+	dsi_vc_config_l4(dsidev, channel);
 
-	if (FLD_GET(dsi_read_reg(DSI_VC_CTRL(channel)), 16, 16)) {
+	if (FLD_GET(dsi_read_reg(dsidev, DSI_VC_CTRL(channel)), 16, 16)) {
 		DSSERR("ERROR FIFO FULL, aborting transfer\n");
 		return -EINVAL;
 	}
 
-	data_id = data_type | dsi.vc[channel].vc_id << 6;
+	data_id = data_type | dsi->vc[channel].vc_id << 6;
 
 	r = (data_id << 0) | (data << 8) | (ecc << 24);
 
-	dsi_write_reg(DSI_VC_SHORT_PACKET_HEADER(channel), r);
+	dsi_write_reg(dsidev, DSI_VC_SHORT_PACKET_HEADER(channel), r);
 
 	return 0;
 }
 
-int dsi_vc_send_null(int channel)
+int dsi_vc_send_null(struct omap_dss_device *dssdev, int channel)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	u8 nullpkg[] = {0, 0, 0, 0};
-	return dsi_vc_send_long(channel, DSI_DT_NULL_PACKET, nullpkg, 4, 0);
+
+	return dsi_vc_send_long(dsidev, channel, DSI_DT_NULL_PACKET, nullpkg,
+		4, 0);
 }
 EXPORT_SYMBOL(dsi_vc_send_null);
 
-int dsi_vc_dcs_write_nosync(int channel, u8 *data, int len)
+int dsi_vc_dcs_write_nosync(struct omap_dss_device *dssdev, int channel,
+		u8 *data, int len)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	int r;
 
 	BUG_ON(len == 0);
 
 	if (len == 1) {
-		r = dsi_vc_send_short(channel, DSI_DT_DCS_SHORT_WRITE_0,
+		r = dsi_vc_send_short(dsidev, channel, DSI_DT_DCS_SHORT_WRITE_0,
 				data[0], 0);
 	} else if (len == 2) {
-		r = dsi_vc_send_short(channel, DSI_DT_DCS_SHORT_WRITE_1,
+		r = dsi_vc_send_short(dsidev, channel, DSI_DT_DCS_SHORT_WRITE_1,
 				data[0] | (data[1] << 8), 0);
 	} else {
 		/* 0x39 = DCS Long Write */
-		r = dsi_vc_send_long(channel, DSI_DT_DCS_LONG_WRITE,
+		r = dsi_vc_send_long(dsidev, channel, DSI_DT_DCS_LONG_WRITE,
 				data, len, 0);
 	}
 
@@ -2433,21 +3119,24 @@
 }
 EXPORT_SYMBOL(dsi_vc_dcs_write_nosync);
 
-int dsi_vc_dcs_write(int channel, u8 *data, int len)
+int dsi_vc_dcs_write(struct omap_dss_device *dssdev, int channel, u8 *data,
+		int len)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	int r;
 
-	r = dsi_vc_dcs_write_nosync(channel, data, len);
+	r = dsi_vc_dcs_write_nosync(dssdev, channel, data, len);
 	if (r)
 		goto err;
 
-	r = dsi_vc_send_bta_sync(channel);
+	r = dsi_vc_send_bta_sync(dssdev, channel);
 	if (r)
 		goto err;
 
-	if (REG_GET(DSI_VC_CTRL(channel), 20, 20)) {	/* RX_FIFO_NOT_EMPTY */
+	/* RX_FIFO_NOT_EMPTY */
+	if (REG_GET(dsidev, DSI_VC_CTRL(channel), 20, 20)) {
 		DSSERR("rx fifo not empty after write, dumping data:\n");
-		dsi_vc_flush_receive_data(channel);
+		dsi_vc_flush_receive_data(dsidev, channel);
 		r = -EIO;
 		goto err;
 	}
@@ -2460,47 +3149,51 @@
 }
 EXPORT_SYMBOL(dsi_vc_dcs_write);
 
-int dsi_vc_dcs_write_0(int channel, u8 dcs_cmd)
+int dsi_vc_dcs_write_0(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd)
 {
-	return dsi_vc_dcs_write(channel, &dcs_cmd, 1);
+	return dsi_vc_dcs_write(dssdev, channel, &dcs_cmd, 1);
 }
 EXPORT_SYMBOL(dsi_vc_dcs_write_0);
 
-int dsi_vc_dcs_write_1(int channel, u8 dcs_cmd, u8 param)
+int dsi_vc_dcs_write_1(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 param)
 {
 	u8 buf[2];
 	buf[0] = dcs_cmd;
 	buf[1] = param;
-	return dsi_vc_dcs_write(channel, buf, 2);
+	return dsi_vc_dcs_write(dssdev, channel, buf, 2);
 }
 EXPORT_SYMBOL(dsi_vc_dcs_write_1);
 
-int dsi_vc_dcs_read(int channel, u8 dcs_cmd, u8 *buf, int buflen)
+int dsi_vc_dcs_read(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *buf, int buflen)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	u32 val;
 	u8 dt;
 	int r;
 
-	if (dsi.debug_read)
+	if (dsi->debug_read)
 		DSSDBG("dsi_vc_dcs_read(ch%d, dcs_cmd %x)\n", channel, dcs_cmd);
 
-	r = dsi_vc_send_short(channel, DSI_DT_DCS_READ, dcs_cmd, 0);
+	r = dsi_vc_send_short(dsidev, channel, DSI_DT_DCS_READ, dcs_cmd, 0);
 	if (r)
 		goto err;
 
-	r = dsi_vc_send_bta_sync(channel);
+	r = dsi_vc_send_bta_sync(dssdev, channel);
 	if (r)
 		goto err;
 
 	/* RX_FIFO_NOT_EMPTY */
-	if (REG_GET(DSI_VC_CTRL(channel), 20, 20) == 0) {
+	if (REG_GET(dsidev, DSI_VC_CTRL(channel), 20, 20) == 0) {
 		DSSERR("RX fifo empty when trying to read.\n");
 		r = -EIO;
 		goto err;
 	}
 
-	val = dsi_read_reg(DSI_VC_SHORT_PACKET_HEADER(channel));
-	if (dsi.debug_read)
+	val = dsi_read_reg(dsidev, DSI_VC_SHORT_PACKET_HEADER(channel));
+	if (dsi->debug_read)
 		DSSDBG("\theader: %08x\n", val);
 	dt = FLD_GET(val, 5, 0);
 	if (dt == DSI_DT_RX_ACK_WITH_ERR) {
@@ -2511,7 +3204,7 @@
 
 	} else if (dt == DSI_DT_RX_SHORT_READ_1) {
 		u8 data = FLD_GET(val, 15, 8);
-		if (dsi.debug_read)
+		if (dsi->debug_read)
 			DSSDBG("\tDCS short response, 1 byte: %02x\n", data);
 
 		if (buflen < 1) {
@@ -2524,7 +3217,7 @@
 		return 1;
 	} else if (dt == DSI_DT_RX_SHORT_READ_2) {
 		u16 data = FLD_GET(val, 23, 8);
-		if (dsi.debug_read)
+		if (dsi->debug_read)
 			DSSDBG("\tDCS short response, 2 byte: %04x\n", data);
 
 		if (buflen < 2) {
@@ -2539,7 +3232,7 @@
 	} else if (dt == DSI_DT_RX_DCS_LONG_READ) {
 		int w;
 		int len = FLD_GET(val, 23, 8);
-		if (dsi.debug_read)
+		if (dsi->debug_read)
 			DSSDBG("\tDCS long response, len %d\n", len);
 
 		if (len > buflen) {
@@ -2550,8 +3243,9 @@
 		/* two byte checksum ends the packet, not included in len */
 		for (w = 0; w < len + 2;) {
 			int b;
-			val = dsi_read_reg(DSI_VC_SHORT_PACKET_HEADER(channel));
-			if (dsi.debug_read)
+			val = dsi_read_reg(dsidev,
+				DSI_VC_SHORT_PACKET_HEADER(channel));
+			if (dsi->debug_read)
 				DSSDBG("\t\t%02x %02x %02x %02x\n",
 						(val >> 0) & 0xff,
 						(val >> 8) & 0xff,
@@ -2582,11 +3276,12 @@
 }
 EXPORT_SYMBOL(dsi_vc_dcs_read);
 
-int dsi_vc_dcs_read_1(int channel, u8 dcs_cmd, u8 *data)
+int dsi_vc_dcs_read_1(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *data)
 {
 	int r;
 
-	r = dsi_vc_dcs_read(channel, dcs_cmd, data, 1);
+	r = dsi_vc_dcs_read(dssdev, channel, dcs_cmd, data, 1);
 
 	if (r < 0)
 		return r;
@@ -2598,12 +3293,13 @@
 }
 EXPORT_SYMBOL(dsi_vc_dcs_read_1);
 
-int dsi_vc_dcs_read_2(int channel, u8 dcs_cmd, u8 *data1, u8 *data2)
+int dsi_vc_dcs_read_2(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *data1, u8 *data2)
 {
 	u8 buf[2];
 	int r;
 
-	r = dsi_vc_dcs_read(channel, dcs_cmd, buf, 2);
+	r = dsi_vc_dcs_read(dssdev, channel, dcs_cmd, buf, 2);
 
 	if (r < 0)
 		return r;
@@ -2618,14 +3314,94 @@
 }
 EXPORT_SYMBOL(dsi_vc_dcs_read_2);
 
-int dsi_vc_set_max_rx_packet_size(int channel, u16 len)
+int dsi_vc_set_max_rx_packet_size(struct omap_dss_device *dssdev, int channel,
+		u16 len)
 {
-	return dsi_vc_send_short(channel, DSI_DT_SET_MAX_RET_PKG_SIZE,
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+
+	return dsi_vc_send_short(dsidev, channel, DSI_DT_SET_MAX_RET_PKG_SIZE,
 			len, 0);
 }
 EXPORT_SYMBOL(dsi_vc_set_max_rx_packet_size);
 
-static void dsi_set_lp_rx_timeout(unsigned ticks, bool x4, bool x16)
+static int dsi_enter_ulps(struct platform_device *dsidev)
+{
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	DECLARE_COMPLETION_ONSTACK(completion);
+	int r;
+
+	DSSDBGF();
+
+	WARN_ON(!dsi_bus_is_locked(dsidev));
+
+	WARN_ON(dsi->ulps_enabled);
+
+	if (dsi->ulps_enabled)
+		return 0;
+
+	if (REG_GET(dsidev, DSI_CLK_CTRL, 13, 13)) {
+		DSSERR("DDR_CLK_ALWAYS_ON enabled when entering ULPS\n");
+		return -EIO;
+	}
+
+	dsi_sync_vc(dsidev, 0);
+	dsi_sync_vc(dsidev, 1);
+	dsi_sync_vc(dsidev, 2);
+	dsi_sync_vc(dsidev, 3);
+
+	dsi_force_tx_stop_mode_io(dsidev);
+
+	dsi_vc_enable(dsidev, 0, false);
+	dsi_vc_enable(dsidev, 1, false);
+	dsi_vc_enable(dsidev, 2, false);
+	dsi_vc_enable(dsidev, 3, false);
+
+	if (REG_GET(dsidev, DSI_COMPLEXIO_CFG2, 16, 16)) {	/* HS_BUSY */
+		DSSERR("HS busy when enabling ULPS\n");
+		return -EIO;
+	}
+
+	if (REG_GET(dsidev, DSI_COMPLEXIO_CFG2, 17, 17)) {	/* LP_BUSY */
+		DSSERR("LP busy when enabling ULPS\n");
+		return -EIO;
+	}
+
+	r = dsi_register_isr_cio(dsidev, dsi_completion_handler, &completion,
+			DSI_CIO_IRQ_ULPSACTIVENOT_ALL0);
+	if (r)
+		return r;
+
+	/* Assert TxRequestEsc for data lanes and TxUlpsClk for clk lane */
+	/* LANEx_ULPS_SIG2 */
+	REG_FLD_MOD(dsidev, DSI_COMPLEXIO_CFG2, (1 << 0) | (1 << 1) | (1 << 2),
+		7, 5);
+
+	if (wait_for_completion_timeout(&completion,
+				msecs_to_jiffies(1000)) == 0) {
+		DSSERR("ULPS enable timeout\n");
+		r = -EIO;
+		goto err;
+	}
+
+	dsi_unregister_isr_cio(dsidev, dsi_completion_handler, &completion,
+			DSI_CIO_IRQ_ULPSACTIVENOT_ALL0);
+
+	dsi_cio_power(dsidev, DSI_COMPLEXIO_POWER_ULPS);
+
+	dsi_if_enable(dsidev, false);
+
+	dsi->ulps_enabled = true;
+
+	return 0;
+
+err:
+	dsi_unregister_isr_cio(dsidev, dsi_completion_handler, &completion,
+			DSI_CIO_IRQ_ULPSACTIVENOT_ALL0);
+	return r;
+}
+
+static void dsi_set_lp_rx_timeout(struct platform_device *dsidev,
+		unsigned ticks, bool x4, bool x16)
 {
 	unsigned long fck;
 	unsigned long total_ticks;
@@ -2634,14 +3410,14 @@
 	BUG_ON(ticks > 0x1fff);
 
 	/* ticks in DSI_FCK */
-	fck = dsi_fclk_rate();
+	fck = dsi_fclk_rate(dsidev);
 
-	r = dsi_read_reg(DSI_TIMING2);
+	r = dsi_read_reg(dsidev, DSI_TIMING2);
 	r = FLD_MOD(r, 1, 15, 15);	/* LP_RX_TO */
 	r = FLD_MOD(r, x16 ? 1 : 0, 14, 14);	/* LP_RX_TO_X16 */
 	r = FLD_MOD(r, x4 ? 1 : 0, 13, 13);	/* LP_RX_TO_X4 */
 	r = FLD_MOD(r, ticks, 12, 0);	/* LP_RX_COUNTER */
-	dsi_write_reg(DSI_TIMING2, r);
+	dsi_write_reg(dsidev, DSI_TIMING2, r);
 
 	total_ticks = ticks * (x16 ? 16 : 1) * (x4 ? 4 : 1);
 
@@ -2651,7 +3427,8 @@
 			(total_ticks * 1000) / (fck / 1000 / 1000));
 }
 
-static void dsi_set_ta_timeout(unsigned ticks, bool x8, bool x16)
+static void dsi_set_ta_timeout(struct platform_device *dsidev, unsigned ticks,
+		bool x8, bool x16)
 {
 	unsigned long fck;
 	unsigned long total_ticks;
@@ -2660,14 +3437,14 @@
 	BUG_ON(ticks > 0x1fff);
 
 	/* ticks in DSI_FCK */
-	fck = dsi_fclk_rate();
+	fck = dsi_fclk_rate(dsidev);
 
-	r = dsi_read_reg(DSI_TIMING1);
+	r = dsi_read_reg(dsidev, DSI_TIMING1);
 	r = FLD_MOD(r, 1, 31, 31);	/* TA_TO */
 	r = FLD_MOD(r, x16 ? 1 : 0, 30, 30);	/* TA_TO_X16 */
 	r = FLD_MOD(r, x8 ? 1 : 0, 29, 29);	/* TA_TO_X8 */
 	r = FLD_MOD(r, ticks, 28, 16);	/* TA_TO_COUNTER */
-	dsi_write_reg(DSI_TIMING1, r);
+	dsi_write_reg(dsidev, DSI_TIMING1, r);
 
 	total_ticks = ticks * (x16 ? 16 : 1) * (x8 ? 8 : 1);
 
@@ -2677,7 +3454,8 @@
 			(total_ticks * 1000) / (fck / 1000 / 1000));
 }
 
-static void dsi_set_stop_state_counter(unsigned ticks, bool x4, bool x16)
+static void dsi_set_stop_state_counter(struct platform_device *dsidev,
+		unsigned ticks, bool x4, bool x16)
 {
 	unsigned long fck;
 	unsigned long total_ticks;
@@ -2686,14 +3464,14 @@
 	BUG_ON(ticks > 0x1fff);
 
 	/* ticks in DSI_FCK */
-	fck = dsi_fclk_rate();
+	fck = dsi_fclk_rate(dsidev);
 
-	r = dsi_read_reg(DSI_TIMING1);
+	r = dsi_read_reg(dsidev, DSI_TIMING1);
 	r = FLD_MOD(r, 1, 15, 15);	/* FORCE_TX_STOP_MODE_IO */
 	r = FLD_MOD(r, x16 ? 1 : 0, 14, 14);	/* STOP_STATE_X16_IO */
 	r = FLD_MOD(r, x4 ? 1 : 0, 13, 13);	/* STOP_STATE_X4_IO */
 	r = FLD_MOD(r, ticks, 12, 0);	/* STOP_STATE_COUNTER_IO */
-	dsi_write_reg(DSI_TIMING1, r);
+	dsi_write_reg(dsidev, DSI_TIMING1, r);
 
 	total_ticks = ticks * (x16 ? 16 : 1) * (x4 ? 4 : 1);
 
@@ -2703,7 +3481,8 @@
 			(total_ticks * 1000) / (fck / 1000 / 1000));
 }
 
-static void dsi_set_hs_tx_timeout(unsigned ticks, bool x4, bool x16)
+static void dsi_set_hs_tx_timeout(struct platform_device *dsidev,
+		unsigned ticks, bool x4, bool x16)
 {
 	unsigned long fck;
 	unsigned long total_ticks;
@@ -2712,14 +3491,14 @@
 	BUG_ON(ticks > 0x1fff);
 
 	/* ticks in TxByteClkHS */
-	fck = dsi_get_txbyteclkhs();
+	fck = dsi_get_txbyteclkhs(dsidev);
 
-	r = dsi_read_reg(DSI_TIMING2);
+	r = dsi_read_reg(dsidev, DSI_TIMING2);
 	r = FLD_MOD(r, 1, 31, 31);	/* HS_TX_TO */
 	r = FLD_MOD(r, x16 ? 1 : 0, 30, 30);	/* HS_TX_TO_X16 */
 	r = FLD_MOD(r, x4 ? 1 : 0, 29, 29);	/* HS_TX_TO_X8 (4 really) */
 	r = FLD_MOD(r, ticks, 28, 16);	/* HS_TX_TO_COUNTER */
-	dsi_write_reg(DSI_TIMING2, r);
+	dsi_write_reg(dsidev, DSI_TIMING2, r);
 
 	total_ticks = ticks * (x16 ? 16 : 1) * (x4 ? 4 : 1);
 
@@ -2730,24 +3509,25 @@
 }
 static int dsi_proto_config(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	u32 r;
 	int buswidth = 0;
 
-	dsi_config_tx_fifo(DSI_FIFO_SIZE_32,
+	dsi_config_tx_fifo(dsidev, DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32);
 
-	dsi_config_rx_fifo(DSI_FIFO_SIZE_32,
+	dsi_config_rx_fifo(dsidev, DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32,
 			DSI_FIFO_SIZE_32);
 
 	/* XXX what values for the timeouts? */
-	dsi_set_stop_state_counter(0x1000, false, false);
-	dsi_set_ta_timeout(0x1fff, true, true);
-	dsi_set_lp_rx_timeout(0x1fff, true, true);
-	dsi_set_hs_tx_timeout(0x1fff, true, true);
+	dsi_set_stop_state_counter(dsidev, 0x1000, false, false);
+	dsi_set_ta_timeout(dsidev, 0x1fff, true, true);
+	dsi_set_lp_rx_timeout(dsidev, 0x1fff, true, true);
+	dsi_set_hs_tx_timeout(dsidev, 0x1fff, true, true);
 
 	switch (dssdev->ctrl.pixel_size) {
 	case 16:
@@ -2763,7 +3543,7 @@
 		BUG();
 	}
 
-	r = dsi_read_reg(DSI_CTRL);
+	r = dsi_read_reg(dsidev, DSI_CTRL);
 	r = FLD_MOD(r, 1, 1, 1);	/* CS_RX_EN */
 	r = FLD_MOD(r, 1, 2, 2);	/* ECC_RX_EN */
 	r = FLD_MOD(r, 1, 3, 3);	/* TX_FIFO_ARBITRATION */
@@ -2773,21 +3553,25 @@
 	r = FLD_MOD(r, 2, 13, 12);	/* LINE_BUFFER, 2 lines */
 	r = FLD_MOD(r, 1, 14, 14);	/* TRIGGER_RESET_MODE */
 	r = FLD_MOD(r, 1, 19, 19);	/* EOT_ENABLE */
-	r = FLD_MOD(r, 1, 24, 24);	/* DCS_CMD_ENABLE */
-	r = FLD_MOD(r, 0, 25, 25);	/* DCS_CMD_CODE, 1=start, 0=continue */
+	if (!dss_has_feature(FEAT_DSI_DCS_CMD_CONFIG_VC)) {
+		r = FLD_MOD(r, 1, 24, 24);	/* DCS_CMD_ENABLE */
+		/* DCS_CMD_CODE, 1=start, 0=continue */
+		r = FLD_MOD(r, 0, 25, 25);
+	}
 
-	dsi_write_reg(DSI_CTRL, r);
+	dsi_write_reg(dsidev, DSI_CTRL, r);
 
-	dsi_vc_initial_config(0);
-	dsi_vc_initial_config(1);
-	dsi_vc_initial_config(2);
-	dsi_vc_initial_config(3);
+	dsi_vc_initial_config(dsidev, 0);
+	dsi_vc_initial_config(dsidev, 1);
+	dsi_vc_initial_config(dsidev, 2);
+	dsi_vc_initial_config(dsidev, 3);
 
 	return 0;
 }
 
 static void dsi_proto_timings(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	unsigned tlpx, tclk_zero, tclk_prepare, tclk_trail;
 	unsigned tclk_pre, tclk_post;
 	unsigned ths_prepare, ths_prepare_ths_zero, ths_zero;
@@ -2797,32 +3581,27 @@
 	unsigned ths_eot;
 	u32 r;
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG0);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG0);
 	ths_prepare = FLD_GET(r, 31, 24);
 	ths_prepare_ths_zero = FLD_GET(r, 23, 16);
 	ths_zero = ths_prepare_ths_zero - ths_prepare;
 	ths_trail = FLD_GET(r, 15, 8);
 	ths_exit = FLD_GET(r, 7, 0);
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG1);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG1);
 	tlpx = FLD_GET(r, 22, 16) * 2;
 	tclk_trail = FLD_GET(r, 15, 8);
 	tclk_zero = FLD_GET(r, 7, 0);
 
-	r = dsi_read_reg(DSI_DSIPHY_CFG2);
+	r = dsi_read_reg(dsidev, DSI_DSIPHY_CFG2);
 	tclk_prepare = FLD_GET(r, 7, 0);
 
 	/* min 8*UI */
 	tclk_pre = 20;
 	/* min 60ns + 52*UI */
-	tclk_post = ns2ddr(60) + 26;
+	tclk_post = ns2ddr(dsidev, 60) + 26;
 
-	/* ths_eot is 2 for 2 datalanes and 4 for 1 datalane */
-	if (dssdev->phy.dsi.data1_lane != 0 &&
-			dssdev->phy.dsi.data2_lane != 0)
-		ths_eot = 2;
-	else
-		ths_eot = 4;
+	ths_eot = DIV_ROUND_UP(4, dsi_get_num_data_lanes_dssdev(dssdev));
 
 	ddr_clk_pre = DIV_ROUND_UP(tclk_pre + tlpx + tclk_zero + tclk_prepare,
 			4);
@@ -2831,10 +3610,10 @@
 	BUG_ON(ddr_clk_pre == 0 || ddr_clk_pre > 255);
 	BUG_ON(ddr_clk_post == 0 || ddr_clk_post > 255);
 
-	r = dsi_read_reg(DSI_CLK_TIMING);
+	r = dsi_read_reg(dsidev, DSI_CLK_TIMING);
 	r = FLD_MOD(r, ddr_clk_pre, 15, 8);
 	r = FLD_MOD(r, ddr_clk_post, 7, 0);
-	dsi_write_reg(DSI_CLK_TIMING, r);
+	dsi_write_reg(dsidev, DSI_CLK_TIMING, r);
 
 	DSSDBG("ddr_clk_pre %u, ddr_clk_post %u\n",
 			ddr_clk_pre,
@@ -2848,7 +3627,7 @@
 
 	r = FLD_VAL(enter_hs_mode_lat, 31, 16) |
 		FLD_VAL(exit_hs_mode_lat, 15, 0);
-	dsi_write_reg(DSI_VM_TIMING7, r);
+	dsi_write_reg(dsidev, DSI_VM_TIMING7, r);
 
 	DSSDBG("enter_hs_mode_lat %u, exit_hs_mode_lat %u\n",
 			enter_hs_mode_lat, exit_hs_mode_lat);
@@ -2858,25 +3637,27 @@
 #define DSI_DECL_VARS \
 	int __dsi_cb = 0; u32 __dsi_cv = 0;
 
-#define DSI_FLUSH(ch) \
+#define DSI_FLUSH(dsidev, ch) \
 	if (__dsi_cb > 0) { \
 		/*DSSDBG("sending long packet %#010x\n", __dsi_cv);*/ \
-		dsi_write_reg(DSI_VC_LONG_PACKET_PAYLOAD(ch), __dsi_cv); \
+		dsi_write_reg(dsidev, DSI_VC_LONG_PACKET_PAYLOAD(ch), __dsi_cv); \
 		__dsi_cb = __dsi_cv = 0; \
 	}
 
-#define DSI_PUSH(ch, data) \
+#define DSI_PUSH(dsidev, ch, data) \
 	do { \
 		__dsi_cv |= (data) << (__dsi_cb * 8); \
 		/*DSSDBG("cv = %#010x, cb = %d\n", __dsi_cv, __dsi_cb);*/ \
 		if (++__dsi_cb > 3) \
-			DSI_FLUSH(ch); \
+			DSI_FLUSH(dsidev, ch); \
 	} while (0)
 
 static int dsi_update_screen_l4(struct omap_dss_device *dssdev,
 			int x, int y, int w, int h)
 {
 	/* Note: supports only 24bit colors in 32bit container */
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int first = 1;
 	int fifo_stalls = 0;
 	int max_dsi_packet_size;
@@ -2915,7 +3696,7 @@
 	 * in fifo */
 
 	/* When using CPU, max long packet size is TX buffer size */
-	max_dsi_packet_size = dsi.vc[0].fifo_size * 32 * 4;
+	max_dsi_packet_size = dsi->vc[0].fifo_size * 32 * 4;
 
 	/* we seem to get better perf if we divide the tx fifo to half,
 	   and while the other half is being sent, we fill the other half
@@ -2944,35 +3725,36 @@
 #if 1
 		/* using fifo not empty */
 		/* TX_FIFO_NOT_EMPTY */
-		while (FLD_GET(dsi_read_reg(DSI_VC_CTRL(0)), 5, 5)) {
+		while (FLD_GET(dsi_read_reg(dsidev, DSI_VC_CTRL(0)), 5, 5)) {
 			fifo_stalls++;
 			if (fifo_stalls > 0xfffff) {
 				DSSERR("fifo stalls overflow, pixels left %d\n",
 						pixels_left);
-				dsi_if_enable(0);
+				dsi_if_enable(dsidev, 0);
 				return -EIO;
 			}
 			udelay(1);
 		}
 #elif 1
 		/* using fifo emptiness */
-		while ((REG_GET(DSI_TX_FIFO_VC_EMPTINESS, 7, 0)+1)*4 <
+		while ((REG_GET(dsidev, DSI_TX_FIFO_VC_EMPTINESS, 7, 0)+1)*4 <
 				max_dsi_packet_size) {
 			fifo_stalls++;
 			if (fifo_stalls > 0xfffff) {
 				DSSERR("fifo stalls overflow, pixels left %d\n",
 					       pixels_left);
-				dsi_if_enable(0);
+				dsi_if_enable(dsidev, 0);
 				return -EIO;
 			}
 		}
 #else
-		while ((REG_GET(DSI_TX_FIFO_VC_EMPTINESS, 7, 0)+1)*4 == 0) {
+		while ((REG_GET(dsidev, DSI_TX_FIFO_VC_EMPTINESS,
+				7, 0) + 1) * 4 == 0) {
 			fifo_stalls++;
 			if (fifo_stalls > 0xfffff) {
 				DSSERR("fifo stalls overflow, pixels left %d\n",
 					       pixels_left);
-				dsi_if_enable(0);
+				dsi_if_enable(dsidev, 0);
 				return -EIO;
 			}
 		}
@@ -2981,17 +3763,17 @@
 
 		pixels_left -= pixels;
 
-		dsi_vc_write_long_header(0, DSI_DT_DCS_LONG_WRITE,
+		dsi_vc_write_long_header(dsidev, 0, DSI_DT_DCS_LONG_WRITE,
 				1 + pixels * bytespp, 0);
 
-		DSI_PUSH(0, dcs_cmd);
+		DSI_PUSH(dsidev, 0, dcs_cmd);
 
 		while (pixels-- > 0) {
 			u32 pix = __raw_readl(data++);
 
-			DSI_PUSH(0, (pix >> 16) & 0xff);
-			DSI_PUSH(0, (pix >> 8) & 0xff);
-			DSI_PUSH(0, (pix >> 0) & 0xff);
+			DSI_PUSH(dsidev, 0, (pix >> 16) & 0xff);
+			DSI_PUSH(dsidev, 0, (pix >> 8) & 0xff);
+			DSI_PUSH(dsidev, 0, (pix >> 0) & 0xff);
 
 			current_x++;
 			if (current_x == x+w) {
@@ -3000,7 +3782,7 @@
 			}
 		}
 
-		DSI_FLUSH(0);
+		DSI_FLUSH(dsidev, 0);
 	}
 
 	return 0;
@@ -3009,6 +3791,8 @@
 static void dsi_update_screen_dispc(struct omap_dss_device *dssdev,
 		u16 x, u16 y, u16 w, u16 h)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	unsigned bytespp;
 	unsigned bytespl;
 	unsigned bytespf;
@@ -3017,16 +3801,13 @@
 	unsigned packet_len;
 	u32 l;
 	int r;
-	const unsigned channel = dsi.update_channel;
-	/* line buffer is 1024 x 24bits */
-	/* XXX: for some reason using full buffer size causes considerable TX
-	 * slowdown with update sizes that fill the whole buffer */
-	const unsigned line_buf_size = 1023 * 3;
+	const unsigned channel = dsi->update_channel;
+	const unsigned line_buf_size = dsi_get_line_buf_size(dsidev);
 
 	DSSDBG("dsi_update_screen_dispc(%d,%d %dx%d)\n",
 			x, y, w, h);
 
-	dsi_vc_config_vp(channel);
+	dsi_vc_config_vp(dsidev, channel);
 
 	bytespp	= dssdev->ctrl.pixel_size / 8;
 	bytespl = w * bytespp;
@@ -3047,15 +3828,16 @@
 		total_len += (bytespf % packet_payload) + 1;
 
 	l = FLD_VAL(total_len, 23, 0); /* TE_SIZE */
-	dsi_write_reg(DSI_VC_TE(channel), l);
+	dsi_write_reg(dsidev, DSI_VC_TE(channel), l);
 
-	dsi_vc_write_long_header(channel, DSI_DT_DCS_LONG_WRITE, packet_len, 0);
+	dsi_vc_write_long_header(dsidev, channel, DSI_DT_DCS_LONG_WRITE,
+		packet_len, 0);
 
-	if (dsi.te_enabled)
+	if (dsi->te_enabled)
 		l = FLD_MOD(l, 1, 30, 30); /* TE_EN */
 	else
 		l = FLD_MOD(l, 1, 31, 31); /* TE_START */
-	dsi_write_reg(DSI_VC_TE(channel), l);
+	dsi_write_reg(dsidev, DSI_VC_TE(channel), l);
 
 	/* We put SIDLEMODE to no-idle for the duration of the transfer,
 	 * because DSS interrupts are not capable of waking up the CPU and the
@@ -3065,23 +3847,23 @@
 	 */
 	dispc_disable_sidle();
 
-	dsi_perf_mark_start();
+	dsi_perf_mark_start(dsidev);
 
-	r = queue_delayed_work(dsi.workqueue, &dsi.framedone_timeout_work,
-			msecs_to_jiffies(250));
+	r = schedule_delayed_work(&dsi->framedone_timeout_work,
+		msecs_to_jiffies(250));
 	BUG_ON(r == 0);
 
 	dss_start_update(dssdev);
 
-	if (dsi.te_enabled) {
+	if (dsi->te_enabled) {
 		/* disable LP_RX_TO, so that we can receive TE.  Time to wait
 		 * for TE is longer than the timer allows */
-		REG_FLD_MOD(DSI_TIMING2, 0, 15, 15); /* LP_RX_TO */
+		REG_FLD_MOD(dsidev, DSI_TIMING2, 0, 15, 15); /* LP_RX_TO */
 
-		dsi_vc_send_bta(channel);
+		dsi_vc_send_bta(dsidev, channel);
 
 #ifdef DSI_CATCH_MISSING_TE
-		mod_timer(&dsi.te_timer, jiffies + msecs_to_jiffies(250));
+		mod_timer(&dsi->te_timer, jiffies + msecs_to_jiffies(250));
 #endif
 	}
 }
@@ -3093,41 +3875,28 @@
 }
 #endif
 
-static void dsi_framedone_bta_callback(void *data, u32 mask);
-
-static void dsi_handle_framedone(int error)
+static void dsi_handle_framedone(struct platform_device *dsidev, int error)
 {
-	const int channel = dsi.update_channel;
-
-	dsi_unregister_isr_vc(channel, dsi_framedone_bta_callback,
-			NULL, DSI_VC_IRQ_BTA);
-
-	cancel_delayed_work(&dsi.framedone_timeout_work);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 
 	/* SIDLEMODE back to smart-idle */
 	dispc_enable_sidle();
 
-	if (dsi.te_enabled) {
+	if (dsi->te_enabled) {
 		/* enable LP_RX_TO again after the TE */
-		REG_FLD_MOD(DSI_TIMING2, 1, 15, 15); /* LP_RX_TO */
+		REG_FLD_MOD(dsidev, DSI_TIMING2, 1, 15, 15); /* LP_RX_TO */
 	}
 
-	/* RX_FIFO_NOT_EMPTY */
-	if (REG_GET(DSI_VC_CTRL(channel), 20, 20)) {
-		DSSERR("Received error during frame transfer:\n");
-		dsi_vc_flush_receive_data(channel);
-		if (!error)
-			error = -EIO;
-	}
-
-	dsi.framedone_callback(error, dsi.framedone_data);
+	dsi->framedone_callback(error, dsi->framedone_data);
 
 	if (!error)
-		dsi_perf_show("DISPC");
+		dsi_perf_show(dsidev, "DISPC");
 }
 
 static void dsi_framedone_timeout_work_callback(struct work_struct *work)
 {
+	struct dsi_data *dsi = container_of(work, struct dsi_data,
+			framedone_timeout_work.work);
 	/* XXX While extremely unlikely, we could get FRAMEDONE interrupt after
 	 * 250ms which would conflict with this timeout work. What should be
 	 * done is first cancel the transfer on the HW, and then cancel the
@@ -3137,70 +3906,34 @@
 
 	DSSERR("Framedone not received for 250ms!\n");
 
-	dsi_handle_framedone(-ETIMEDOUT);
-}
-
-static void dsi_framedone_bta_callback(void *data, u32 mask)
-{
-	dsi_handle_framedone(0);
-
-#ifdef CONFIG_OMAP2_DSS_FAKE_VSYNC
-	dispc_fake_vsync_irq();
-#endif
+	dsi_handle_framedone(dsi->pdev, -ETIMEDOUT);
 }
 
 static void dsi_framedone_irq_callback(void *data, u32 mask)
 {
-	const int channel = dsi.update_channel;
-	int r;
+	struct omap_dss_device *dssdev = (struct omap_dss_device *) data;
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 
 	/* Note: We get FRAMEDONE when DISPC has finished sending pixels and
 	 * turns itself off. However, DSI still has the pixels in its buffers,
 	 * and is sending the data.
 	 */
 
-	if (dsi.te_enabled) {
-		/* enable LP_RX_TO again after the TE */
-		REG_FLD_MOD(DSI_TIMING2, 1, 15, 15); /* LP_RX_TO */
-	}
+	__cancel_delayed_work(&dsi->framedone_timeout_work);
 
-	/* Send BTA after the frame. We need this for the TE to work, as TE
-	 * trigger is only sent for BTAs without preceding packet. Thus we need
-	 * to BTA after the pixel packets so that next BTA will cause TE
-	 * trigger.
-	 *
-	 * This is not needed when TE is not in use, but we do it anyway to
-	 * make sure that the transfer has been completed. It would be more
-	 * optimal, but more complex, to wait only just before starting next
-	 * transfer.
-	 *
-	 * Also, as there's no interrupt telling when the transfer has been
-	 * done and the channel could be reconfigured, the only way is to
-	 * busyloop until TE_SIZE is zero. With BTA we can do this
-	 * asynchronously.
-	 * */
+	dsi_handle_framedone(dsidev, 0);
 
-	r = dsi_register_isr_vc(channel, dsi_framedone_bta_callback,
-			NULL, DSI_VC_IRQ_BTA);
-	if (r) {
-		DSSERR("Failed to register BTA ISR\n");
-		dsi_handle_framedone(-EIO);
-		return;
-	}
-
-	r = dsi_vc_send_bta(channel);
-	if (r) {
-		DSSERR("BTA after framedone failed\n");
-		dsi_unregister_isr_vc(channel, dsi_framedone_bta_callback,
-				NULL, DSI_VC_IRQ_BTA);
-		dsi_handle_framedone(-EIO);
-	}
+#ifdef CONFIG_OMAP2_DSS_FAKE_VSYNC
+	dispc_fake_vsync_irq();
+#endif
 }
 
 int omap_dsi_prepare_update(struct omap_dss_device *dssdev,
 				    u16 *x, u16 *y, u16 *w, u16 *h,
 				    bool enlarge_update_area)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	u16 dw, dh;
 
 	dssdev->driver->get_resolution(dssdev, &dw, &dh);
@@ -3220,7 +3953,7 @@
 	if (*w == 0 || *h == 0)
 		return -EINVAL;
 
-	dsi_perf_mark_setup();
+	dsi_perf_mark_setup(dsidev);
 
 	if (dssdev->manager->caps & OMAP_DSS_OVL_MGR_CAP_DISPC) {
 		dss_setup_partial_planes(dssdev, x, y, w, h,
@@ -3237,7 +3970,10 @@
 		u16 x, u16 y, u16 w, u16 h,
 		void (*callback)(int, void *), void *data)
 {
-	dsi.update_channel = channel;
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	dsi->update_channel = channel;
 
 	/* OMAP DSS cannot send updates of odd widths.
 	 * omap_dsi_prepare_update() makes the widths even, but add a BUG_ON
@@ -3246,14 +3982,14 @@
 	BUG_ON(x % 2 == 1);
 
 	if (dssdev->manager->caps & OMAP_DSS_OVL_MGR_CAP_DISPC) {
-		dsi.framedone_callback = callback;
-		dsi.framedone_data = data;
+		dsi->framedone_callback = callback;
+		dsi->framedone_data = data;
 
-		dsi.update_region.x = x;
-		dsi.update_region.y = y;
-		dsi.update_region.w = w;
-		dsi.update_region.h = h;
-		dsi.update_region.device = dssdev;
+		dsi->update_region.x = x;
+		dsi->update_region.y = y;
+		dsi->update_region.w = w;
+		dsi->update_region.h = h;
+		dsi->update_region.device = dssdev;
 
 		dsi_update_screen_dispc(dssdev, x, y, w, h);
 	} else {
@@ -3263,7 +3999,7 @@
 		if (r)
 			return r;
 
-		dsi_perf_show("L4");
+		dsi_perf_show(dsidev, "L4");
 		callback(0, data);
 	}
 
@@ -3276,9 +4012,13 @@
 static int dsi_display_init_dispc(struct omap_dss_device *dssdev)
 {
 	int r;
+	u32 irq;
 
-	r = omap_dispc_register_isr(dsi_framedone_irq_callback, NULL,
-			DISPC_IRQ_FRAMEDONE);
+	irq = dssdev->manager->id == OMAP_DSS_CHANNEL_LCD ?
+		DISPC_IRQ_FRAMEDONE : DISPC_IRQ_FRAMEDONE2;
+
+	r = omap_dispc_register_isr(dsi_framedone_irq_callback, (void *) dssdev,
+			irq);
 	if (r) {
 		DSSERR("can't get FRAMEDONE irq\n");
 		return r;
@@ -3311,28 +4051,34 @@
 
 static void dsi_display_uninit_dispc(struct omap_dss_device *dssdev)
 {
-	omap_dispc_unregister_isr(dsi_framedone_irq_callback, NULL,
-			DISPC_IRQ_FRAMEDONE);
+	u32 irq;
+
+	irq = dssdev->manager->id == OMAP_DSS_CHANNEL_LCD ?
+		DISPC_IRQ_FRAMEDONE : DISPC_IRQ_FRAMEDONE2;
+
+	omap_dispc_unregister_isr(dsi_framedone_irq_callback, (void *) dssdev,
+			irq);
 }
 
 static int dsi_configure_dsi_clocks(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	struct dsi_clock_info cinfo;
 	int r;
 
 	/* we always use DSS_CLK_SYSCK as input clock */
 	cinfo.use_sys_clk = true;
-	cinfo.regn  = dssdev->phy.dsi.div.regn;
-	cinfo.regm  = dssdev->phy.dsi.div.regm;
-	cinfo.regm_dispc = dssdev->phy.dsi.div.regm_dispc;
-	cinfo.regm_dsi = dssdev->phy.dsi.div.regm_dsi;
+	cinfo.regn  = dssdev->clocks.dsi.regn;
+	cinfo.regm  = dssdev->clocks.dsi.regm;
+	cinfo.regm_dispc = dssdev->clocks.dsi.regm_dispc;
+	cinfo.regm_dsi = dssdev->clocks.dsi.regm_dsi;
 	r = dsi_calc_clock_rates(dssdev, &cinfo);
 	if (r) {
 		DSSERR("Failed to calc dsi clocks\n");
 		return r;
 	}
 
-	r = dsi_pll_set_clock_div(&cinfo);
+	r = dsi_pll_set_clock_div(dsidev, &cinfo);
 	if (r) {
 		DSSERR("Failed to set dsi clocks\n");
 		return r;
@@ -3343,14 +4089,15 @@
 
 static int dsi_configure_dispc_clocks(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	struct dispc_clock_info dispc_cinfo;
 	int r;
 	unsigned long long fck;
 
-	fck = dsi_get_pll_hsdiv_dispc_rate();
+	fck = dsi_get_pll_hsdiv_dispc_rate(dsidev);
 
-	dispc_cinfo.lck_div = dssdev->phy.dsi.div.lck_div;
-	dispc_cinfo.pck_div = dssdev->phy.dsi.div.pck_div;
+	dispc_cinfo.lck_div = dssdev->clocks.dispc.channel.lck_div;
+	dispc_cinfo.pck_div = dssdev->clocks.dispc.channel.pck_div;
 
 	r = dispc_calc_clock_rates(fck, &dispc_cinfo);
 	if (r) {
@@ -3369,11 +4116,11 @@
 
 static int dsi_display_init_dsi(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	int dsi_module = dsi_get_dsidev_id(dsidev);
 	int r;
 
-	_dsi_print_reset_status();
-
-	r = dsi_pll_init(dssdev, true, true);
+	r = dsi_pll_init(dsidev, true, true);
 	if (r)
 		goto err0;
 
@@ -3381,8 +4128,10 @@
 	if (r)
 		goto err1;
 
-	dss_select_dispc_clk_source(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC);
-	dss_select_dsi_clk_source(DSS_CLK_SRC_DSI_PLL_HSDIV_DSI);
+	dss_select_dispc_clk_source(dssdev->clocks.dispc.dispc_fclk_src);
+	dss_select_dsi_clk_source(dsi_module, dssdev->clocks.dsi.dsi_fclk_src);
+	dss_select_lcd_clk_source(dssdev->manager->id,
+			dssdev->clocks.dispc.channel.lcd_clk_src);
 
 	DSSDBG("PLL OK\n");
 
@@ -3390,82 +4139,92 @@
 	if (r)
 		goto err2;
 
-	r = dsi_complexio_init(dssdev);
+	r = dsi_cio_init(dssdev);
 	if (r)
 		goto err2;
 
-	_dsi_print_reset_status();
+	_dsi_print_reset_status(dsidev);
 
 	dsi_proto_timings(dssdev);
 	dsi_set_lp_clk_divisor(dssdev);
 
 	if (1)
-		_dsi_print_reset_status();
+		_dsi_print_reset_status(dsidev);
 
 	r = dsi_proto_config(dssdev);
 	if (r)
 		goto err3;
 
 	/* enable interface */
-	dsi_vc_enable(0, 1);
-	dsi_vc_enable(1, 1);
-	dsi_vc_enable(2, 1);
-	dsi_vc_enable(3, 1);
-	dsi_if_enable(1);
-	dsi_force_tx_stop_mode_io();
+	dsi_vc_enable(dsidev, 0, 1);
+	dsi_vc_enable(dsidev, 1, 1);
+	dsi_vc_enable(dsidev, 2, 1);
+	dsi_vc_enable(dsidev, 3, 1);
+	dsi_if_enable(dsidev, 1);
+	dsi_force_tx_stop_mode_io(dsidev);
 
 	return 0;
 err3:
-	dsi_complexio_uninit();
+	dsi_cio_uninit(dsidev);
 err2:
-	dss_select_dispc_clk_source(DSS_CLK_SRC_FCK);
-	dss_select_dsi_clk_source(DSS_CLK_SRC_FCK);
+	dss_select_dispc_clk_source(OMAP_DSS_CLK_SRC_FCK);
+	dss_select_dsi_clk_source(dsi_module, OMAP_DSS_CLK_SRC_FCK);
 err1:
-	dsi_pll_uninit();
+	dsi_pll_uninit(dsidev, true);
 err0:
 	return r;
 }
 
-static void dsi_display_uninit_dsi(struct omap_dss_device *dssdev)
+static void dsi_display_uninit_dsi(struct omap_dss_device *dssdev,
+		bool disconnect_lanes, bool enter_ulps)
 {
-	/* disable interface */
-	dsi_if_enable(0);
-	dsi_vc_enable(0, 0);
-	dsi_vc_enable(1, 0);
-	dsi_vc_enable(2, 0);
-	dsi_vc_enable(3, 0);
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	int dsi_module = dsi_get_dsidev_id(dsidev);
 
-	dss_select_dispc_clk_source(DSS_CLK_SRC_FCK);
-	dss_select_dsi_clk_source(DSS_CLK_SRC_FCK);
-	dsi_complexio_uninit();
-	dsi_pll_uninit();
+	if (enter_ulps && !dsi->ulps_enabled)
+		dsi_enter_ulps(dsidev);
+
+	/* disable interface */
+	dsi_if_enable(dsidev, 0);
+	dsi_vc_enable(dsidev, 0, 0);
+	dsi_vc_enable(dsidev, 1, 0);
+	dsi_vc_enable(dsidev, 2, 0);
+	dsi_vc_enable(dsidev, 3, 0);
+
+	dss_select_dispc_clk_source(OMAP_DSS_CLK_SRC_FCK);
+	dss_select_dsi_clk_source(dsi_module, OMAP_DSS_CLK_SRC_FCK);
+	dsi_cio_uninit(dsidev);
+	dsi_pll_uninit(dsidev, disconnect_lanes);
 }
 
-static int dsi_core_init(void)
+static int dsi_core_init(struct platform_device *dsidev)
 {
 	/* Autoidle */
-	REG_FLD_MOD(DSI_SYSCONFIG, 1, 0, 0);
+	REG_FLD_MOD(dsidev, DSI_SYSCONFIG, 1, 0, 0);
 
 	/* ENWAKEUP */
-	REG_FLD_MOD(DSI_SYSCONFIG, 1, 2, 2);
+	REG_FLD_MOD(dsidev, DSI_SYSCONFIG, 1, 2, 2);
 
 	/* SIDLEMODE smart-idle */
-	REG_FLD_MOD(DSI_SYSCONFIG, 2, 4, 3);
+	REG_FLD_MOD(dsidev, DSI_SYSCONFIG, 2, 4, 3);
 
-	_dsi_initialize_irq();
+	_dsi_initialize_irq(dsidev);
 
 	return 0;
 }
 
 int omapdss_dsi_display_enable(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int r = 0;
 
 	DSSDBG("dsi_display_enable\n");
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	mutex_lock(&dsi.lock);
+	mutex_lock(&dsi->lock);
 
 	r = omap_dss_start_device(dssdev);
 	if (r) {
@@ -3474,13 +4233,13 @@
 	}
 
 	enable_clocks(1);
-	dsi_enable_pll_clock(1);
+	dsi_enable_pll_clock(dsidev, 1);
 
-	r = _dsi_reset();
+	r = _dsi_reset(dsidev);
 	if (r)
 		goto err1;
 
-	dsi_core_init();
+	dsi_core_init(dsidev);
 
 	r = dsi_display_init_dispc(dssdev);
 	if (r)
@@ -3490,7 +4249,7 @@
 	if (r)
 		goto err2;
 
-	mutex_unlock(&dsi.lock);
+	mutex_unlock(&dsi->lock);
 
 	return 0;
 
@@ -3498,39 +4257,46 @@
 	dsi_display_uninit_dispc(dssdev);
 err1:
 	enable_clocks(0);
-	dsi_enable_pll_clock(0);
+	dsi_enable_pll_clock(dsidev, 0);
 	omap_dss_stop_device(dssdev);
 err0:
-	mutex_unlock(&dsi.lock);
+	mutex_unlock(&dsi->lock);
 	DSSDBG("dsi_display_enable FAILED\n");
 	return r;
 }
 EXPORT_SYMBOL(omapdss_dsi_display_enable);
 
-void omapdss_dsi_display_disable(struct omap_dss_device *dssdev)
+void omapdss_dsi_display_disable(struct omap_dss_device *dssdev,
+		bool disconnect_lanes, bool enter_ulps)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
 	DSSDBG("dsi_display_disable\n");
 
-	WARN_ON(!dsi_bus_is_locked());
+	WARN_ON(!dsi_bus_is_locked(dsidev));
 
-	mutex_lock(&dsi.lock);
+	mutex_lock(&dsi->lock);
 
 	dsi_display_uninit_dispc(dssdev);
 
-	dsi_display_uninit_dsi(dssdev);
+	dsi_display_uninit_dsi(dssdev, disconnect_lanes, enter_ulps);
 
 	enable_clocks(0);
-	dsi_enable_pll_clock(0);
+	dsi_enable_pll_clock(dsidev, 0);
 
 	omap_dss_stop_device(dssdev);
 
-	mutex_unlock(&dsi.lock);
+	mutex_unlock(&dsi->lock);
 }
 EXPORT_SYMBOL(omapdss_dsi_display_disable);
 
 int omapdss_dsi_enable_te(struct omap_dss_device *dssdev, bool enable)
 {
-	dsi.te_enabled = enable;
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	dsi->te_enabled = enable;
 	return 0;
 }
 EXPORT_SYMBOL(omapdss_dsi_enable_te);
@@ -3550,23 +4316,33 @@
 
 int dsi_init_display(struct omap_dss_device *dssdev)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+	int dsi_module = dsi_get_dsidev_id(dsidev);
+
 	DSSDBG("DSI init\n");
 
 	/* XXX these should be figured out dynamically */
 	dssdev->caps = OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE |
 		OMAP_DSS_DISPLAY_CAP_TEAR_ELIM;
 
-	if (dsi.vdds_dsi_reg == NULL) {
+	if (dsi->vdds_dsi_reg == NULL) {
 		struct regulator *vdds_dsi;
 
-		vdds_dsi = regulator_get(&dsi.pdev->dev, "vdds_dsi");
+		vdds_dsi = regulator_get(&dsi->pdev->dev, "vdds_dsi");
 
 		if (IS_ERR(vdds_dsi)) {
 			DSSERR("can't get VDDS_DSI regulator\n");
 			return PTR_ERR(vdds_dsi);
 		}
 
-		dsi.vdds_dsi_reg = vdds_dsi;
+		dsi->vdds_dsi_reg = vdds_dsi;
+	}
+
+	if (dsi_get_num_data_lanes_dssdev(dssdev) > dsi->num_data_lanes) {
+		DSSERR("DSI%d can't support more than %d data lanes\n",
+			dsi_module + 1, dsi->num_data_lanes);
+		return -EINVAL;
 	}
 
 	return 0;
@@ -3574,11 +4350,13 @@
 
 int omap_dsi_request_vc(struct omap_dss_device *dssdev, int *channel)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(dsi.vc); i++) {
-		if (!dsi.vc[i].dssdev) {
-			dsi.vc[i].dssdev = dssdev;
+	for (i = 0; i < ARRAY_SIZE(dsi->vc); i++) {
+		if (!dsi->vc[i].dssdev) {
+			dsi->vc[i].dssdev = dssdev;
 			*channel = i;
 			return 0;
 		}
@@ -3591,6 +4369,9 @@
 
 int omap_dsi_set_vc_id(struct omap_dss_device *dssdev, int channel, int vc_id)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
 	if (vc_id < 0 || vc_id > 3) {
 		DSSERR("VC ID out of range\n");
 		return -EINVAL;
@@ -3601,13 +4382,13 @@
 		return -EINVAL;
 	}
 
-	if (dsi.vc[channel].dssdev != dssdev) {
+	if (dsi->vc[channel].dssdev != dssdev) {
 		DSSERR("Virtual Channel not allocated to display %s\n",
 			dssdev->name);
 		return -EINVAL;
 	}
 
-	dsi.vc[channel].vc_id = vc_id;
+	dsi->vc[channel].vc_id = vc_id;
 
 	return 0;
 }
@@ -3615,143 +4396,172 @@
 
 void omap_dsi_release_vc(struct omap_dss_device *dssdev, int channel)
 {
+	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
 	if ((channel >= 0 && channel <= 3) &&
-		dsi.vc[channel].dssdev == dssdev) {
-		dsi.vc[channel].dssdev = NULL;
-		dsi.vc[channel].vc_id = 0;
+		dsi->vc[channel].dssdev == dssdev) {
+		dsi->vc[channel].dssdev = NULL;
+		dsi->vc[channel].vc_id = 0;
 	}
 }
 EXPORT_SYMBOL(omap_dsi_release_vc);
 
-void dsi_wait_pll_hsdiv_dispc_active(void)
+void dsi_wait_pll_hsdiv_dispc_active(struct platform_device *dsidev)
 {
-	if (wait_for_bit_change(DSI_PLL_STATUS, 7, 1) != 1)
+	if (wait_for_bit_change(dsidev, DSI_PLL_STATUS, 7, 1) != 1)
 		DSSERR("%s (%s) not active\n",
-			dss_get_generic_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
-			dss_feat_get_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC));
+			dss_get_generic_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC),
+			dss_feat_get_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC));
 }
 
-void dsi_wait_pll_hsdiv_dsi_active(void)
+void dsi_wait_pll_hsdiv_dsi_active(struct platform_device *dsidev)
 {
-	if (wait_for_bit_change(DSI_PLL_STATUS, 8, 1) != 1)
+	if (wait_for_bit_change(dsidev, DSI_PLL_STATUS, 8, 1) != 1)
 		DSSERR("%s (%s) not active\n",
-			dss_get_generic_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
-			dss_feat_get_clk_source_name(DSS_CLK_SRC_DSI_PLL_HSDIV_DSI));
+			dss_get_generic_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI),
+			dss_feat_get_clk_source_name(OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI));
 }
 
-static void dsi_calc_clock_param_ranges(void)
+static void dsi_calc_clock_param_ranges(struct platform_device *dsidev)
 {
-	dsi.regn_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGN);
-	dsi.regm_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM);
-	dsi.regm_dispc_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM_DISPC);
-	dsi.regm_dsi_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM_DSI);
-	dsi.fint_min = dss_feat_get_param_min(FEAT_PARAM_DSIPLL_FINT);
-	dsi.fint_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_FINT);
-	dsi.lpdiv_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_LPDIV);
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	dsi->regn_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGN);
+	dsi->regm_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM);
+	dsi->regm_dispc_max =
+		dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM_DISPC);
+	dsi->regm_dsi_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_REGM_DSI);
+	dsi->fint_min = dss_feat_get_param_min(FEAT_PARAM_DSIPLL_FINT);
+	dsi->fint_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_FINT);
+	dsi->lpdiv_max = dss_feat_get_param_max(FEAT_PARAM_DSIPLL_LPDIV);
 }
 
-static int dsi_init(struct platform_device *pdev)
+static int dsi_init(struct platform_device *dsidev)
 {
+	struct omap_display_platform_data *dss_plat_data;
+	struct omap_dss_board_info *board_info;
 	u32 rev;
-	int r, i;
+	int r, i, dsi_module = dsi_get_dsidev_id(dsidev);
 	struct resource *dsi_mem;
+	struct dsi_data *dsi;
 
-	spin_lock_init(&dsi.irq_lock);
-	spin_lock_init(&dsi.errors_lock);
-	dsi.errors = 0;
+	dsi = kzalloc(sizeof(*dsi), GFP_KERNEL);
+	if (!dsi) {
+		r = -ENOMEM;
+		goto err0;
+	}
+
+	dsi->pdev = dsidev;
+	dsi_pdev_map[dsi_module] = dsidev;
+	dev_set_drvdata(&dsidev->dev, dsi);
+
+	dss_plat_data = dsidev->dev.platform_data;
+	board_info = dss_plat_data->board_data;
+	dsi->dsi_mux_pads = board_info->dsi_mux_pads;
+
+	spin_lock_init(&dsi->irq_lock);
+	spin_lock_init(&dsi->errors_lock);
+	dsi->errors = 0;
 
 #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-	spin_lock_init(&dsi.irq_stats_lock);
-	dsi.irq_stats.last_reset = jiffies;
+	spin_lock_init(&dsi->irq_stats_lock);
+	dsi->irq_stats.last_reset = jiffies;
 #endif
 
-	mutex_init(&dsi.lock);
-	sema_init(&dsi.bus_lock, 1);
+	mutex_init(&dsi->lock);
+	sema_init(&dsi->bus_lock, 1);
 
-	dsi.workqueue = create_singlethread_workqueue("dsi");
-	if (dsi.workqueue == NULL)
-		return -ENOMEM;
-
-	INIT_DELAYED_WORK_DEFERRABLE(&dsi.framedone_timeout_work,
+	INIT_DELAYED_WORK_DEFERRABLE(&dsi->framedone_timeout_work,
 			dsi_framedone_timeout_work_callback);
 
 #ifdef DSI_CATCH_MISSING_TE
-	init_timer(&dsi.te_timer);
-	dsi.te_timer.function = dsi_te_timeout;
-	dsi.te_timer.data = 0;
+	init_timer(&dsi->te_timer);
+	dsi->te_timer.function = dsi_te_timeout;
+	dsi->te_timer.data = 0;
 #endif
-	dsi_mem = platform_get_resource(dsi.pdev, IORESOURCE_MEM, 0);
+	dsi_mem = platform_get_resource(dsi->pdev, IORESOURCE_MEM, 0);
 	if (!dsi_mem) {
 		DSSERR("can't get IORESOURCE_MEM DSI\n");
 		r = -EINVAL;
 		goto err1;
 	}
-	dsi.base = ioremap(dsi_mem->start, resource_size(dsi_mem));
-	if (!dsi.base) {
+	dsi->base = ioremap(dsi_mem->start, resource_size(dsi_mem));
+	if (!dsi->base) {
 		DSSERR("can't ioremap DSI\n");
 		r = -ENOMEM;
 		goto err1;
 	}
-	dsi.irq	= platform_get_irq(dsi.pdev, 0);
-	if (dsi.irq < 0) {
+	dsi->irq = platform_get_irq(dsi->pdev, 0);
+	if (dsi->irq < 0) {
 		DSSERR("platform_get_irq failed\n");
 		r = -ENODEV;
 		goto err2;
 	}
 
-	r = request_irq(dsi.irq, omap_dsi_irq_handler, IRQF_SHARED,
-		"OMAP DSI1", dsi.pdev);
+	r = request_irq(dsi->irq, omap_dsi_irq_handler, IRQF_SHARED,
+		dev_name(&dsidev->dev), dsi->pdev);
 	if (r < 0) {
 		DSSERR("request_irq failed\n");
 		goto err2;
 	}
 
 	/* DSI VCs initialization */
-	for (i = 0; i < ARRAY_SIZE(dsi.vc); i++) {
-		dsi.vc[i].mode = DSI_VC_MODE_L4;
-		dsi.vc[i].dssdev = NULL;
-		dsi.vc[i].vc_id = 0;
+	for (i = 0; i < ARRAY_SIZE(dsi->vc); i++) {
+		dsi->vc[i].mode = DSI_VC_MODE_L4;
+		dsi->vc[i].dssdev = NULL;
+		dsi->vc[i].vc_id = 0;
 	}
 
-	dsi_calc_clock_param_ranges();
+	dsi_calc_clock_param_ranges(dsidev);
 
 	enable_clocks(1);
 
-	rev = dsi_read_reg(DSI_REVISION);
-	dev_dbg(&pdev->dev, "OMAP DSI rev %d.%d\n",
+	rev = dsi_read_reg(dsidev, DSI_REVISION);
+	dev_dbg(&dsidev->dev, "OMAP DSI rev %d.%d\n",
 	       FLD_GET(rev, 7, 4), FLD_GET(rev, 3, 0));
 
+	dsi->num_data_lanes = dsi_get_num_data_lanes(dsidev);
+
 	enable_clocks(0);
 
 	return 0;
 err2:
-	iounmap(dsi.base);
+	iounmap(dsi->base);
 err1:
-	destroy_workqueue(dsi.workqueue);
+	kfree(dsi);
+err0:
 	return r;
 }
 
-static void dsi_exit(void)
+static void dsi_exit(struct platform_device *dsidev)
 {
-	if (dsi.vdds_dsi_reg != NULL) {
-		regulator_put(dsi.vdds_dsi_reg);
-		dsi.vdds_dsi_reg = NULL;
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	if (dsi->vdds_dsi_reg != NULL) {
+		if (dsi->vdds_dsi_enabled) {
+			regulator_disable(dsi->vdds_dsi_reg);
+			dsi->vdds_dsi_enabled = false;
+		}
+
+		regulator_put(dsi->vdds_dsi_reg);
+		dsi->vdds_dsi_reg = NULL;
 	}
 
-	free_irq(dsi.irq, dsi.pdev);
-	iounmap(dsi.base);
+	free_irq(dsi->irq, dsi->pdev);
+	iounmap(dsi->base);
 
-	destroy_workqueue(dsi.workqueue);
+	kfree(dsi);
 
 	DSSDBG("omap_dsi_exit\n");
 }
 
 /* DSI1 HW IP initialisation */
-static int omap_dsi1hw_probe(struct platform_device *pdev)
+static int omap_dsi1hw_probe(struct platform_device *dsidev)
 {
 	int r;
-	dsi.pdev = pdev;
-	r = dsi_init(pdev);
+
+	r = dsi_init(dsidev);
 	if (r) {
 		DSSERR("Failed to initialize DSI\n");
 		goto err_dsi;
@@ -3760,9 +4570,12 @@
 	return r;
 }
 
-static int omap_dsi1hw_remove(struct platform_device *pdev)
+static int omap_dsi1hw_remove(struct platform_device *dsidev)
 {
-	dsi_exit();
+	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+
+	dsi_exit(dsidev);
+	WARN_ON(dsi->scp_clk_refcount > 0);
 	return 0;
 }
 
diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c
index 3f1fee6..d9489d5 100644
--- a/drivers/video/omap2/dss/dss.c
+++ b/drivers/video/omap2/dss/dss.c
@@ -29,7 +29,7 @@
 #include <linux/seq_file.h>
 #include <linux/clk.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/clock.h>
 #include "dss.h"
 #include "dss_features.h"
@@ -45,7 +45,6 @@
 #define DSS_REVISION			DSS_REG(0x0000)
 #define DSS_SYSCONFIG			DSS_REG(0x0010)
 #define DSS_SYSSTATUS			DSS_REG(0x0014)
-#define DSS_IRQSTATUS			DSS_REG(0x0018)
 #define DSS_CONTROL			DSS_REG(0x0040)
 #define DSS_SDI_CONTROL			DSS_REG(0x0044)
 #define DSS_PLL_CONTROL			DSS_REG(0x0048)
@@ -75,17 +74,17 @@
 	struct dss_clock_info cache_dss_cinfo;
 	struct dispc_clock_info cache_dispc_cinfo;
 
-	enum dss_clk_source dsi_clk_source;
-	enum dss_clk_source dispc_clk_source;
-	enum dss_clk_source lcd_clk_source[MAX_DSS_LCD_MANAGERS];
+	enum omap_dss_clk_source dsi_clk_source[MAX_NUM_DSI];
+	enum omap_dss_clk_source dispc_clk_source;
+	enum omap_dss_clk_source lcd_clk_source[MAX_DSS_LCD_MANAGERS];
 
 	u32		ctx[DSS_SZ_REGS / sizeof(u32)];
 } dss;
 
 static const char * const dss_generic_clk_source_names[] = {
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "DSI_PLL_HSDIV_DISPC",
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]		= "DSI_PLL_HSDIV_DSI",
-	[DSS_CLK_SRC_FCK]			= "DSS_FCK",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "DSI_PLL_HSDIV_DISPC",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]	= "DSI_PLL_HSDIV_DSI",
+	[OMAP_DSS_CLK_SRC_FCK]			= "DSS_FCK",
 };
 
 static void dss_clk_enable_all_no_ctx(void);
@@ -230,7 +229,7 @@
 	REG_FLD_MOD(DSS_PLL_CONTROL, 0, 18, 18); /* SDI_PLL_SYSRESET */
 }
 
-const char *dss_get_generic_clk_source_name(enum dss_clk_source clk_src)
+const char *dss_get_generic_clk_source_name(enum omap_dss_clk_source clk_src)
 {
 	return dss_generic_clk_source_names[clk_src];
 }
@@ -246,8 +245,8 @@
 
 	seq_printf(s, "- DSS -\n");
 
-	fclk_name = dss_get_generic_clk_source_name(DSS_CLK_SRC_FCK);
-	fclk_real_name = dss_feat_get_clk_source_name(DSS_CLK_SRC_FCK);
+	fclk_name = dss_get_generic_clk_source_name(OMAP_DSS_CLK_SRC_FCK);
+	fclk_real_name = dss_feat_get_clk_source_name(OMAP_DSS_CLK_SRC_FCK);
 	fclk_rate = dss_clk_get_rate(DSS_CLK_FCK);
 
 	if (dss.dpll4_m4_ck) {
@@ -286,7 +285,6 @@
 	DUMPREG(DSS_REVISION);
 	DUMPREG(DSS_SYSCONFIG);
 	DUMPREG(DSS_SYSSTATUS);
-	DUMPREG(DSS_IRQSTATUS);
 	DUMPREG(DSS_CONTROL);
 
 	if (dss_feat_get_supported_displays(OMAP_DSS_CHANNEL_LCD) &
@@ -300,18 +298,25 @@
 #undef DUMPREG
 }
 
-void dss_select_dispc_clk_source(enum dss_clk_source clk_src)
+void dss_select_dispc_clk_source(enum omap_dss_clk_source clk_src)
 {
+	struct platform_device *dsidev;
 	int b;
 	u8 start, end;
 
 	switch (clk_src) {
-	case DSS_CLK_SRC_FCK:
+	case OMAP_DSS_CLK_SRC_FCK:
 		b = 0;
 		break;
-	case DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
+	case OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
 		b = 1;
-		dsi_wait_pll_hsdiv_dispc_active();
+		dsidev = dsi_get_dsidev_from_id(0);
+		dsi_wait_pll_hsdiv_dispc_active(dsidev);
+		break;
+	case OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC:
+		b = 2;
+		dsidev = dsi_get_dsidev_from_id(1);
+		dsi_wait_pll_hsdiv_dispc_active(dsidev);
 		break;
 	default:
 		BUG();
@@ -324,17 +329,27 @@
 	dss.dispc_clk_source = clk_src;
 }
 
-void dss_select_dsi_clk_source(enum dss_clk_source clk_src)
+void dss_select_dsi_clk_source(int dsi_module,
+		enum omap_dss_clk_source clk_src)
 {
+	struct platform_device *dsidev;
 	int b;
 
 	switch (clk_src) {
-	case DSS_CLK_SRC_FCK:
+	case OMAP_DSS_CLK_SRC_FCK:
 		b = 0;
 		break;
-	case DSS_CLK_SRC_DSI_PLL_HSDIV_DSI:
+	case OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI:
+		BUG_ON(dsi_module != 0);
 		b = 1;
-		dsi_wait_pll_hsdiv_dsi_active();
+		dsidev = dsi_get_dsidev_from_id(0);
+		dsi_wait_pll_hsdiv_dsi_active(dsidev);
+		break;
+	case OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DSI:
+		BUG_ON(dsi_module != 1);
+		b = 1;
+		dsidev = dsi_get_dsidev_from_id(1);
+		dsi_wait_pll_hsdiv_dsi_active(dsidev);
 		break;
 	default:
 		BUG();
@@ -342,25 +357,33 @@
 
 	REG_FLD_MOD(DSS_CONTROL, b, 1, 1);	/* DSI_CLK_SWITCH */
 
-	dss.dsi_clk_source = clk_src;
+	dss.dsi_clk_source[dsi_module] = clk_src;
 }
 
 void dss_select_lcd_clk_source(enum omap_channel channel,
-		enum dss_clk_source clk_src)
+		enum omap_dss_clk_source clk_src)
 {
+	struct platform_device *dsidev;
 	int b, ix, pos;
 
 	if (!dss_has_feature(FEAT_LCD_CLK_SRC))
 		return;
 
 	switch (clk_src) {
-	case DSS_CLK_SRC_FCK:
+	case OMAP_DSS_CLK_SRC_FCK:
 		b = 0;
 		break;
-	case DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
+	case OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC:
 		BUG_ON(channel != OMAP_DSS_CHANNEL_LCD);
 		b = 1;
-		dsi_wait_pll_hsdiv_dispc_active();
+		dsidev = dsi_get_dsidev_from_id(0);
+		dsi_wait_pll_hsdiv_dispc_active(dsidev);
+		break;
+	case OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC:
+		BUG_ON(channel != OMAP_DSS_CHANNEL_LCD2);
+		b = 1;
+		dsidev = dsi_get_dsidev_from_id(1);
+		dsi_wait_pll_hsdiv_dispc_active(dsidev);
 		break;
 	default:
 		BUG();
@@ -373,20 +396,26 @@
 	dss.lcd_clk_source[ix] = clk_src;
 }
 
-enum dss_clk_source dss_get_dispc_clk_source(void)
+enum omap_dss_clk_source dss_get_dispc_clk_source(void)
 {
 	return dss.dispc_clk_source;
 }
 
-enum dss_clk_source dss_get_dsi_clk_source(void)
+enum omap_dss_clk_source dss_get_dsi_clk_source(int dsi_module)
 {
-	return dss.dsi_clk_source;
+	return dss.dsi_clk_source[dsi_module];
 }
 
-enum dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel)
+enum omap_dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel)
 {
-	int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1;
-	return dss.lcd_clk_source[ix];
+	if (dss_has_feature(FEAT_LCD_CLK_SRC)) {
+		int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1;
+		return dss.lcd_clk_source[ix];
+	} else {
+		/* LCD_CLK source is the same as DISPC_FCLK source for
+		 * OMAP2 and OMAP3 */
+		return dss.dispc_clk_source;
+	}
 }
 
 /* calculate clock rates using dividers in cinfo */
@@ -659,13 +688,18 @@
 	 * the kernel resets it */
 	omap_writel(omap_readl(0x48050440) & ~0x3, 0x48050440);
 
+#ifdef CONFIG_OMAP2_DSS_SLEEP_BEFORE_RESET
 	/* We need to wait here a bit, otherwise we sometimes start to
 	 * get synclost errors, and after that only power cycle will
 	 * restore DSS functionality. I have no idea why this happens.
 	 * And we have to wait _before_ resetting the DSS, but after
 	 * enabling clocks.
+	 *
+	 * This bug was at least present on OMAP3430. It's unknown
+	 * if it happens on OMAP2 or OMAP3630.
 	 */
 	msleep(50);
+#endif
 
 	_omap_dss_reset();
 
@@ -700,10 +734,11 @@
 
 	dss.dpll4_m4_ck = dpll4_m4_ck;
 
-	dss.dsi_clk_source = DSS_CLK_SRC_FCK;
-	dss.dispc_clk_source = DSS_CLK_SRC_FCK;
-	dss.lcd_clk_source[0] = DSS_CLK_SRC_FCK;
-	dss.lcd_clk_source[1] = DSS_CLK_SRC_FCK;
+	dss.dsi_clk_source[0] = OMAP_DSS_CLK_SRC_FCK;
+	dss.dsi_clk_source[1] = OMAP_DSS_CLK_SRC_FCK;
+	dss.dispc_clk_source = OMAP_DSS_CLK_SRC_FCK;
+	dss.lcd_clk_source[0] = OMAP_DSS_CLK_SRC_FCK;
+	dss.lcd_clk_source[1] = OMAP_DSS_CLK_SRC_FCK;
 
 	dss_save_context();
 
@@ -1015,6 +1050,14 @@
 		dss.dss_video_fck
 	};
 
+	const char *names[5] = {
+		"ick",
+		"fck",
+		"sys_clk",
+		"tv_fck",
+		"video_fck"
+	};
+
 	seq_printf(s, "- CORE -\n");
 
 	seq_printf(s, "internal clk count\t\t%u\n", dss.num_clks_enabled);
@@ -1022,8 +1065,11 @@
 	for (i = 0; i < 5; i++) {
 		if (!clocks[i])
 			continue;
-		seq_printf(s, "%-15s\t%lu\t%d\n",
+		seq_printf(s, "%s (%s)%*s\t%lu\t%d\n",
+				names[i],
 				clocks[i]->name,
+				24 - strlen(names[i]) - strlen(clocks[i]->name),
+				"",
 				clk_get_rate(clocks[i]),
 				clocks[i]->usecount);
 	}
diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h
index c2f582bb19..8ab6d43 100644
--- a/drivers/video/omap2/dss/dss.h
+++ b/drivers/video/omap2/dss/dss.h
@@ -117,15 +117,6 @@
 	DSS_CLK_VIDFCK	= 1 << 4,	/* DSS_96M_FCLK*/
 };
 
-enum dss_clk_source {
-	DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC,	/* OMAP3: DSI1_PLL_FCLK
-						 * OMAP4: PLL1_CLK1 */
-	DSS_CLK_SRC_DSI_PLL_HSDIV_DSI,		/* OMAP3: DSI2_PLL_FCLK
-						 * OMAP4: PLL1_CLK2 */
-	DSS_CLK_SRC_FCK,			/* OMAP2/3: DSS1_ALWON_FCLK
-						 * OMAP4: DSS_FCLK */
-};
-
 enum dss_hdmi_venc_clk_source_select {
 	DSS_VENC_TV_CLK = 0,
 	DSS_HDMI_M_PCLK = 1,
@@ -236,7 +227,7 @@
 void dss_clk_disable(enum dss_clock clks);
 unsigned long dss_clk_get_rate(enum dss_clock clk);
 int dss_need_ctx_restore(void);
-const char *dss_get_generic_clk_source_name(enum dss_clk_source clk_src);
+const char *dss_get_generic_clk_source_name(enum omap_dss_clk_source clk_src);
 void dss_dump_clocks(struct seq_file *s);
 
 void dss_dump_regs(struct seq_file *s);
@@ -248,13 +239,14 @@
 int dss_sdi_enable(void);
 void dss_sdi_disable(void);
 
-void dss_select_dispc_clk_source(enum dss_clk_source clk_src);
-void dss_select_dsi_clk_source(enum dss_clk_source clk_src);
+void dss_select_dispc_clk_source(enum omap_dss_clk_source clk_src);
+void dss_select_dsi_clk_source(int dsi_module,
+		enum omap_dss_clk_source clk_src);
 void dss_select_lcd_clk_source(enum omap_channel channel,
-		enum dss_clk_source clk_src);
-enum dss_clk_source dss_get_dispc_clk_source(void);
-enum dss_clk_source dss_get_dsi_clk_source(void);
-enum dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel);
+		enum omap_dss_clk_source clk_src);
+enum omap_dss_clk_source dss_get_dispc_clk_source(void);
+enum omap_dss_clk_source dss_get_dsi_clk_source(int dsi_module);
+enum omap_dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel);
 
 void dss_set_venc_output(enum omap_dss_venc_type type);
 void dss_set_dac_pwrdn_bgz(bool enable);
@@ -284,31 +276,39 @@
 
 /* DSI */
 #ifdef CONFIG_OMAP2_DSS_DSI
+
+struct dentry;
+struct file_operations;
+
 int dsi_init_platform_driver(void);
 void dsi_uninit_platform_driver(void);
 
 void dsi_dump_clocks(struct seq_file *s);
-void dsi_dump_irqs(struct seq_file *s);
-void dsi_dump_regs(struct seq_file *s);
+void dsi_create_debugfs_files_irq(struct dentry *debugfs_dir,
+		const struct file_operations *debug_fops);
+void dsi_create_debugfs_files_reg(struct dentry *debugfs_dir,
+		const struct file_operations *debug_fops);
 
 void dsi_save_context(void);
 void dsi_restore_context(void);
 
 int dsi_init_display(struct omap_dss_device *display);
 void dsi_irq_handler(void);
-unsigned long dsi_get_pll_hsdiv_dispc_rate(void);
-int dsi_pll_set_clock_div(struct dsi_clock_info *cinfo);
-int dsi_pll_calc_clock_div_pck(bool is_tft, unsigned long req_pck,
-		struct dsi_clock_info *cinfo,
+unsigned long dsi_get_pll_hsdiv_dispc_rate(struct platform_device *dsidev);
+int dsi_pll_set_clock_div(struct platform_device *dsidev,
+		struct dsi_clock_info *cinfo);
+int dsi_pll_calc_clock_div_pck(struct platform_device *dsidev, bool is_tft,
+		unsigned long req_pck, struct dsi_clock_info *cinfo,
 		struct dispc_clock_info *dispc_cinfo);
-int dsi_pll_init(struct omap_dss_device *dssdev, bool enable_hsclk,
+int dsi_pll_init(struct platform_device *dsidev, bool enable_hsclk,
 		bool enable_hsdiv);
-void dsi_pll_uninit(void);
+void dsi_pll_uninit(struct platform_device *dsidev, bool disconnect_lanes);
 void dsi_get_overlay_fifo_thresholds(enum omap_plane plane,
 		u32 fifo_size, enum omap_burst_size *burst_size,
 		u32 *fifo_low, u32 *fifo_high);
-void dsi_wait_pll_hsdiv_dispc_active(void);
-void dsi_wait_pll_hsdiv_dsi_active(void);
+void dsi_wait_pll_hsdiv_dispc_active(struct platform_device *dsidev);
+void dsi_wait_pll_hsdiv_dsi_active(struct platform_device *dsidev);
+struct platform_device *dsi_get_dsidev_from_id(int module);
 #else
 static inline int dsi_init_platform_driver(void)
 {
@@ -317,17 +317,47 @@
 static inline void dsi_uninit_platform_driver(void)
 {
 }
-static inline unsigned long dsi_get_pll_hsdiv_dispc_rate(void)
+static inline unsigned long dsi_get_pll_hsdiv_dispc_rate(struct platform_device *dsidev)
 {
 	WARN("%s: DSI not compiled in, returning rate as 0\n", __func__);
 	return 0;
 }
-static inline void dsi_wait_pll_hsdiv_dispc_active(void)
+static inline int dsi_pll_set_clock_div(struct platform_device *dsidev,
+		struct dsi_clock_info *cinfo)
+{
+	WARN("%s: DSI not compiled in\n", __func__);
+	return -ENODEV;
+}
+static inline int dsi_pll_calc_clock_div_pck(struct platform_device *dsidev,
+		bool is_tft, unsigned long req_pck,
+		struct dsi_clock_info *dsi_cinfo,
+		struct dispc_clock_info *dispc_cinfo)
+{
+	WARN("%s: DSI not compiled in\n", __func__);
+	return -ENODEV;
+}
+static inline int dsi_pll_init(struct platform_device *dsidev,
+		bool enable_hsclk, bool enable_hsdiv)
+{
+	WARN("%s: DSI not compiled in\n", __func__);
+	return -ENODEV;
+}
+static inline void dsi_pll_uninit(struct platform_device *dsidev,
+		bool disconnect_lanes)
 {
 }
-static inline void dsi_wait_pll_hsdiv_dsi_active(void)
+static inline void dsi_wait_pll_hsdiv_dispc_active(struct platform_device *dsidev)
 {
 }
+static inline void dsi_wait_pll_hsdiv_dsi_active(struct platform_device *dsidev)
+{
+}
+static inline struct platform_device *dsi_get_dsidev_from_id(int module)
+{
+	WARN("%s: DSI not compiled in, returning platform device as NULL\n",
+			__func__);
+	return NULL;
+}
 #endif
 
 /* DPI */
@@ -391,7 +421,8 @@
 		      enum omap_dss_rotation_type rotation_type,
 		      u8 rotation, bool mirror,
 		      u8 global_alpha, u8 pre_mult_alpha,
-		      enum omap_channel channel);
+		      enum omap_channel channel,
+		      u32 puv_addr);
 
 bool dispc_go_busy(enum omap_channel channel);
 void dispc_go(enum omap_channel channel);
@@ -485,13 +516,6 @@
 int rfbi_init_platform_driver(void);
 void rfbi_uninit_platform_driver(void);
 void rfbi_dump_regs(struct seq_file *s);
-
-int rfbi_configure(int rfbi_module, int bpp, int lines);
-void rfbi_enable_rfbi(bool enable);
-void rfbi_transfer_area(struct omap_dss_device *dssdev, u16 width,
-		u16 height, void (callback)(void *data), void *data);
-void rfbi_set_timings(int rfbi_module, struct rfbi_timings *t);
-unsigned long rfbi_get_max_tx_rate(void);
 int rfbi_init_display(struct omap_dss_device *display);
 #else
 static inline int rfbi_init_platform_driver(void)
diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c
index aa16222..1c18888 100644
--- a/drivers/video/omap2/dss/dss_features.c
+++ b/drivers/video/omap2/dss/dss_features.c
@@ -22,7 +22,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 
 #include "dss.h"
@@ -52,7 +52,7 @@
 };
 
 /* This struct is assigned to one of the below during initialization */
-static struct omap_dss_features *omap_current_dss_features;
+static const struct omap_dss_features *omap_current_dss_features;
 
 static const struct dss_reg_field omap2_dss_reg_fields[] = {
 	[FEAT_REG_FIRHINC]			= { 11, 0 },
@@ -177,22 +177,55 @@
 	OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32,
 };
 
+static const enum omap_color_mode omap4_dss_supported_color_modes[] = {
+	/* OMAP_DSS_GFX */
+	OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 |
+	OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 |
+	OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 |
+	OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U |
+	OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_ARGB32 |
+	OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32 |
+	OMAP_DSS_COLOR_ARGB16_1555,
+
+	/* OMAP_DSS_VIDEO1 */
+	OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB12U |
+	OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_ARGB16_1555 |
+	OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_NV12 |
+	OMAP_DSS_COLOR_RGBA16 | OMAP_DSS_COLOR_RGB24U |
+	OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_UYVY |
+	OMAP_DSS_COLOR_ARGB16 | OMAP_DSS_COLOR_XRGB16_1555 |
+	OMAP_DSS_COLOR_ARGB32 | OMAP_DSS_COLOR_RGBX16 |
+	OMAP_DSS_COLOR_RGBX32,
+
+       /* OMAP_DSS_VIDEO2 */
+	OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB12U |
+	OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_ARGB16_1555 |
+	OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_NV12 |
+	OMAP_DSS_COLOR_RGBA16 | OMAP_DSS_COLOR_RGB24U |
+	OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_UYVY |
+	OMAP_DSS_COLOR_ARGB16 | OMAP_DSS_COLOR_XRGB16_1555 |
+	OMAP_DSS_COLOR_ARGB32 | OMAP_DSS_COLOR_RGBX16 |
+	OMAP_DSS_COLOR_RGBX32,
+};
+
 static const char * const omap2_dss_clk_source_names[] = {
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "N/A",
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]		= "N/A",
-	[DSS_CLK_SRC_FCK]			= "DSS_FCLK1",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "N/A",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]	= "N/A",
+	[OMAP_DSS_CLK_SRC_FCK]			= "DSS_FCLK1",
 };
 
 static const char * const omap3_dss_clk_source_names[] = {
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "DSI1_PLL_FCLK",
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]		= "DSI2_PLL_FCLK",
-	[DSS_CLK_SRC_FCK]			= "DSS1_ALWON_FCLK",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "DSI1_PLL_FCLK",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]	= "DSI2_PLL_FCLK",
+	[OMAP_DSS_CLK_SRC_FCK]			= "DSS1_ALWON_FCLK",
 };
 
 static const char * const omap4_dss_clk_source_names[] = {
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "PLL1_CLK1",
-	[DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]		= "PLL1_CLK2",
-	[DSS_CLK_SRC_FCK]			= "DSS_FCLK",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC]	= "PLL1_CLK1",
+	[OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI]	= "PLL1_CLK2",
+	[OMAP_DSS_CLK_SRC_FCK]			= "DSS_FCLK",
+	[OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC]	= "PLL2_CLK1",
+	[OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DSI]	= "PLL2_CLK2",
 };
 
 static const struct dss_param_range omap2_dss_param_range[] = {
@@ -226,7 +259,7 @@
 };
 
 /* OMAP2 DSS Features */
-static struct omap_dss_features omap2_dss_features = {
+static const struct omap_dss_features omap2_dss_features = {
 	.reg_fields = omap2_dss_reg_fields,
 	.num_reg_fields = ARRAY_SIZE(omap2_dss_reg_fields),
 
@@ -244,7 +277,7 @@
 };
 
 /* OMAP3 DSS Features */
-static struct omap_dss_features omap3430_dss_features = {
+static const struct omap_dss_features omap3430_dss_features = {
 	.reg_fields = omap3_dss_reg_fields,
 	.num_reg_fields = ARRAY_SIZE(omap3_dss_reg_fields),
 
@@ -252,7 +285,8 @@
 		FEAT_GLOBAL_ALPHA | FEAT_LCDENABLEPOL |
 		FEAT_LCDENABLESIGNAL | FEAT_PCKFREEENABLE |
 		FEAT_FUNCGATED | FEAT_ROWREPEATENABLE |
-		FEAT_LINEBUFFERSPLIT | FEAT_RESIZECONF,
+		FEAT_LINEBUFFERSPLIT | FEAT_RESIZECONF |
+		FEAT_DSI_PLL_FREQSEL | FEAT_DSI_REVERSE_TXCLKESC,
 
 	.num_mgrs = 2,
 	.num_ovls = 3,
@@ -262,7 +296,7 @@
 	.dss_params = omap3_dss_param_range,
 };
 
-static struct omap_dss_features omap3630_dss_features = {
+static const struct omap_dss_features omap3630_dss_features = {
 	.reg_fields = omap3_dss_reg_fields,
 	.num_reg_fields = ARRAY_SIZE(omap3_dss_reg_fields),
 
@@ -271,7 +305,8 @@
 		FEAT_LCDENABLESIGNAL | FEAT_PCKFREEENABLE |
 		FEAT_PRE_MULT_ALPHA | FEAT_FUNCGATED |
 		FEAT_ROWREPEATENABLE | FEAT_LINEBUFFERSPLIT |
-		FEAT_RESIZECONF,
+		FEAT_RESIZECONF | FEAT_DSI_PLL_PWR_BUG |
+		FEAT_DSI_PLL_FREQSEL,
 
 	.num_mgrs = 2,
 	.num_ovls = 3,
@@ -282,19 +317,43 @@
 };
 
 /* OMAP4 DSS Features */
-static struct omap_dss_features omap4_dss_features = {
+/* For OMAP4430 ES 1.0 revision */
+static const struct omap_dss_features omap4430_es1_0_dss_features  = {
 	.reg_fields = omap4_dss_reg_fields,
 	.num_reg_fields = ARRAY_SIZE(omap4_dss_reg_fields),
 
 	.has_feature	=
 		FEAT_GLOBAL_ALPHA | FEAT_PRE_MULT_ALPHA |
 		FEAT_MGR_LCD2 | FEAT_GLOBAL_ALPHA_VID1 |
-		FEAT_CORE_CLK_DIV | FEAT_LCD_CLK_SRC,
+		FEAT_CORE_CLK_DIV | FEAT_LCD_CLK_SRC |
+		FEAT_DSI_DCS_CMD_CONFIG_VC | FEAT_DSI_VC_OCP_WIDTH |
+		FEAT_DSI_GNQ | FEAT_HANDLE_UV_SEPARATE | FEAT_ATTR2,
 
 	.num_mgrs = 3,
 	.num_ovls = 3,
 	.supported_displays = omap4_dss_supported_displays,
-	.supported_color_modes = omap3_dss_supported_color_modes,
+	.supported_color_modes = omap4_dss_supported_color_modes,
+	.clksrc_names = omap4_dss_clk_source_names,
+	.dss_params = omap4_dss_param_range,
+};
+
+/* For all the other OMAP4 versions */
+static const struct omap_dss_features omap4_dss_features = {
+	.reg_fields = omap4_dss_reg_fields,
+	.num_reg_fields = ARRAY_SIZE(omap4_dss_reg_fields),
+
+	.has_feature	=
+		FEAT_GLOBAL_ALPHA | FEAT_PRE_MULT_ALPHA |
+		FEAT_MGR_LCD2 | FEAT_GLOBAL_ALPHA_VID1 |
+		FEAT_CORE_CLK_DIV | FEAT_LCD_CLK_SRC |
+		FEAT_DSI_DCS_CMD_CONFIG_VC | FEAT_DSI_VC_OCP_WIDTH |
+		FEAT_DSI_GNQ | FEAT_HDMI_CTS_SWMODE |
+		FEAT_HANDLE_UV_SEPARATE | FEAT_ATTR2,
+
+	.num_mgrs = 3,
+	.num_ovls = 3,
+	.supported_displays = omap4_dss_supported_displays,
+	.supported_color_modes = omap4_dss_supported_color_modes,
 	.clksrc_names = omap4_dss_clk_source_names,
 	.dss_params = omap4_dss_param_range,
 };
@@ -337,7 +396,7 @@
 			color_mode;
 }
 
-const char *dss_feat_get_clk_source_name(enum dss_clk_source id)
+const char *dss_feat_get_clk_source_name(enum omap_dss_clk_source id)
 {
 	return omap_current_dss_features->clksrc_names[id];
 }
@@ -365,6 +424,10 @@
 		omap_current_dss_features = &omap3630_dss_features;
 	else if (cpu_is_omap34xx())
 		omap_current_dss_features = &omap3430_dss_features;
-	else
+	else if (omap_rev() == OMAP4430_REV_ES1_0)
+		omap_current_dss_features = &omap4430_es1_0_dss_features;
+	else if (cpu_is_omap44xx())
 		omap_current_dss_features = &omap4_dss_features;
+	else
+		DSSWARN("Unsupported OMAP version");
 }
diff --git a/drivers/video/omap2/dss/dss_features.h b/drivers/video/omap2/dss/dss_features.h
index 12e9c4e..07b346f 100644
--- a/drivers/video/omap2/dss/dss_features.h
+++ b/drivers/video/omap2/dss/dss_features.h
@@ -23,23 +23,34 @@
 #define MAX_DSS_MANAGERS	3
 #define MAX_DSS_OVERLAYS	3
 #define MAX_DSS_LCD_MANAGERS	2
+#define MAX_NUM_DSI		2
 
 /* DSS has feature id */
 enum dss_feat_id {
-	FEAT_GLOBAL_ALPHA	= 1 << 0,
-	FEAT_GLOBAL_ALPHA_VID1	= 1 << 1,
-	FEAT_PRE_MULT_ALPHA	= 1 << 2,
-	FEAT_LCDENABLEPOL	= 1 << 3,
-	FEAT_LCDENABLESIGNAL	= 1 << 4,
-	FEAT_PCKFREEENABLE	= 1 << 5,
-	FEAT_FUNCGATED		= 1 << 6,
-	FEAT_MGR_LCD2		= 1 << 7,
-	FEAT_LINEBUFFERSPLIT	= 1 << 8,
-	FEAT_ROWREPEATENABLE	= 1 << 9,
-	FEAT_RESIZECONF		= 1 << 10,
+	FEAT_GLOBAL_ALPHA		= 1 << 0,
+	FEAT_GLOBAL_ALPHA_VID1		= 1 << 1,
+	FEAT_PRE_MULT_ALPHA		= 1 << 2,
+	FEAT_LCDENABLEPOL		= 1 << 3,
+	FEAT_LCDENABLESIGNAL		= 1 << 4,
+	FEAT_PCKFREEENABLE		= 1 << 5,
+	FEAT_FUNCGATED			= 1 << 6,
+	FEAT_MGR_LCD2			= 1 << 7,
+	FEAT_LINEBUFFERSPLIT		= 1 << 8,
+	FEAT_ROWREPEATENABLE		= 1 << 9,
+	FEAT_RESIZECONF			= 1 << 10,
 	/* Independent core clk divider */
-	FEAT_CORE_CLK_DIV	= 1 << 11,
-	FEAT_LCD_CLK_SRC	= 1 << 12,
+	FEAT_CORE_CLK_DIV		= 1 << 11,
+	FEAT_LCD_CLK_SRC		= 1 << 12,
+	/* DSI-PLL power command 0x3 is not working */
+	FEAT_DSI_PLL_PWR_BUG		= 1 << 13,
+	FEAT_DSI_PLL_FREQSEL		= 1 << 14,
+	FEAT_DSI_DCS_CMD_CONFIG_VC	= 1 << 15,
+	FEAT_DSI_VC_OCP_WIDTH		= 1 << 16,
+	FEAT_DSI_REVERSE_TXCLKESC	= 1 << 17,
+	FEAT_DSI_GNQ			= 1 << 18,
+	FEAT_HDMI_CTS_SWMODE		= 1 << 19,
+	FEAT_HANDLE_UV_SEPARATE         = 1 << 20,
+	FEAT_ATTR2                      = 1 << 21,
 };
 
 /* DSS register field id */
@@ -77,7 +88,7 @@
 enum omap_color_mode dss_feat_get_supported_color_modes(enum omap_plane plane);
 bool dss_feat_color_mode_supported(enum omap_plane plane,
 		enum omap_color_mode color_mode);
-const char *dss_feat_get_clk_source_name(enum dss_clk_source id);
+const char *dss_feat_get_clk_source_name(enum omap_dss_clk_source id);
 
 bool dss_has_feature(enum dss_feat_id id);
 void dss_feat_get_reg_field(enum dss_feat_reg_field id, u8 *start, u8 *end);
diff --git a/drivers/video/omap2/dss/hdmi.c b/drivers/video/omap2/dss/hdmi.c
index a981def..b0555f4 100644
--- a/drivers/video/omap2/dss/hdmi.c
+++ b/drivers/video/omap2/dss/hdmi.c
@@ -29,10 +29,16 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/string.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
+#if defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI) || \
+	defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI_MODULE)
+#include <sound/soc.h>
+#include <sound/pcm_params.h>
+#endif
 
 #include "dss.h"
 #include "hdmi.h"
+#include "dss_features.h"
 
 static struct {
 	struct mutex lock;
@@ -1052,25 +1058,26 @@
 	cfg->timings.hsync_pol = cea_vesa_timings[code].hsync_pol;
 }
 
-static void hdmi_compute_pll(unsigned long clkin, int phy,
-	int n, struct hdmi_pll_info *pi)
+static void hdmi_compute_pll(struct omap_dss_device *dssdev, int phy,
+		struct hdmi_pll_info *pi)
 {
-	unsigned long refclk;
+	unsigned long clkin, refclk;
 	u32 mf;
 
+	clkin = dss_clk_get_rate(DSS_CLK_SYSCK) / 10000;
 	/*
 	 * Input clock is predivided by N + 1
 	 * out put of which is reference clk
 	 */
-	refclk = clkin / (n + 1);
-	pi->regn = n;
+	pi->regn = dssdev->clocks.hdmi.regn;
+	refclk = clkin / (pi->regn + 1);
 
 	/*
 	 * multiplier is pixel_clk/ref_clk
 	 * Multiplying by 100 to avoid fractional part removal
 	 */
-	pi->regm = (phy * 100/(refclk))/100;
-	pi->regm2 = 1;
+	pi->regm = (phy * 100 / (refclk)) / 100;
+	pi->regm2 = dssdev->clocks.hdmi.regm2;
 
 	/*
 	 * fractional multiplier is remainder of the difference between
@@ -1078,14 +1085,14 @@
 	 * multiplied by 2^18(262144) divided by the reference clock
 	 */
 	mf = (phy - pi->regm * refclk) * 262144;
-	pi->regmf = mf/(refclk);
+	pi->regmf = mf / (refclk);
 
 	/*
 	 * Dcofreq should be set to 1 if required pixel clock
 	 * is greater than 1000MHz
 	 */
 	pi->dcofreq = phy > 1000 * 100;
-	pi->regsd = ((pi->regm * clkin / 10) / ((n + 1) * 250) + 5) / 10;
+	pi->regsd = ((pi->regm * clkin / 10) / ((pi->regn + 1) * 250) + 5) / 10;
 
 	DSSDBG("M = %d Mf = %d\n", pi->regm, pi->regmf);
 	DSSDBG("range = %d sd = %d\n", pi->dcofreq, pi->regsd);
@@ -1106,7 +1113,7 @@
 	int r, code = 0;
 	struct hdmi_pll_info pll_data;
 	struct omap_video_timings *p;
-	int clkin, n, phy;
+	unsigned long phy;
 
 	hdmi_enable_clocks(1);
 
@@ -1126,11 +1133,9 @@
 	dssdev->panel.timings = cea_vesa_timings[code].timings;
 	update_hdmi_timings(&hdmi.cfg, p, code);
 
-	clkin = 3840; /* 38.4 MHz */
-	n = 15; /* this is a constant for our math */
 	phy = p->pixel_clock;
 
-	hdmi_compute_pll(clkin, phy, n, &pll_data);
+	hdmi_compute_pll(dssdev, phy, &pll_data);
 
 	hdmi_wp_video_start(0);
 
@@ -1160,7 +1165,7 @@
 	 * dynamically by user. This can be moved to single location , say
 	 * Boardfile.
 	 */
-	dss_select_dispc_clk_source(DSS_CLK_SRC_FCK);
+	dss_select_dispc_clk_source(dssdev->clocks.dispc.dispc_fclk_src);
 
 	/* bypass TV gamma table */
 	dispc_enable_gamma_table(0);
@@ -1275,10 +1280,420 @@
 	mutex_unlock(&hdmi.lock);
 }
 
+#if defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI) || \
+	defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI_MODULE)
+static void hdmi_wp_audio_config_format(
+		struct hdmi_audio_format *aud_fmt)
+{
+	u32 r;
+
+	DSSDBG("Enter hdmi_wp_audio_config_format\n");
+
+	r = hdmi_read_reg(HDMI_WP_AUDIO_CFG);
+	r = FLD_MOD(r, aud_fmt->stereo_channels, 26, 24);
+	r = FLD_MOD(r, aud_fmt->active_chnnls_msk, 23, 16);
+	r = FLD_MOD(r, aud_fmt->en_sig_blk_strt_end, 5, 5);
+	r = FLD_MOD(r, aud_fmt->type, 4, 4);
+	r = FLD_MOD(r, aud_fmt->justification, 3, 3);
+	r = FLD_MOD(r, aud_fmt->sample_order, 2, 2);
+	r = FLD_MOD(r, aud_fmt->samples_per_word, 1, 1);
+	r = FLD_MOD(r, aud_fmt->sample_size, 0, 0);
+	hdmi_write_reg(HDMI_WP_AUDIO_CFG, r);
+}
+
+static void hdmi_wp_audio_config_dma(struct hdmi_audio_dma *aud_dma)
+{
+	u32 r;
+
+	DSSDBG("Enter hdmi_wp_audio_config_dma\n");
+
+	r = hdmi_read_reg(HDMI_WP_AUDIO_CFG2);
+	r = FLD_MOD(r, aud_dma->transfer_size, 15, 8);
+	r = FLD_MOD(r, aud_dma->block_size, 7, 0);
+	hdmi_write_reg(HDMI_WP_AUDIO_CFG2, r);
+
+	r = hdmi_read_reg(HDMI_WP_AUDIO_CTRL);
+	r = FLD_MOD(r, aud_dma->mode, 9, 9);
+	r = FLD_MOD(r, aud_dma->fifo_threshold, 8, 0);
+	hdmi_write_reg(HDMI_WP_AUDIO_CTRL, r);
+}
+
+static void hdmi_core_audio_config(struct hdmi_core_audio_config *cfg)
+{
+	u32 r;
+
+	/* audio clock recovery parameters */
+	r = hdmi_read_reg(HDMI_CORE_AV_ACR_CTRL);
+	r = FLD_MOD(r, cfg->use_mclk, 2, 2);
+	r = FLD_MOD(r, cfg->en_acr_pkt, 1, 1);
+	r = FLD_MOD(r, cfg->cts_mode, 0, 0);
+	hdmi_write_reg(HDMI_CORE_AV_ACR_CTRL, r);
+
+	REG_FLD_MOD(HDMI_CORE_AV_N_SVAL1, cfg->n, 7, 0);
+	REG_FLD_MOD(HDMI_CORE_AV_N_SVAL2, cfg->n >> 8, 7, 0);
+	REG_FLD_MOD(HDMI_CORE_AV_N_SVAL3, cfg->n >> 16, 7, 0);
+
+	if (cfg->cts_mode == HDMI_AUDIO_CTS_MODE_SW) {
+		REG_FLD_MOD(HDMI_CORE_AV_CTS_SVAL1, cfg->cts, 7, 0);
+		REG_FLD_MOD(HDMI_CORE_AV_CTS_SVAL2, cfg->cts >> 8, 7, 0);
+		REG_FLD_MOD(HDMI_CORE_AV_CTS_SVAL3, cfg->cts >> 16, 7, 0);
+	} else {
+		/*
+		 * HDMI IP uses this configuration to divide the MCLK to
+		 * update CTS value.
+		 */
+		REG_FLD_MOD(HDMI_CORE_AV_FREQ_SVAL, cfg->mclk_mode, 2, 0);
+
+		/* Configure clock for audio packets */
+		REG_FLD_MOD(HDMI_CORE_AV_AUD_PAR_BUSCLK_1,
+			cfg->aud_par_busclk, 7, 0);
+		REG_FLD_MOD(HDMI_CORE_AV_AUD_PAR_BUSCLK_2,
+			(cfg->aud_par_busclk >> 8), 7, 0);
+		REG_FLD_MOD(HDMI_CORE_AV_AUD_PAR_BUSCLK_3,
+			(cfg->aud_par_busclk >> 16), 7, 0);
+	}
+
+	/* Override of SPDIF sample frequency with value in I2S_CHST4 */
+	REG_FLD_MOD(HDMI_CORE_AV_SPDIF_CTRL, cfg->fs_override, 1, 1);
+
+	/* I2S parameters */
+	REG_FLD_MOD(HDMI_CORE_AV_I2S_CHST4, cfg->freq_sample, 3, 0);
+
+	r = hdmi_read_reg(HDMI_CORE_AV_I2S_IN_CTRL);
+	r = FLD_MOD(r, cfg->i2s_cfg.en_high_bitrate_aud, 7, 7);
+	r = FLD_MOD(r, cfg->i2s_cfg.sck_edge_mode, 6, 6);
+	r = FLD_MOD(r, cfg->i2s_cfg.cbit_order, 5, 5);
+	r = FLD_MOD(r, cfg->i2s_cfg.vbit, 4, 4);
+	r = FLD_MOD(r, cfg->i2s_cfg.ws_polarity, 3, 3);
+	r = FLD_MOD(r, cfg->i2s_cfg.justification, 2, 2);
+	r = FLD_MOD(r, cfg->i2s_cfg.direction, 1, 1);
+	r = FLD_MOD(r, cfg->i2s_cfg.shift, 0, 0);
+	hdmi_write_reg(HDMI_CORE_AV_I2S_IN_CTRL, r);
+
+	r = hdmi_read_reg(HDMI_CORE_AV_I2S_CHST5);
+	r = FLD_MOD(r, cfg->freq_sample, 7, 4);
+	r = FLD_MOD(r, cfg->i2s_cfg.word_length, 3, 1);
+	r = FLD_MOD(r, cfg->i2s_cfg.word_max_length, 0, 0);
+	hdmi_write_reg(HDMI_CORE_AV_I2S_CHST5, r);
+
+	REG_FLD_MOD(HDMI_CORE_AV_I2S_IN_LEN, cfg->i2s_cfg.in_length_bits, 3, 0);
+
+	/* Audio channels and mode parameters */
+	REG_FLD_MOD(HDMI_CORE_AV_HDMI_CTRL, cfg->layout, 2, 1);
+	r = hdmi_read_reg(HDMI_CORE_AV_AUD_MODE);
+	r = FLD_MOD(r, cfg->i2s_cfg.active_sds, 7, 4);
+	r = FLD_MOD(r, cfg->en_dsd_audio, 3, 3);
+	r = FLD_MOD(r, cfg->en_parallel_aud_input, 2, 2);
+	r = FLD_MOD(r, cfg->en_spdif, 1, 1);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_MODE, r);
+}
+
+static void hdmi_core_audio_infoframe_config(
+		struct hdmi_core_infoframe_audio *info_aud)
+{
+	u8 val;
+	u8 sum = 0, checksum = 0;
+
+	/*
+	 * Set audio info frame type, version and length as
+	 * described in HDMI 1.4a Section 8.2.2 specification.
+	 * Checksum calculation is defined in Section 5.3.5.
+	 */
+	hdmi_write_reg(HDMI_CORE_AV_AUDIO_TYPE, 0x84);
+	hdmi_write_reg(HDMI_CORE_AV_AUDIO_VERS, 0x01);
+	hdmi_write_reg(HDMI_CORE_AV_AUDIO_LEN, 0x0a);
+	sum += 0x84 + 0x001 + 0x00a;
+
+	val = (info_aud->db1_coding_type << 4)
+			| (info_aud->db1_channel_count - 1);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(0), val);
+	sum += val;
+
+	val = (info_aud->db2_sample_freq << 2) | info_aud->db2_sample_size;
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(1), val);
+	sum += val;
+
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(2), 0x00);
+
+	val = info_aud->db4_channel_alloc;
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(3), val);
+	sum += val;
+
+	val = (info_aud->db5_downmix_inh << 7) | (info_aud->db5_lsv << 3);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(4), val);
+	sum += val;
+
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(5), 0x00);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(6), 0x00);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(7), 0x00);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(8), 0x00);
+	hdmi_write_reg(HDMI_CORE_AV_AUD_DBYTE(9), 0x00);
+
+	checksum = 0x100 - sum;
+	hdmi_write_reg(HDMI_CORE_AV_AUDIO_CHSUM, checksum);
+
+	/*
+	 * TODO: Add MPEG and SPD enable and repeat cfg when EDID parsing
+	 * is available.
+	 */
+}
+
+static int hdmi_config_audio_acr(u32 sample_freq, u32 *n, u32 *cts)
+{
+	u32 r;
+	u32 deep_color = 0;
+	u32 pclk = hdmi.cfg.timings.timings.pixel_clock;
+
+	if (n == NULL || cts == NULL)
+		return -EINVAL;
+	/*
+	 * Obtain current deep color configuration. This needed
+	 * to calculate the TMDS clock based on the pixel clock.
+	 */
+	r = REG_GET(HDMI_WP_VIDEO_CFG, 1, 0);
+	switch (r) {
+	case 1: /* No deep color selected */
+		deep_color = 100;
+		break;
+	case 2: /* 10-bit deep color selected */
+		deep_color = 125;
+		break;
+	case 3: /* 12-bit deep color selected */
+		deep_color = 150;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (sample_freq) {
+	case 32000:
+		if ((deep_color == 125) && ((pclk == 54054)
+				|| (pclk == 74250)))
+			*n = 8192;
+		else
+			*n = 4096;
+		break;
+	case 44100:
+		*n = 6272;
+		break;
+	case 48000:
+		if ((deep_color == 125) && ((pclk == 54054)
+				|| (pclk == 74250)))
+			*n = 8192;
+		else
+			*n = 6144;
+		break;
+	default:
+		*n = 0;
+		return -EINVAL;
+	}
+
+	/* Calculate CTS. See HDMI 1.3a or 1.4a specifications */
+	*cts = pclk * (*n / 128) * deep_color / (sample_freq / 10);
+
+	return 0;
+}
+
+static int hdmi_audio_hw_params(struct snd_pcm_substream *substream,
+				    struct snd_pcm_hw_params *params,
+				    struct snd_soc_dai *dai)
+{
+	struct hdmi_audio_format audio_format;
+	struct hdmi_audio_dma audio_dma;
+	struct hdmi_core_audio_config core_cfg;
+	struct hdmi_core_infoframe_audio aud_if_cfg;
+	int err, n, cts;
+	enum hdmi_core_audio_sample_freq sample_freq;
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		core_cfg.i2s_cfg.word_max_length =
+			HDMI_AUDIO_I2S_MAX_WORD_20BITS;
+		core_cfg.i2s_cfg.word_length = HDMI_AUDIO_I2S_CHST_WORD_16_BITS;
+		core_cfg.i2s_cfg.in_length_bits =
+			HDMI_AUDIO_I2S_INPUT_LENGTH_16;
+		core_cfg.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_LEFT;
+		audio_format.samples_per_word = HDMI_AUDIO_ONEWORD_TWOSAMPLES;
+		audio_format.sample_size = HDMI_AUDIO_SAMPLE_16BITS;
+		audio_format.justification = HDMI_AUDIO_JUSTIFY_LEFT;
+		audio_dma.transfer_size = 0x10;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		core_cfg.i2s_cfg.word_max_length =
+			HDMI_AUDIO_I2S_MAX_WORD_24BITS;
+		core_cfg.i2s_cfg.word_length = HDMI_AUDIO_I2S_CHST_WORD_24_BITS;
+		core_cfg.i2s_cfg.in_length_bits =
+			HDMI_AUDIO_I2S_INPUT_LENGTH_24;
+		audio_format.samples_per_word = HDMI_AUDIO_ONEWORD_ONESAMPLE;
+		audio_format.sample_size = HDMI_AUDIO_SAMPLE_24BITS;
+		audio_format.justification = HDMI_AUDIO_JUSTIFY_RIGHT;
+		core_cfg.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_RIGHT;
+		audio_dma.transfer_size = 0x20;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (params_rate(params)) {
+	case 32000:
+		sample_freq = HDMI_AUDIO_FS_32000;
+		break;
+	case 44100:
+		sample_freq = HDMI_AUDIO_FS_44100;
+		break;
+	case 48000:
+		sample_freq = HDMI_AUDIO_FS_48000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = hdmi_config_audio_acr(params_rate(params), &n, &cts);
+	if (err < 0)
+		return err;
+
+	/* Audio wrapper config */
+	audio_format.stereo_channels = HDMI_AUDIO_STEREO_ONECHANNEL;
+	audio_format.active_chnnls_msk = 0x03;
+	audio_format.type = HDMI_AUDIO_TYPE_LPCM;
+	audio_format.sample_order = HDMI_AUDIO_SAMPLE_LEFT_FIRST;
+	/* Disable start/stop signals of IEC 60958 blocks */
+	audio_format.en_sig_blk_strt_end = HDMI_AUDIO_BLOCK_SIG_STARTEND_OFF;
+
+	audio_dma.block_size = 0xC0;
+	audio_dma.mode = HDMI_AUDIO_TRANSF_DMA;
+	audio_dma.fifo_threshold = 0x20; /* in number of samples */
+
+	hdmi_wp_audio_config_dma(&audio_dma);
+	hdmi_wp_audio_config_format(&audio_format);
+
+	/*
+	 * I2S config
+	 */
+	core_cfg.i2s_cfg.en_high_bitrate_aud = false;
+	/* Only used with high bitrate audio */
+	core_cfg.i2s_cfg.cbit_order = false;
+	/* Serial data and word select should change on sck rising edge */
+	core_cfg.i2s_cfg.sck_edge_mode = HDMI_AUDIO_I2S_SCK_EDGE_RISING;
+	core_cfg.i2s_cfg.vbit = HDMI_AUDIO_I2S_VBIT_FOR_PCM;
+	/* Set I2S word select polarity */
+	core_cfg.i2s_cfg.ws_polarity = HDMI_AUDIO_I2S_WS_POLARITY_LOW_IS_LEFT;
+	core_cfg.i2s_cfg.direction = HDMI_AUDIO_I2S_MSB_SHIFTED_FIRST;
+	/* Set serial data to word select shift. See Phillips spec. */
+	core_cfg.i2s_cfg.shift = HDMI_AUDIO_I2S_FIRST_BIT_SHIFT;
+	/* Enable one of the four available serial data channels */
+	core_cfg.i2s_cfg.active_sds = HDMI_AUDIO_I2S_SD0_EN;
+
+	/* Core audio config */
+	core_cfg.freq_sample = sample_freq;
+	core_cfg.n = n;
+	core_cfg.cts = cts;
+	if (dss_has_feature(FEAT_HDMI_CTS_SWMODE)) {
+		core_cfg.aud_par_busclk = 0;
+		core_cfg.cts_mode = HDMI_AUDIO_CTS_MODE_SW;
+		core_cfg.use_mclk = false;
+	} else {
+		core_cfg.aud_par_busclk = (((128 * 31) - 1) << 8);
+		core_cfg.cts_mode = HDMI_AUDIO_CTS_MODE_HW;
+		core_cfg.use_mclk = true;
+		core_cfg.mclk_mode = HDMI_AUDIO_MCLK_128FS;
+	}
+	core_cfg.layout = HDMI_AUDIO_LAYOUT_2CH;
+	core_cfg.en_spdif = false;
+	/* Use sample frequency from channel status word */
+	core_cfg.fs_override = true;
+	/* Enable ACR packets */
+	core_cfg.en_acr_pkt = true;
+	/* Disable direct streaming digital audio */
+	core_cfg.en_dsd_audio = false;
+	/* Use parallel audio interface */
+	core_cfg.en_parallel_aud_input = true;
+
+	hdmi_core_audio_config(&core_cfg);
+
+	/*
+	 * Configure packet
+	 * info frame audio see doc CEA861-D page 74
+	 */
+	aud_if_cfg.db1_coding_type = HDMI_INFOFRAME_AUDIO_DB1CT_FROM_STREAM;
+	aud_if_cfg.db1_channel_count = 2;
+	aud_if_cfg.db2_sample_freq = HDMI_INFOFRAME_AUDIO_DB2SF_FROM_STREAM;
+	aud_if_cfg.db2_sample_size = HDMI_INFOFRAME_AUDIO_DB2SS_FROM_STREAM;
+	aud_if_cfg.db4_channel_alloc = 0x00;
+	aud_if_cfg.db5_downmix_inh = false;
+	aud_if_cfg.db5_lsv = 0;
+
+	hdmi_core_audio_infoframe_config(&aud_if_cfg);
+	return 0;
+}
+
+static int hdmi_audio_trigger(struct snd_pcm_substream *substream, int cmd,
+				  struct snd_soc_dai *dai)
+{
+	int err = 0;
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		REG_FLD_MOD(HDMI_CORE_AV_AUD_MODE, 1, 0, 0);
+		REG_FLD_MOD(HDMI_WP_AUDIO_CTRL, 1, 31, 31);
+		REG_FLD_MOD(HDMI_WP_AUDIO_CTRL, 1, 30, 30);
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		REG_FLD_MOD(HDMI_CORE_AV_AUD_MODE, 0, 0, 0);
+		REG_FLD_MOD(HDMI_WP_AUDIO_CTRL, 0, 30, 30);
+		REG_FLD_MOD(HDMI_WP_AUDIO_CTRL, 0, 31, 31);
+		break;
+	default:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int hdmi_audio_startup(struct snd_pcm_substream *substream,
+				  struct snd_soc_dai *dai)
+{
+	if (!hdmi.mode) {
+		pr_err("Current video settings do not support audio.\n");
+		return -EIO;
+	}
+	return 0;
+}
+
+static struct snd_soc_codec_driver hdmi_audio_codec_drv = {
+};
+
+static struct snd_soc_dai_ops hdmi_audio_codec_ops = {
+	.hw_params = hdmi_audio_hw_params,
+	.trigger = hdmi_audio_trigger,
+	.startup = hdmi_audio_startup,
+};
+
+static struct snd_soc_dai_driver hdmi_codec_dai_drv = {
+		.name = "hdmi-audio-codec",
+		.playback = {
+			.channels_min = 2,
+			.channels_max = 2,
+			.rates = SNDRV_PCM_RATE_32000 |
+				SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				SNDRV_PCM_FMTBIT_S24_LE,
+		},
+		.ops = &hdmi_audio_codec_ops,
+};
+#endif
+
 /* HDMI HW IP initialisation */
 static int omapdss_hdmihw_probe(struct platform_device *pdev)
 {
 	struct resource *hdmi_mem;
+#if defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI) || \
+	defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI_MODULE)
+	int ret;
+#endif
 
 	hdmi.pdata = pdev->dev.platform_data;
 	hdmi.pdev = pdev;
@@ -1300,6 +1715,17 @@
 
 	hdmi_panel_init();
 
+#if defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI) || \
+	defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI_MODULE)
+
+	/* Register ASoC codec DAI */
+	ret = snd_soc_register_codec(&pdev->dev, &hdmi_audio_codec_drv,
+					&hdmi_codec_dai_drv, 1);
+	if (ret) {
+		DSSERR("can't register ASoC HDMI audio codec\n");
+		return ret;
+	}
+#endif
 	return 0;
 }
 
@@ -1307,6 +1733,11 @@
 {
 	hdmi_panel_exit();
 
+#if defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI) || \
+	defined(CONFIG_SND_OMAP_SOC_OMAP4_HDMI_MODULE)
+	snd_soc_unregister_codec(&pdev->dev);
+#endif
+
 	iounmap(hdmi.base_wp);
 
 	return 0;
diff --git a/drivers/video/omap2/dss/hdmi.h b/drivers/video/omap2/dss/hdmi.h
index 9887ab9..c885f9c 100644
--- a/drivers/video/omap2/dss/hdmi.h
+++ b/drivers/video/omap2/dss/hdmi.h
@@ -22,7 +22,7 @@
 #define _OMAP4_DSS_HDMI_H_
 
 #include <linux/string.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #define HDMI_WP		0x0
 #define HDMI_CORE_SYS		0x400
@@ -48,6 +48,10 @@
 #define HDMI_WP_VIDEO_TIMING_H			HDMI_WP_REG(0x68)
 #define HDMI_WP_VIDEO_TIMING_V			HDMI_WP_REG(0x6C)
 #define HDMI_WP_WP_CLK				HDMI_WP_REG(0x70)
+#define HDMI_WP_AUDIO_CFG			HDMI_WP_REG(0x80)
+#define HDMI_WP_AUDIO_CFG2			HDMI_WP_REG(0x84)
+#define HDMI_WP_AUDIO_CTRL			HDMI_WP_REG(0x88)
+#define HDMI_WP_AUDIO_DATA			HDMI_WP_REG(0x8C)
 
 /* HDMI IP Core System */
 #define HDMI_CORE_SYS_REG(idx)			HDMI_REG(HDMI_CORE_SYS + idx)
@@ -105,6 +109,8 @@
 #define HDMI_CORE_AV_AVI_DBYTE_NELEMS		HDMI_CORE_AV_REG(15)
 #define HDMI_CORE_AV_SPD_DBYTE			HDMI_CORE_AV_REG(0x190)
 #define HDMI_CORE_AV_SPD_DBYTE_NELEMS		HDMI_CORE_AV_REG(27)
+#define HDMI_CORE_AV_AUD_DBYTE(n)		HDMI_CORE_AV_REG(n * 4 + 0x210)
+#define HDMI_CORE_AV_AUD_DBYTE_NELEMS		HDMI_CORE_AV_REG(10)
 #define HDMI_CORE_AV_MPEG_DBYTE		HDMI_CORE_AV_REG(0x290)
 #define HDMI_CORE_AV_MPEG_DBYTE_NELEMS		HDMI_CORE_AV_REG(27)
 #define HDMI_CORE_AV_GEN_DBYTE			HDMI_CORE_AV_REG(0x300)
@@ -153,6 +159,10 @@
 #define HDMI_CORE_AV_SPD_VERS			HDMI_CORE_AV_REG(0x184)
 #define HDMI_CORE_AV_SPD_LEN			HDMI_CORE_AV_REG(0x188)
 #define HDMI_CORE_AV_SPD_CHSUM			HDMI_CORE_AV_REG(0x18C)
+#define HDMI_CORE_AV_AUDIO_TYPE		HDMI_CORE_AV_REG(0x200)
+#define HDMI_CORE_AV_AUDIO_VERS		HDMI_CORE_AV_REG(0x204)
+#define HDMI_CORE_AV_AUDIO_LEN			HDMI_CORE_AV_REG(0x208)
+#define HDMI_CORE_AV_AUDIO_CHSUM		HDMI_CORE_AV_REG(0x20C)
 #define HDMI_CORE_AV_MPEG_TYPE			HDMI_CORE_AV_REG(0x280)
 #define HDMI_CORE_AV_MPEG_VERS			HDMI_CORE_AV_REG(0x284)
 #define HDMI_CORE_AV_MPEG_LEN			HDMI_CORE_AV_REG(0x288)
@@ -272,7 +282,7 @@
 	HDMI_PACKETREPEATOFF = 0
 };
 
-/* INFOFRAME_AVI_ definitions */
+/* INFOFRAME_AVI_ and INFOFRAME_AUDIO_ definitions */
 enum hdmi_core_infoframe {
 	HDMI_INFOFRAME_AVI_DB1Y_RGB = 0,
 	HDMI_INFOFRAME_AVI_DB1Y_YUV422 = 1,
@@ -317,7 +327,36 @@
 	HDMI_INFOFRAME_AVI_DB5PR_7 = 6,
 	HDMI_INFOFRAME_AVI_DB5PR_8 = 7,
 	HDMI_INFOFRAME_AVI_DB5PR_9 = 8,
-	HDMI_INFOFRAME_AVI_DB5PR_10 = 9
+	HDMI_INFOFRAME_AVI_DB5PR_10 = 9,
+	HDMI_INFOFRAME_AUDIO_DB1CT_FROM_STREAM = 0,
+	HDMI_INFOFRAME_AUDIO_DB1CT_IEC60958 = 1,
+	HDMI_INFOFRAME_AUDIO_DB1CT_AC3 = 2,
+	HDMI_INFOFRAME_AUDIO_DB1CT_MPEG1 = 3,
+	HDMI_INFOFRAME_AUDIO_DB1CT_MP3 = 4,
+	HDMI_INFOFRAME_AUDIO_DB1CT_MPEG2_MULTICH = 5,
+	HDMI_INFOFRAME_AUDIO_DB1CT_AAC = 6,
+	HDMI_INFOFRAME_AUDIO_DB1CT_DTS = 7,
+	HDMI_INFOFRAME_AUDIO_DB1CT_ATRAC = 8,
+	HDMI_INFOFRAME_AUDIO_DB1CT_ONEBIT = 9,
+	HDMI_INFOFRAME_AUDIO_DB1CT_DOLBY_DIGITAL_PLUS = 10,
+	HDMI_INFOFRAME_AUDIO_DB1CT_DTS_HD = 11,
+	HDMI_INFOFRAME_AUDIO_DB1CT_MAT = 12,
+	HDMI_INFOFRAME_AUDIO_DB1CT_DST = 13,
+	HDMI_INFOFRAME_AUDIO_DB1CT_WMA_PRO = 14,
+	HDMI_INFOFRAME_AUDIO_DB2SF_FROM_STREAM = 0,
+	HDMI_INFOFRAME_AUDIO_DB2SF_32000 = 1,
+	HDMI_INFOFRAME_AUDIO_DB2SF_44100 = 2,
+	HDMI_INFOFRAME_AUDIO_DB2SF_48000 = 3,
+	HDMI_INFOFRAME_AUDIO_DB2SF_88200 = 4,
+	HDMI_INFOFRAME_AUDIO_DB2SF_96000 = 5,
+	HDMI_INFOFRAME_AUDIO_DB2SF_176400 = 6,
+	HDMI_INFOFRAME_AUDIO_DB2SF_192000 = 7,
+	HDMI_INFOFRAME_AUDIO_DB2SS_FROM_STREAM = 0,
+	HDMI_INFOFRAME_AUDIO_DB2SS_16BIT = 1,
+	HDMI_INFOFRAME_AUDIO_DB2SS_20BIT = 2,
+	HDMI_INFOFRAME_AUDIO_DB2SS_24BIT = 3,
+	HDMI_INFOFRAME_AUDIO_DB5_DM_INH_PERMITTED = 0,
+	HDMI_INFOFRAME_AUDIO_DB5_DM_INH_PROHIBITED = 1
 };
 
 enum hdmi_packing_mode {
@@ -327,6 +366,121 @@
 	HDMI_PACK_ALREADYPACKED = 7
 };
 
+enum hdmi_core_audio_sample_freq {
+	HDMI_AUDIO_FS_32000 = 0x3,
+	HDMI_AUDIO_FS_44100 = 0x0,
+	HDMI_AUDIO_FS_48000 = 0x2,
+	HDMI_AUDIO_FS_88200 = 0x8,
+	HDMI_AUDIO_FS_96000 = 0xA,
+	HDMI_AUDIO_FS_176400 = 0xC,
+	HDMI_AUDIO_FS_192000 = 0xE,
+	HDMI_AUDIO_FS_NOT_INDICATED = 0x1
+};
+
+enum hdmi_core_audio_layout {
+	HDMI_AUDIO_LAYOUT_2CH = 0,
+	HDMI_AUDIO_LAYOUT_8CH = 1
+};
+
+enum hdmi_core_cts_mode {
+	HDMI_AUDIO_CTS_MODE_HW = 0,
+	HDMI_AUDIO_CTS_MODE_SW = 1
+};
+
+enum hdmi_stereo_channels {
+	HDMI_AUDIO_STEREO_NOCHANNELS = 0,
+	HDMI_AUDIO_STEREO_ONECHANNEL = 1,
+	HDMI_AUDIO_STEREO_TWOCHANNELS = 2,
+	HDMI_AUDIO_STEREO_THREECHANNELS = 3,
+	HDMI_AUDIO_STEREO_FOURCHANNELS = 4
+};
+
+enum hdmi_audio_type {
+	HDMI_AUDIO_TYPE_LPCM = 0,
+	HDMI_AUDIO_TYPE_IEC = 1
+};
+
+enum hdmi_audio_justify {
+	HDMI_AUDIO_JUSTIFY_LEFT = 0,
+	HDMI_AUDIO_JUSTIFY_RIGHT = 1
+};
+
+enum hdmi_audio_sample_order {
+	HDMI_AUDIO_SAMPLE_RIGHT_FIRST = 0,
+	HDMI_AUDIO_SAMPLE_LEFT_FIRST = 1
+};
+
+enum hdmi_audio_samples_perword {
+	HDMI_AUDIO_ONEWORD_ONESAMPLE = 0,
+	HDMI_AUDIO_ONEWORD_TWOSAMPLES = 1
+};
+
+enum hdmi_audio_sample_size {
+	HDMI_AUDIO_SAMPLE_16BITS = 0,
+	HDMI_AUDIO_SAMPLE_24BITS = 1
+};
+
+enum hdmi_audio_transf_mode {
+	HDMI_AUDIO_TRANSF_DMA = 0,
+	HDMI_AUDIO_TRANSF_IRQ = 1
+};
+
+enum hdmi_audio_blk_strt_end_sig {
+	HDMI_AUDIO_BLOCK_SIG_STARTEND_ON = 0,
+	HDMI_AUDIO_BLOCK_SIG_STARTEND_OFF = 1
+};
+
+enum hdmi_audio_i2s_config {
+	HDMI_AUDIO_I2S_WS_POLARITY_LOW_IS_LEFT = 0,
+	HDMI_AUDIO_I2S_WS_POLARIT_YLOW_IS_RIGHT = 1,
+	HDMI_AUDIO_I2S_MSB_SHIFTED_FIRST = 0,
+	HDMI_AUDIO_I2S_LSB_SHIFTED_FIRST = 1,
+	HDMI_AUDIO_I2S_MAX_WORD_20BITS = 0,
+	HDMI_AUDIO_I2S_MAX_WORD_24BITS = 1,
+	HDMI_AUDIO_I2S_CHST_WORD_NOT_SPECIFIED = 0,
+	HDMI_AUDIO_I2S_CHST_WORD_16_BITS = 1,
+	HDMI_AUDIO_I2S_CHST_WORD_17_BITS = 6,
+	HDMI_AUDIO_I2S_CHST_WORD_18_BITS = 2,
+	HDMI_AUDIO_I2S_CHST_WORD_19_BITS = 4,
+	HDMI_AUDIO_I2S_CHST_WORD_20_BITS_20MAX = 5,
+	HDMI_AUDIO_I2S_CHST_WORD_20_BITS_24MAX = 1,
+	HDMI_AUDIO_I2S_CHST_WORD_21_BITS = 6,
+	HDMI_AUDIO_I2S_CHST_WORD_22_BITS = 2,
+	HDMI_AUDIO_I2S_CHST_WORD_23_BITS = 4,
+	HDMI_AUDIO_I2S_CHST_WORD_24_BITS = 5,
+	HDMI_AUDIO_I2S_SCK_EDGE_FALLING = 0,
+	HDMI_AUDIO_I2S_SCK_EDGE_RISING = 1,
+	HDMI_AUDIO_I2S_VBIT_FOR_PCM = 0,
+	HDMI_AUDIO_I2S_VBIT_FOR_COMPRESSED = 1,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_NA = 0,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_16 = 2,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_17 = 12,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_18 = 4,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_19 = 8,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_20 = 10,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_21 = 13,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_22 = 5,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_23 = 9,
+	HDMI_AUDIO_I2S_INPUT_LENGTH_24 = 11,
+	HDMI_AUDIO_I2S_FIRST_BIT_SHIFT = 0,
+	HDMI_AUDIO_I2S_FIRST_BIT_NO_SHIFT = 1,
+	HDMI_AUDIO_I2S_SD0_EN = 1,
+	HDMI_AUDIO_I2S_SD1_EN = 1 << 1,
+	HDMI_AUDIO_I2S_SD2_EN = 1 << 2,
+	HDMI_AUDIO_I2S_SD3_EN = 1 << 3,
+};
+
+enum hdmi_audio_mclk_mode {
+	HDMI_AUDIO_MCLK_128FS = 0,
+	HDMI_AUDIO_MCLK_256FS = 1,
+	HDMI_AUDIO_MCLK_384FS = 2,
+	HDMI_AUDIO_MCLK_512FS = 3,
+	HDMI_AUDIO_MCLK_768FS = 4,
+	HDMI_AUDIO_MCLK_1024FS = 5,
+	HDMI_AUDIO_MCLK_1152FS = 6,
+	HDMI_AUDIO_MCLK_192FS = 7
+};
+
 struct hdmi_core_video_config {
 	enum hdmi_core_inputbus_width	ip_bus_width;
 	enum hdmi_core_dither_trunc	op_dither_truc;
@@ -376,6 +530,19 @@
 	u16	db12_13_pixel_sofright;
 		/* Pixel number start of right bar */
 };
+/*
+ * Refer to section 8.2 in HDMI 1.3 specification for
+ * details about infoframe databytes
+ */
+struct hdmi_core_infoframe_audio {
+	u8 db1_coding_type;
+	u8 db1_channel_count;
+	u8 db2_sample_freq;
+	u8 db2_sample_size;
+	u8 db4_channel_alloc;
+	bool db5_downmix_inh;
+	u8 db5_lsv;	/* Level shift values for downmix */
+};
 
 struct hdmi_core_packet_enable_repeat {
 	u32	audio_pkt;
@@ -412,4 +579,53 @@
 	struct hdmi_cm cm;
 };
 
+struct hdmi_audio_format {
+	enum hdmi_stereo_channels		stereo_channels;
+	u8					active_chnnls_msk;
+	enum hdmi_audio_type			type;
+	enum hdmi_audio_justify			justification;
+	enum hdmi_audio_sample_order		sample_order;
+	enum hdmi_audio_samples_perword		samples_per_word;
+	enum hdmi_audio_sample_size		sample_size;
+	enum hdmi_audio_blk_strt_end_sig	en_sig_blk_strt_end;
+};
+
+struct hdmi_audio_dma {
+	u8				transfer_size;
+	u8				block_size;
+	enum hdmi_audio_transf_mode	mode;
+	u16				fifo_threshold;
+};
+
+struct hdmi_core_audio_i2s_config {
+	u8 word_max_length;
+	u8 word_length;
+	u8 in_length_bits;
+	u8 justification;
+	u8 en_high_bitrate_aud;
+	u8 sck_edge_mode;
+	u8 cbit_order;
+	u8 vbit;
+	u8 ws_polarity;
+	u8 direction;
+	u8 shift;
+	u8 active_sds;
+};
+
+struct hdmi_core_audio_config {
+	struct hdmi_core_audio_i2s_config	i2s_cfg;
+	enum hdmi_core_audio_sample_freq	freq_sample;
+	bool					fs_override;
+	u32					n;
+	u32					cts;
+	u32					aud_par_busclk;
+	enum hdmi_core_audio_layout		layout;
+	enum hdmi_core_cts_mode			cts_mode;
+	bool					use_mclk;
+	enum hdmi_audio_mclk_mode		mclk_mode;
+	bool					en_acr_pkt;
+	bool					en_dsd_audio;
+	bool					en_parallel_aud_input;
+	bool					en_spdif;
+};
 #endif
diff --git a/drivers/video/omap2/dss/hdmi_omap4_panel.c b/drivers/video/omap2/dss/hdmi_omap4_panel.c
index ffb5de9..7d4f2bd 100644
--- a/drivers/video/omap2/dss/hdmi_omap4_panel.c
+++ b/drivers/video/omap2/dss/hdmi_omap4_panel.c
@@ -24,7 +24,7 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #include "dss.h"
 
diff --git a/drivers/video/omap2/dss/manager.c b/drivers/video/omap2/dss/manager.c
index bcd37ec..9aeea50 100644
--- a/drivers/video/omap2/dss/manager.c
+++ b/drivers/video/omap2/dss/manager.c
@@ -29,7 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 
 #include "dss.h"
@@ -393,6 +393,7 @@
 
 	u32 paddr;
 	void __iomem *vaddr;
+	u32 p_uv_addr; /* relevant for NV12 format only */
 	u16 screen_width;
 	u16 width;
 	u16 height;
@@ -775,10 +776,17 @@
 		}
 
 		switch (c->color_mode) {
+		case OMAP_DSS_COLOR_NV12:
+			bpp = 8;
+			break;
 		case OMAP_DSS_COLOR_RGB16:
 		case OMAP_DSS_COLOR_ARGB16:
 		case OMAP_DSS_COLOR_YUV2:
 		case OMAP_DSS_COLOR_UYVY:
+		case OMAP_DSS_COLOR_RGBA16:
+		case OMAP_DSS_COLOR_RGBX16:
+		case OMAP_DSS_COLOR_ARGB16_1555:
+		case OMAP_DSS_COLOR_XRGB16_1555:
 			bpp = 16;
 			break;
 
@@ -854,7 +862,8 @@
 			c->mirror,
 			c->global_alpha,
 			c->pre_mult_alpha,
-			c->channel);
+			c->channel,
+			c->p_uv_addr);
 
 	if (r) {
 		/* this shouldn't happen */
@@ -1269,6 +1278,7 @@
 
 		oc->paddr = ovl->info.paddr;
 		oc->vaddr = ovl->info.vaddr;
+		oc->p_uv_addr = ovl->info.p_uv_addr;
 		oc->screen_width = ovl->info.screen_width;
 		oc->width = ovl->info.width;
 		oc->height = ovl->info.height;
diff --git a/drivers/video/omap2/dss/overlay.c b/drivers/video/omap2/dss/overlay.c
index f1aca6d..0f08025b 100644
--- a/drivers/video/omap2/dss/overlay.c
+++ b/drivers/video/omap2/dss/overlay.c
@@ -31,7 +31,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 
 #include "dss.h"
@@ -201,12 +201,16 @@
 static ssize_t overlay_enabled_store(struct omap_overlay *ovl, const char *buf,
 		size_t size)
 {
-	int r;
+	int r, enable;
 	struct omap_overlay_info info;
 
 	ovl->get_overlay_info(ovl, &info);
 
-	info.enabled = simple_strtoul(buf, NULL, 10);
+	r = kstrtoint(buf, 0, &enable);
+	if (r)
+		return r;
+
+	info.enabled = !!enable;
 
 	r = ovl->set_overlay_info(ovl, &info);
 	if (r)
@@ -231,8 +235,13 @@
 		const char *buf, size_t size)
 {
 	int r;
+	u8 alpha;
 	struct omap_overlay_info info;
 
+	r = kstrtou8(buf, 0, &alpha);
+	if (r)
+		return r;
+
 	ovl->get_overlay_info(ovl, &info);
 
 	/* Video1 plane does not support global alpha
@@ -242,7 +251,7 @@
 			ovl->id == OMAP_DSS_VIDEO1)
 		info.global_alpha = 255;
 	else
-		info.global_alpha = simple_strtoul(buf, NULL, 10);
+		info.global_alpha = alpha;
 
 	r = ovl->set_overlay_info(ovl, &info);
 	if (r)
@@ -268,8 +277,13 @@
 		const char *buf, size_t size)
 {
 	int r;
+	u8 alpha;
 	struct omap_overlay_info info;
 
+	r = kstrtou8(buf, 0, &alpha);
+	if (r)
+		return r;
+
 	ovl->get_overlay_info(ovl, &info);
 
 	/* only GFX and Video2 plane support pre alpha multiplied
@@ -279,7 +293,7 @@
 		ovl->id == OMAP_DSS_VIDEO1)
 		info.pre_mult_alpha = 0;
 	else
-		info.pre_mult_alpha = simple_strtoul(buf, NULL, 10);
+		info.pre_mult_alpha = alpha;
 
 	r = ovl->set_overlay_info(ovl, &info);
 	if (r)
@@ -491,13 +505,18 @@
 	ovl->manager = mgr;
 
 	dss_clk_enable(DSS_CLK_ICK | DSS_CLK_FCK);
-	/* XXX: on manual update display, in auto update mode, a bug happens
-	 * here. When an overlay is first enabled on LCD, then it's disabled,
-	 * and the manager is changed to TV, we sometimes get SYNC_LOST_DIGIT
-	 * errors. Waiting before changing the channel_out fixes it. I'm
-	 * guessing that the overlay is still somehow being used for the LCD,
-	 * but I don't understand how or why. */
-	msleep(40);
+	/* XXX: When there is an overlay on a DSI manual update display, and
+	 * the overlay is first disabled, then moved to tv, and enabled, we
+	 * seem to get SYNC_LOST_DIGIT error.
+	 *
+	 * Waiting doesn't seem to help, but updating the manual update display
+	 * after disabling the overlay seems to fix this. This hints that the
+	 * overlay is perhaps somehow tied to the LCD output until the output
+	 * is updated.
+	 *
+	 * Userspace workaround for this is to update the LCD after disabling
+	 * the overlay, but before moving the overlay to TV.
+	 */
 	dispc_set_channel_out(ovl->id, mgr->id);
 	dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 
diff --git a/drivers/video/omap2/dss/rfbi.c b/drivers/video/omap2/dss/rfbi.c
index 5ea17f4..c06fbe0 100644
--- a/drivers/video/omap2/dss/rfbi.c
+++ b/drivers/video/omap2/dss/rfbi.c
@@ -32,8 +32,9 @@
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
 #include <linux/seq_file.h>
+#include <linux/semaphore.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include "dss.h"
 
 struct rfbi_reg { u16 idx; };
@@ -65,9 +66,6 @@
 #define REG_FLD_MOD(idx, val, start, end) \
 	rfbi_write_reg(idx, FLD_MOD(rfbi_read_reg(idx), val, start, end))
 
-/* To work around an RFBI transfer rate limitation */
-#define OMAP_RFBI_RATE_LIMIT    1
-
 enum omap_rfbi_cycleformat {
 	OMAP_DSS_RFBI_CYCLEFORMAT_1_1 = 0,
 	OMAP_DSS_RFBI_CYCLEFORMAT_2_1 = 1,
@@ -89,11 +87,6 @@
 	OMAP_DSS_RFBI_PARALLELMODE_16 = 3,
 };
 
-enum update_cmd {
-	RFBI_CMD_UPDATE = 0,
-	RFBI_CMD_SYNC   = 1,
-};
-
 static int rfbi_convert_timings(struct rfbi_timings *t);
 static void rfbi_get_clk_info(u32 *clk_period, u32 *max_clk_div);
 
@@ -114,20 +107,9 @@
 
 	struct omap_dss_device *dssdev[2];
 
-	struct kfifo      cmd_fifo;
-	spinlock_t        cmd_lock;
-	struct completion cmd_done;
-	atomic_t          cmd_fifo_full;
-	atomic_t          cmd_pending;
+	struct semaphore bus_lock;
 } rfbi;
 
-struct update_region {
-	u16	x;
-	u16     y;
-	u16     w;
-	u16     h;
-};
-
 static inline void rfbi_write_reg(const struct rfbi_reg idx, u32 val)
 {
 	__raw_writel(val, rfbi.base + idx.idx);
@@ -146,9 +128,20 @@
 		dss_clk_disable(DSS_CLK_ICK | DSS_CLK_FCK);
 }
 
+void rfbi_bus_lock(void)
+{
+	down(&rfbi.bus_lock);
+}
+EXPORT_SYMBOL(rfbi_bus_lock);
+
+void rfbi_bus_unlock(void)
+{
+	up(&rfbi.bus_lock);
+}
+EXPORT_SYMBOL(rfbi_bus_unlock);
+
 void omap_rfbi_write_command(const void *buf, u32 len)
 {
-	rfbi_enable_clocks(1);
 	switch (rfbi.parallelmode) {
 	case OMAP_DSS_RFBI_PARALLELMODE_8:
 	{
@@ -172,13 +165,11 @@
 	default:
 		BUG();
 	}
-	rfbi_enable_clocks(0);
 }
 EXPORT_SYMBOL(omap_rfbi_write_command);
 
 void omap_rfbi_read_data(void *buf, u32 len)
 {
-	rfbi_enable_clocks(1);
 	switch (rfbi.parallelmode) {
 	case OMAP_DSS_RFBI_PARALLELMODE_8:
 	{
@@ -206,13 +197,11 @@
 	default:
 		BUG();
 	}
-	rfbi_enable_clocks(0);
 }
 EXPORT_SYMBOL(omap_rfbi_read_data);
 
 void omap_rfbi_write_data(const void *buf, u32 len)
 {
-	rfbi_enable_clocks(1);
 	switch (rfbi.parallelmode) {
 	case OMAP_DSS_RFBI_PARALLELMODE_8:
 	{
@@ -237,7 +226,6 @@
 		BUG();
 
 	}
-	rfbi_enable_clocks(0);
 }
 EXPORT_SYMBOL(omap_rfbi_write_data);
 
@@ -249,8 +237,6 @@
 	int horiz_offset = scr_width - w;
 	int i;
 
-	rfbi_enable_clocks(1);
-
 	if (rfbi.datatype == OMAP_DSS_RFBI_DATATYPE_16 &&
 	   rfbi.parallelmode == OMAP_DSS_RFBI_PARALLELMODE_8) {
 		const u16 __iomem *pd = buf;
@@ -295,12 +281,10 @@
 	} else {
 		BUG();
 	}
-
-	rfbi_enable_clocks(0);
 }
 EXPORT_SYMBOL(omap_rfbi_write_pixels);
 
-void rfbi_transfer_area(struct omap_dss_device *dssdev, u16 width,
+static void rfbi_transfer_area(struct omap_dss_device *dssdev, u16 width,
 		u16 height, void (*callback)(void *data), void *data)
 {
 	u32 l;
@@ -317,8 +301,6 @@
 	rfbi.framedone_callback = callback;
 	rfbi.framedone_callback_data = data;
 
-	rfbi_enable_clocks(1);
-
 	rfbi_write_reg(RFBI_PIXEL_CNT, width * height);
 
 	l = rfbi_read_reg(RFBI_CONTROL);
@@ -337,15 +319,11 @@
 
 	REG_FLD_MOD(RFBI_CONTROL, 0, 0, 0);
 
-	rfbi_enable_clocks(0);
-
 	callback = rfbi.framedone_callback;
 	rfbi.framedone_callback = NULL;
 
 	if (callback != NULL)
 		callback(rfbi.framedone_callback_data);
-
-	atomic_set(&rfbi.cmd_pending, 0);
 }
 
 #if 1 /* VERBOSE */
@@ -435,7 +413,7 @@
 }
 
 
-void rfbi_set_timings(int rfbi_module, struct rfbi_timings *t)
+static void rfbi_set_timings(int rfbi_module, struct rfbi_timings *t)
 {
 	int r;
 
@@ -447,7 +425,6 @@
 
 	BUG_ON(!t->converted);
 
-	rfbi_enable_clocks(1);
 	rfbi_write_reg(RFBI_ONOFF_TIME(rfbi_module), t->tim[0]);
 	rfbi_write_reg(RFBI_CYCLE_TIME(rfbi_module), t->tim[1]);
 
@@ -456,7 +433,6 @@
 		    (t->tim[2] ? 1 : 0), 4, 4);
 
 	rfbi_print_timings();
-	rfbi_enable_clocks(0);
 }
 
 static int ps_to_rfbi_ticks(int time, int div)
@@ -472,59 +448,6 @@
 	return ret;
 }
 
-#ifdef OMAP_RFBI_RATE_LIMIT
-unsigned long rfbi_get_max_tx_rate(void)
-{
-	unsigned long   l4_rate, dss1_rate;
-	int             min_l4_ticks = 0;
-	int             i;
-
-	/* According to TI this can't be calculated so make the
-	 * adjustments for a couple of known frequencies and warn for
-	 * others.
-	 */
-	static const struct {
-		unsigned long l4_clk;           /* HZ */
-		unsigned long dss1_clk;         /* HZ */
-		unsigned long min_l4_ticks;
-	} ftab[] = {
-		{ 55,   132,    7, },           /* 7.86 MPix/s */
-		{ 110,  110,    12, },          /* 9.16 MPix/s */
-		{ 110,  132,    10, },          /* 11   Mpix/s */
-		{ 120,  120,    10, },          /* 12   Mpix/s */
-		{ 133,  133,    10, },          /* 13.3 Mpix/s */
-	};
-
-	l4_rate = rfbi.l4_khz / 1000;
-	dss1_rate = dss_clk_get_rate(DSS_CLK_FCK) / 1000000;
-
-	for (i = 0; i < ARRAY_SIZE(ftab); i++) {
-		/* Use a window instead of an exact match, to account
-		 * for different DPLL multiplier / divider pairs.
-		 */
-		if (abs(ftab[i].l4_clk - l4_rate) < 3 &&
-		    abs(ftab[i].dss1_clk - dss1_rate) < 3) {
-			min_l4_ticks = ftab[i].min_l4_ticks;
-			break;
-		}
-	}
-	if (i == ARRAY_SIZE(ftab)) {
-		/* Can't be sure, return anyway the maximum not
-		 * rate-limited. This might cause a problem only for the
-		 * tearing synchronisation.
-		 */
-		DSSERR("can't determine maximum RFBI transfer rate\n");
-		return rfbi.l4_khz * 1000;
-	}
-	return rfbi.l4_khz * 1000 / min_l4_ticks;
-}
-#else
-int rfbi_get_max_tx_rate(void)
-{
-	return rfbi.l4_khz * 1000;
-}
-#endif
-
 static void rfbi_get_clk_info(u32 *clk_period, u32 *max_clk_div)
 {
 	*clk_period = 1000000000 / rfbi.l4_khz;
@@ -644,7 +567,6 @@
 	DSSDBG("setup_te: mode %d hs %d vs %d hs_inv %d vs_inv %d\n",
 		mode, hs, vs, hs_pol_inv, vs_pol_inv);
 
-	rfbi_enable_clocks(1);
 	rfbi_write_reg(RFBI_HSYNC_WIDTH, hs);
 	rfbi_write_reg(RFBI_VSYNC_WIDTH, vs);
 
@@ -657,7 +579,6 @@
 		l &= ~(1 << 20);
 	else
 		l |= 1 << 20;
-	rfbi_enable_clocks(0);
 
 	return 0;
 }
@@ -672,7 +593,6 @@
 	if (line > (1 << 11) - 1)
 		return -EINVAL;
 
-	rfbi_enable_clocks(1);
 	l = rfbi_read_reg(RFBI_CONFIG(0));
 	l &= ~(0x3 << 2);
 	if (enable) {
@@ -682,50 +602,12 @@
 		rfbi.te_enabled = 0;
 	rfbi_write_reg(RFBI_CONFIG(0), l);
 	rfbi_write_reg(RFBI_LINE_NUMBER, line);
-	rfbi_enable_clocks(0);
 
 	return 0;
 }
 EXPORT_SYMBOL(omap_rfbi_enable_te);
 
-#if 0
-static void rfbi_enable_config(int enable1, int enable2)
-{
-	u32 l;
-	int cs = 0;
-
-	if (enable1)
-		cs |= 1<<0;
-	if (enable2)
-		cs |= 1<<1;
-
-	rfbi_enable_clocks(1);
-
-	l = rfbi_read_reg(RFBI_CONTROL);
-
-	l = FLD_MOD(l, cs, 3, 2);
-	l = FLD_MOD(l, 0, 1, 1);
-
-	rfbi_write_reg(RFBI_CONTROL, l);
-
-
-	l = rfbi_read_reg(RFBI_CONFIG(0));
-	l = FLD_MOD(l, 0, 3, 2); /* TRIGGERMODE: ITE */
-	/*l |= FLD_VAL(2, 8, 7); */ /* L4FORMAT, 2pix/L4 */
-	/*l |= FLD_VAL(0, 8, 7); */ /* L4FORMAT, 1pix/L4 */
-
-	l = FLD_MOD(l, 0, 16, 16); /* A0POLARITY */
-	l = FLD_MOD(l, 1, 20, 20); /* TE_VSYNC_POLARITY */
-	l = FLD_MOD(l, 1, 21, 21); /* HSYNCPOLARITY */
-
-	l = FLD_MOD(l, OMAP_DSS_RFBI_PARALLELMODE_8, 1, 0);
-	rfbi_write_reg(RFBI_CONFIG(0), l);
-
-	rfbi_enable_clocks(0);
-}
-#endif
-
-int rfbi_configure(int rfbi_module, int bpp, int lines)
+static int rfbi_configure(int rfbi_module, int bpp, int lines)
 {
 	u32 l;
 	int cycle1 = 0, cycle2 = 0, cycle3 = 0;
@@ -821,8 +703,6 @@
 		break;
 	}
 
-	rfbi_enable_clocks(1);
-
 	REG_FLD_MOD(RFBI_CONTROL, 0, 3, 2); /* clear CS */
 
 	l = 0;
@@ -856,11 +736,15 @@
 	DSSDBG("RFBI config: bpp %d, lines %d, cycles: 0x%x 0x%x 0x%x\n",
 	       bpp, lines, cycle1, cycle2, cycle3);
 
-	rfbi_enable_clocks(0);
-
 	return 0;
 }
-EXPORT_SYMBOL(rfbi_configure);
+
+int omap_rfbi_configure(struct omap_dss_device *dssdev, int pixel_size,
+		int data_lines)
+{
+	return rfbi_configure(dssdev->phy.rfbi.channel, pixel_size, data_lines);
+}
+EXPORT_SYMBOL(omap_rfbi_configure);
 
 int omap_rfbi_prepare_update(struct omap_dss_device *dssdev,
 		u16 *x, u16 *y, u16 *w, u16 *h)
@@ -960,6 +844,8 @@
 {
 	int r;
 
+	rfbi_enable_clocks(1);
+
 	r = omap_dss_start_device(dssdev);
 	if (r) {
 		DSSERR("failed to start device\n");
@@ -1002,6 +888,8 @@
 	omap_dispc_unregister_isr(framedone_callback, NULL,
 			DISPC_IRQ_FRAMEDONE);
 	omap_dss_stop_device(dssdev);
+
+	rfbi_enable_clocks(0);
 }
 EXPORT_SYMBOL(omapdss_rfbi_display_disable);
 
@@ -1021,11 +909,7 @@
 
 	rfbi.pdev = pdev;
 
-	spin_lock_init(&rfbi.cmd_lock);
-
-	init_completion(&rfbi.cmd_done);
-	atomic_set(&rfbi.cmd_fifo_full, 0);
-	atomic_set(&rfbi.cmd_pending, 0);
+	sema_init(&rfbi.bus_lock, 1);
 
 	rfbi_mem = platform_get_resource(rfbi.pdev, IORESOURCE_MEM, 0);
 	if (!rfbi_mem) {
diff --git a/drivers/video/omap2/dss/sdi.c b/drivers/video/omap2/dss/sdi.c
index 54a53e6..0bd4b03 100644
--- a/drivers/video/omap2/dss/sdi.c
+++ b/drivers/video/omap2/dss/sdi.c
@@ -25,7 +25,7 @@
 #include <linux/err.h>
 #include <linux/regulator/consumer.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 #include "dss.h"
 
diff --git a/drivers/video/omap2/dss/venc.c b/drivers/video/omap2/dss/venc.c
index 8e35a5b..980f919 100644
--- a/drivers/video/omap2/dss/venc.c
+++ b/drivers/video/omap2/dss/venc.c
@@ -34,7 +34,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/cpu.h>
 
 #include "dss.h"
@@ -373,8 +373,11 @@
 		}
 	}
 
+#ifdef CONFIG_OMAP2_DSS_SLEEP_AFTER_VENC_RESET
 	/* the magical sleep that makes things work */
+	/* XXX more info? What bug this circumvents? */
 	msleep(20);
+#endif
 }
 
 static void venc_enable_clocks(int enable)
@@ -473,6 +476,12 @@
 
 	mutex_lock(&venc.venc_lock);
 
+	r = omap_dss_start_device(dssdev);
+	if (r) {
+		DSSERR("failed to start device\n");
+		goto err0;
+	}
+
 	if (dssdev->state != OMAP_DSS_DISPLAY_DISABLED) {
 		r = -EINVAL;
 		goto err1;
@@ -484,10 +493,11 @@
 
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
-	/* wait couple of vsyncs until enabling the LCD */
-	msleep(50);
-
+	mutex_unlock(&venc.venc_lock);
+	return 0;
 err1:
+	omap_dss_stop_device(dssdev);
+err0:
 	mutex_unlock(&venc.venc_lock);
 
 	return r;
@@ -510,10 +520,9 @@
 
 	venc_power_off(dssdev);
 
-	/* wait at least 5 vsyncs after disabling the LCD */
-	msleep(100);
-
 	dssdev->state = OMAP_DSS_DISPLAY_DISABLED;
+
+	omap_dss_stop_device(dssdev);
 end:
 	mutex_unlock(&venc.venc_lock);
 }
diff --git a/drivers/video/omap2/omapfb/omapfb-ioctl.c b/drivers/video/omap2/omapfb/omapfb-ioctl.c
index 6f435450..cff4503 100644
--- a/drivers/video/omap2/omapfb/omapfb-ioctl.c
+++ b/drivers/video/omap2/omapfb/omapfb-ioctl.c
@@ -28,7 +28,7 @@
 #include <linux/omapfb.h>
 #include <linux/vmalloc.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/vrfb.h>
 #include <plat/vram.h>
 
@@ -895,8 +895,16 @@
 
 		p.display_info.xres = xres;
 		p.display_info.yres = yres;
-		p.display_info.width = 0;
-		p.display_info.height = 0;
+
+		if (display->driver->get_dimensions) {
+			u32 w, h;
+			display->driver->get_dimensions(display, &w, &h);
+			p.display_info.width = w;
+			p.display_info.height = h;
+		} else {
+			p.display_info.width = 0;
+			p.display_info.height = 0;
+		}
 
 		if (copy_to_user((void __user *)arg, &p.display_info,
 					sizeof(p.display_info)))
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 505ec667..505bc12 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -30,7 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/omapfb.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/vram.h>
 #include <plat/vrfb.h>
 
@@ -702,8 +702,16 @@
 			var->xres, var->yres,
 			var->xres_virtual, var->yres_virtual);
 
-	var->height             = -1;
-	var->width              = -1;
+	if (display && display->driver->get_dimensions) {
+		u32 w, h;
+		display->driver->get_dimensions(display, &w, &h);
+		var->width = DIV_ROUND_CLOSEST(w, 1000);
+		var->height = DIV_ROUND_CLOSEST(h, 1000);
+	} else {
+		var->height = -1;
+		var->width = -1;
+	}
+
 	var->grayscale          = 0;
 
 	if (display && display->driver->get_timings) {
@@ -749,35 +757,6 @@
 
 static int omapfb_release(struct fb_info *fbi, int user)
 {
-#if 0
-	struct omapfb_info *ofbi = FB2OFB(fbi);
-	struct omapfb2_device *fbdev = ofbi->fbdev;
-	struct omap_dss_device *display = fb2display(fbi);
-
-	DBG("Closing fb with plane index %d\n", ofbi->id);
-
-	omapfb_lock(fbdev);
-
-	if (display && display->get_update_mode && display->update) {
-		/* XXX this update should be removed, I think. But it's
-		 * good for debugging */
-		if (display->get_update_mode(display) ==
-				OMAP_DSS_UPDATE_MANUAL) {
-			u16 w, h;
-
-			if (display->sync)
-				display->sync(display);
-
-			display->get_resolution(display, &w, &h);
-			display->update(display, 0, 0, w, h);
-		}
-	}
-
-	if (display && display->sync)
-		display->sync(display);
-
-	omapfb_unlock(fbdev);
-#endif
 	return 0;
 }
 
@@ -1263,7 +1242,6 @@
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct omapfb2_device *fbdev = ofbi->fbdev;
 	struct omap_dss_device *display = fb2display(fbi);
-	int do_update = 0;
 	int r = 0;
 
 	if (!display)
@@ -1279,11 +1257,6 @@
 		if (display->driver->resume)
 			r = display->driver->resume(display);
 
-		if (r == 0 && display->driver->get_update_mode &&
-				display->driver->get_update_mode(display) ==
-				OMAP_DSS_UPDATE_MANUAL)
-			do_update = 1;
-
 		break;
 
 	case FB_BLANK_NORMAL:
@@ -1307,13 +1280,6 @@
 exit:
 	omapfb_unlock(fbdev);
 
-	if (r == 0 && do_update && display->driver->update) {
-		u16 w, h;
-		display->driver->get_resolution(display, &w, &h);
-
-		r = display->driver->update(display, 0, 0, w, h);
-	}
-
 	return r;
 }
 
@@ -2030,9 +1996,9 @@
 static int omapfb_mode_to_timings(const char *mode_str,
 		struct omap_video_timings *timings, u8 *bpp)
 {
-	struct fb_info fbi;
-	struct fb_var_screeninfo var;
-	struct fb_ops fbops;
+	struct fb_info *fbi;
+	struct fb_var_screeninfo *var;
+	struct fb_ops *fbops;
 	int r;
 
 #ifdef CONFIG_OMAP2_DSS_VENC
@@ -2050,39 +2016,66 @@
 	/* this is quite a hack, but I wanted to use the modedb and for
 	 * that we need fb_info and var, so we create dummy ones */
 
-	memset(&fbi, 0, sizeof(fbi));
-	memset(&var, 0, sizeof(var));
-	memset(&fbops, 0, sizeof(fbops));
-	fbi.fbops = &fbops;
+	*bpp = 0;
+	fbi = NULL;
+	var = NULL;
+	fbops = NULL;
 
-	r = fb_find_mode(&var, &fbi, mode_str, NULL, 0, NULL, 24);
-
-	if (r != 0) {
-		timings->pixel_clock = PICOS2KHZ(var.pixclock);
-		timings->hbp = var.left_margin;
-		timings->hfp = var.right_margin;
-		timings->vbp = var.upper_margin;
-		timings->vfp = var.lower_margin;
-		timings->hsw = var.hsync_len;
-		timings->vsw = var.vsync_len;
-		timings->x_res = var.xres;
-		timings->y_res = var.yres;
-
-		switch (var.bits_per_pixel) {
-		case 16:
-			*bpp = 16;
-			break;
-		case 24:
-		case 32:
-		default:
-			*bpp = 24;
-			break;
-		}
-
-		return 0;
-	} else {
-		return -EINVAL;
+	fbi = kzalloc(sizeof(*fbi), GFP_KERNEL);
+	if (fbi == NULL) {
+		r = -ENOMEM;
+		goto err;
 	}
+
+	var = kzalloc(sizeof(*var), GFP_KERNEL);
+	if (var == NULL) {
+		r = -ENOMEM;
+		goto err;
+	}
+
+	fbops = kzalloc(sizeof(*fbops), GFP_KERNEL);
+	if (fbops == NULL) {
+		r = -ENOMEM;
+		goto err;
+	}
+
+	fbi->fbops = fbops;
+
+	r = fb_find_mode(var, fbi, mode_str, NULL, 0, NULL, 24);
+	if (r == 0) {
+		r = -EINVAL;
+		goto err;
+	}
+
+	timings->pixel_clock = PICOS2KHZ(var->pixclock);
+	timings->hbp = var->left_margin;
+	timings->hfp = var->right_margin;
+	timings->vbp = var->upper_margin;
+	timings->vfp = var->lower_margin;
+	timings->hsw = var->hsync_len;
+	timings->vsw = var->vsync_len;
+	timings->x_res = var->xres;
+	timings->y_res = var->yres;
+
+	switch (var->bits_per_pixel) {
+	case 16:
+		*bpp = 16;
+		break;
+	case 24:
+	case 32:
+	default:
+		*bpp = 24;
+		break;
+	}
+
+	r = 0;
+
+err:
+	kfree(fbi);
+	kfree(var);
+	kfree(fbops);
+
+	return r;
 }
 
 static int omapfb_set_def_mode(struct omapfb2_device *fbdev,
@@ -2185,6 +2178,61 @@
 	return r;
 }
 
+static int omapfb_init_display(struct omapfb2_device *fbdev,
+		struct omap_dss_device *dssdev)
+{
+	struct omap_dss_driver *dssdrv = dssdev->driver;
+	int r;
+
+	r = dssdrv->enable(dssdev);
+	if (r) {
+		dev_warn(fbdev->dev, "Failed to enable display '%s'\n",
+				dssdev->name);
+		return r;
+	}
+
+	if (dssdev->caps & OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) {
+		u16 w, h;
+		if (dssdrv->enable_te) {
+			r = dssdrv->enable_te(dssdev, 1);
+			if (r) {
+				dev_err(fbdev->dev, "Failed to set TE\n");
+				return r;
+			}
+		}
+
+		if (dssdrv->set_update_mode) {
+			r = dssdrv->set_update_mode(dssdev,
+					OMAP_DSS_UPDATE_MANUAL);
+			if (r) {
+				dev_err(fbdev->dev,
+						"Failed to set update mode\n");
+				return r;
+			}
+		}
+
+		dssdrv->get_resolution(dssdev, &w, &h);
+		r = dssdrv->update(dssdev, 0, 0, w, h);
+		if (r) {
+			dev_err(fbdev->dev,
+					"Failed to update display\n");
+			return r;
+		}
+	} else {
+		if (dssdrv->set_update_mode) {
+			r = dssdrv->set_update_mode(dssdev,
+					OMAP_DSS_UPDATE_AUTO);
+			if (r) {
+				dev_err(fbdev->dev,
+						"Failed to set update mode\n");
+				return r;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int omapfb_probe(struct platform_device *pdev)
 {
 	struct omapfb2_device *fbdev = NULL;
@@ -2284,30 +2332,13 @@
 	}
 
 	if (def_display) {
-		struct omap_dss_driver *dssdrv = def_display->driver;
-
-		r = def_display->driver->enable(def_display);
+		r = omapfb_init_display(fbdev, def_display);
 		if (r) {
-			dev_warn(fbdev->dev, "Failed to enable display '%s'\n",
-					def_display->name);
+			dev_err(fbdev->dev,
+					"failed to initialize default "
+					"display\n");
 			goto cleanup;
 		}
-
-		if (def_display->caps & OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) {
-			u16 w, h;
-			if (dssdrv->enable_te)
-				dssdrv->enable_te(def_display, 1);
-			if (dssdrv->set_update_mode)
-				dssdrv->set_update_mode(def_display,
-						OMAP_DSS_UPDATE_MANUAL);
-
-			dssdrv->get_resolution(def_display, &w, &h);
-			def_display->driver->update(def_display, 0, 0, w, h);
-		} else {
-			if (dssdrv->set_update_mode)
-				dssdrv->set_update_mode(def_display,
-						OMAP_DSS_UPDATE_AUTO);
-		}
 	}
 
 	DBG("create sysfs for fbs\n");
diff --git a/drivers/video/omap2/omapfb/omapfb-sysfs.c b/drivers/video/omap2/omapfb/omapfb-sysfs.c
index 6f9c72c..2f5e817 100644
--- a/drivers/video/omap2/omapfb/omapfb-sysfs.c
+++ b/drivers/video/omap2/omapfb/omapfb-sysfs.c
@@ -29,7 +29,7 @@
 #include <linux/mm.h>
 #include <linux/omapfb.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 #include <plat/vrfb.h>
 
 #include "omapfb.h"
@@ -50,10 +50,12 @@
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
 	struct omapfb2_mem_region *rg;
-	enum omap_dss_rotation_type rot_type;
+	int rot_type;
 	int r;
 
-	rot_type = simple_strtoul(buf, NULL, 0);
+	r = kstrtoint(buf, 0, &rot_type);
+	if (r)
+		return r;
 
 	if (rot_type != OMAP_DSS_ROT_DMA && rot_type != OMAP_DSS_ROT_VRFB)
 		return -EINVAL;
@@ -102,14 +104,15 @@
 {
 	struct fb_info *fbi = dev_get_drvdata(dev);
 	struct omapfb_info *ofbi = FB2OFB(fbi);
-	unsigned long mirror;
+	int mirror;
 	int r;
 	struct fb_var_screeninfo new_var;
 
-	mirror = simple_strtoul(buf, NULL, 0);
+	r = kstrtoint(buf, 0, &mirror);
+	if (r)
+		return r;
 
-	if (mirror != 0 && mirror != 1)
-		return -EINVAL;
+	mirror = !!mirror;
 
 	if (!lock_fb_info(fbi))
 		return -ENODEV;
@@ -445,7 +448,11 @@
 	int r;
 	int i;
 
-	size = PAGE_ALIGN(simple_strtoul(buf, NULL, 0));
+	r = kstrtoul(buf, 0, &size);
+	if (r)
+		return r;
+
+	size = PAGE_ALIGN(size);
 
 	if (!lock_fb_info(fbi))
 		return -ENODEV;
diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h
index 1305fc9..aa1b1d9 100644
--- a/drivers/video/omap2/omapfb/omapfb.h
+++ b/drivers/video/omap2/omapfb/omapfb.h
@@ -29,13 +29,15 @@
 
 #include <linux/rwsem.h>
 
-#include <plat/display.h>
+#include <video/omapdss.h>
 
 #ifdef DEBUG
 extern unsigned int omapfb_debug;
 #define DBG(format, ...) \
-	if (omapfb_debug) \
-		printk(KERN_DEBUG "OMAPFB: " format, ## __VA_ARGS__)
+	do { \
+		if (omapfb_debug) \
+			printk(KERN_DEBUG "OMAPFB: " format, ## __VA_ARGS__); \
+	} while (0)
 #else
 #define DBG(format, ...)
 #endif
diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c
index 3b6cdca..0352afa 100644
--- a/drivers/video/s3c-fb.c
+++ b/drivers/video/s3c-fb.c
@@ -182,6 +182,7 @@
 
 /**
  * struct s3c_fb - overall hardware state of the hardware
+ * @slock: The spinlock protection for this data sturcture.
  * @dev: The device that we bound to, for printing, etc.
  * @regs_res: The resource we claimed for the IO registers.
  * @bus_clk: The clk (hclk) feeding our interface and possibly pixclk.
@@ -195,6 +196,7 @@
  * @vsync_info: VSYNC-related information (count, queues...)
  */
 struct s3c_fb {
+	spinlock_t		slock;
 	struct device		*dev;
 	struct resource		*regs_res;
 	struct clk		*bus_clk;
@@ -300,6 +302,7 @@
 		var->blue.length	= 5;
 		break;
 
+	case 32:
 	case 28:
 	case 25:
 		var->transp.length	= var->bits_per_pixel - 24;
@@ -308,7 +311,6 @@
 	case 24:
 		/* our 24bpp is unpacked, so 32bpp */
 		var->bits_per_pixel	= 32;
-	case 32:
 		var->red.offset		= 16;
 		var->red.length		= 8;
 		var->green.offset	= 8;
@@ -947,6 +949,8 @@
 	void __iomem  *regs = sfb->regs;
 	u32 irq_sts_reg;
 
+	spin_lock(&sfb->slock);
+
 	irq_sts_reg = readl(regs + VIDINTCON1);
 
 	if (irq_sts_reg & VIDINTCON1_INT_FRAME) {
@@ -963,6 +967,7 @@
 	 */
 	s3c_fb_disable_irq(sfb);
 
+	spin_unlock(&sfb->slock);
 	return IRQ_HANDLED;
 }
 
@@ -1339,6 +1344,8 @@
 	sfb->pdata = pd;
 	sfb->variant = fbdrv->variant;
 
+	spin_lock_init(&sfb->slock);
+
 	sfb->bus_clk = clk_get(dev, "lcd");
 	if (IS_ERR(sfb->bus_clk)) {
 		dev_err(dev, "failed to get bus clock\n");
@@ -1442,8 +1449,7 @@
 	iounmap(sfb->regs);
 
 err_req_region:
-	release_resource(sfb->regs_res);
-	kfree(sfb->regs_res);
+	release_mem_region(sfb->regs_res->start, resource_size(sfb->regs_res));
 
 err_clk:
 	clk_disable(sfb->bus_clk);
@@ -1479,8 +1485,7 @@
 	clk_disable(sfb->bus_clk);
 	clk_put(sfb->bus_clk);
 
-	release_resource(sfb->regs_res);
-	kfree(sfb->regs_res);
+	release_mem_region(sfb->regs_res->start, resource_size(sfb->regs_res));
 
 	kfree(sfb);
 
@@ -1521,7 +1526,8 @@
 
 	clk_enable(sfb->bus_clk);
 
-	/* setup registers */
+	/* setup gpio and output polarity controls */
+	pd->setup_gpio();
 	writel(pd->vidcon1, sfb->regs + VIDCON1);
 
 	/* zero all windows before we do anything */
@@ -1549,7 +1555,7 @@
 	return 0;
 }
 
-int s3c_fb_runtime_suspend(struct device *dev)
+static int s3c_fb_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct s3c_fb *sfb = platform_get_drvdata(pdev);
@@ -1569,7 +1575,7 @@
 	return 0;
 }
 
-int s3c_fb_runtime_resume(struct device *dev)
+static int s3c_fb_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct s3c_fb *sfb = platform_get_drvdata(pdev);
@@ -1579,7 +1585,8 @@
 
 	clk_enable(sfb->bus_clk);
 
-	/* setup registers */
+	/* setup gpio and output polarity controls */
+	pd->setup_gpio();
 	writel(pd->vidcon1, sfb->regs + VIDCON1);
 
 	/* zero all windows before we do anything */
@@ -1623,28 +1630,31 @@
 		.has_osd_c	= 1,
 		.osd_size_off	= 0x8,
 		.palette_sz	= 256,
-		.valid_bpp	= VALID_BPP1248 | VALID_BPP(16) | VALID_BPP(24),
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(24)),
 	},
 	[1] = {
 		.has_osd_c	= 1,
 		.has_osd_d	= 1,
-		.osd_size_off	= 0x12,
+		.osd_size_off	= 0xc,
 		.has_osd_alpha	= 1,
 		.palette_sz	= 256,
 		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(16) |
 				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25)),
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(28)),
 	},
 	[2] = {
 		.has_osd_c	= 1,
 		.has_osd_d	= 1,
-		.osd_size_off	= 0x12,
+		.osd_size_off	= 0xc,
 		.has_osd_alpha	= 1,
 		.palette_sz	= 16,
 		.palette_16bpp	= 1,
 		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(16) |
 				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25)),
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(28)),
 	},
 	[3] = {
 		.has_osd_c	= 1,
@@ -1653,7 +1663,8 @@
 		.palette_16bpp	= 1,
 		.valid_bpp	= (VALID_BPP124  | VALID_BPP(16) |
 				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25)),
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(28)),
 	},
 	[4] = {
 		.has_osd_c	= 1,
@@ -1662,7 +1673,65 @@
 		.palette_16bpp	= 1,
 		.valid_bpp	= (VALID_BPP(1) | VALID_BPP(2) |
 				   VALID_BPP(16) | VALID_BPP(18) |
-				   VALID_BPP(24) | VALID_BPP(25)),
+				   VALID_BPP(19) | VALID_BPP(24) |
+				   VALID_BPP(25) | VALID_BPP(28)),
+	},
+};
+
+static struct s3c_fb_win_variant s3c_fb_data_s5p_wins[] = {
+	[0] = {
+		.has_osd_c	= 1,
+		.osd_size_off	= 0x8,
+		.palette_sz	= 256,
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
+				   VALID_BPP(15) | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(19) |
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(32)),
+	},
+	[1] = {
+		.has_osd_c	= 1,
+		.has_osd_d	= 1,
+		.osd_size_off	= 0xc,
+		.has_osd_alpha	= 1,
+		.palette_sz	= 256,
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
+				   VALID_BPP(15) | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(19) |
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(32)),
+	},
+	[2] = {
+		.has_osd_c	= 1,
+		.has_osd_d	= 1,
+		.osd_size_off	= 0xc,
+		.has_osd_alpha	= 1,
+		.palette_sz	= 256,
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
+				   VALID_BPP(15) | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(19) |
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(32)),
+	},
+	[3] = {
+		.has_osd_c	= 1,
+		.has_osd_alpha	= 1,
+		.palette_sz	= 256,
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
+				   VALID_BPP(15) | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(19) |
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(32)),
+	},
+	[4] = {
+		.has_osd_c	= 1,
+		.has_osd_alpha	= 1,
+		.palette_sz	= 256,
+		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
+				   VALID_BPP(15) | VALID_BPP(16) |
+				   VALID_BPP(18) | VALID_BPP(19) |
+				   VALID_BPP(24) | VALID_BPP(25) |
+				   VALID_BPP(32)),
 	},
 };
 
@@ -1719,11 +1788,11 @@
 
 		.has_prtcon	= 1,
 	},
-	.win[0]	= &s3c_fb_data_64xx_wins[0],
-	.win[1]	= &s3c_fb_data_64xx_wins[1],
-	.win[2]	= &s3c_fb_data_64xx_wins[2],
-	.win[3]	= &s3c_fb_data_64xx_wins[3],
-	.win[4]	= &s3c_fb_data_64xx_wins[4],
+	.win[0]	= &s3c_fb_data_s5p_wins[0],
+	.win[1]	= &s3c_fb_data_s5p_wins[1],
+	.win[2]	= &s3c_fb_data_s5p_wins[2],
+	.win[3]	= &s3c_fb_data_s5p_wins[3],
+	.win[4]	= &s3c_fb_data_s5p_wins[4],
 };
 
 static struct s3c_fb_driverdata s3c_fb_data_s5pv210 = {
@@ -1749,11 +1818,11 @@
 
 		.has_shadowcon	= 1,
 	},
-	.win[0]	= &s3c_fb_data_64xx_wins[0],
-	.win[1]	= &s3c_fb_data_64xx_wins[1],
-	.win[2]	= &s3c_fb_data_64xx_wins[2],
-	.win[3]	= &s3c_fb_data_64xx_wins[3],
-	.win[4]	= &s3c_fb_data_64xx_wins[4],
+	.win[0]	= &s3c_fb_data_s5p_wins[0],
+	.win[1]	= &s3c_fb_data_s5p_wins[1],
+	.win[2]	= &s3c_fb_data_s5p_wins[2],
+	.win[3]	= &s3c_fb_data_s5p_wins[3],
+	.win[4]	= &s3c_fb_data_s5p_wins[4],
 };
 
 /* S3C2443/S3C2416 style hardware */
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 61c819e3..0aa1376 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -867,7 +867,7 @@
 		goto dealloc_fb;
 	}
 
-	size = (res->end - res->start) + 1;
+	size = resource_size(res);
 	info->mem = request_mem_region(res->start, size, pdev->name);
 	if (info->mem == NULL) {
 		dev_err(&pdev->dev, "failed to get memory region\n");
@@ -997,8 +997,7 @@
 release_regs:
 	iounmap(info->io);
 release_mem:
-	release_resource(info->mem);
-	kfree(info->mem);
+	release_mem_region(res->start, size);
 dealloc_fb:
 	platform_set_drvdata(pdev, NULL);
 	framebuffer_release(fbinfo);
@@ -1044,8 +1043,7 @@
 
 	iounmap(info->io);
 
-	release_resource(info->mem);
-	kfree(info->mem);
+	release_mem_region(info->mem->start, resource_size(info->mem));
 
 	platform_set_drvdata(pdev, NULL);
 	framebuffer_release(fbinfo);
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c
index c4482f2..4ca5d0c 100644
--- a/drivers/video/s3fb.c
+++ b/drivers/video/s3fb.c
@@ -25,6 +25,9 @@
 #include <linux/console.h> /* Why should fb driver call console functions? because console_lock() */
 #include <video/vga.h>
 
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
 #endif
@@ -36,6 +39,12 @@
 	struct mutex open_lock;
 	unsigned int ref_count;
 	u32 pseudo_palette[16];
+#ifdef CONFIG_FB_S3_DDC
+	u8 __iomem *mmio;
+	bool ddc_registered;
+	struct i2c_adapter ddc_adapter;
+	struct i2c_algo_bit_data ddc_algo;
+#endif
 };
 
 
@@ -105,6 +114,9 @@
 #define CHIP_UNDECIDED_FLAG	0x80
 #define CHIP_MASK		0xFF
 
+#define MMIO_OFFSET		0x1000000
+#define MMIO_SIZE		0x10000
+
 /* CRT timing register sets */
 
 static const struct vga_regset s3_h_total_regs[]        = {{0x00, 0, 7}, {0x5D, 0, 0}, VGA_REGSET_END};
@@ -140,7 +152,7 @@
 /* Module parameters */
 
 
-static char *mode_option __devinitdata = "640x480-8@60";
+static char *mode_option __devinitdata;
 
 #ifdef CONFIG_MTRR
 static int mtrr __devinitdata = 1;
@@ -169,6 +181,119 @@
 
 /* ------------------------------------------------------------------------- */
 
+#ifdef CONFIG_FB_S3_DDC
+
+#define DDC_REG		0xaa		/* Trio 3D/1X/2X */
+#define DDC_MMIO_REG	0xff20		/* all other chips */
+#define DDC_SCL_OUT	(1 << 0)
+#define DDC_SDA_OUT	(1 << 1)
+#define DDC_SCL_IN	(1 << 2)
+#define DDC_SDA_IN	(1 << 3)
+#define DDC_DRIVE_EN	(1 << 4)
+
+static bool s3fb_ddc_needs_mmio(int chip)
+{
+	return !(chip == CHIP_360_TRIO3D_1X  ||
+		 chip == CHIP_362_TRIO3D_2X  ||
+		 chip == CHIP_368_TRIO3D_2X);
+}
+
+static u8 s3fb_ddc_read(struct s3fb_info *par)
+{
+	if (s3fb_ddc_needs_mmio(par->chip))
+		return readb(par->mmio + DDC_MMIO_REG);
+	else
+		return vga_rcrt(par->state.vgabase, DDC_REG);
+}
+
+static void s3fb_ddc_write(struct s3fb_info *par, u8 val)
+{
+	if (s3fb_ddc_needs_mmio(par->chip))
+		writeb(val, par->mmio + DDC_MMIO_REG);
+	else
+		vga_wcrt(par->state.vgabase, DDC_REG, val);
+}
+
+static void s3fb_ddc_setscl(void *data, int val)
+{
+	struct s3fb_info *par = data;
+	unsigned char reg;
+
+	reg = s3fb_ddc_read(par) | DDC_DRIVE_EN;
+	if (val)
+		reg |= DDC_SCL_OUT;
+	else
+		reg &= ~DDC_SCL_OUT;
+	s3fb_ddc_write(par, reg);
+}
+
+static void s3fb_ddc_setsda(void *data, int val)
+{
+	struct s3fb_info *par = data;
+	unsigned char reg;
+
+	reg = s3fb_ddc_read(par) | DDC_DRIVE_EN;
+	if (val)
+		reg |= DDC_SDA_OUT;
+	else
+		reg &= ~DDC_SDA_OUT;
+	s3fb_ddc_write(par, reg);
+}
+
+static int s3fb_ddc_getscl(void *data)
+{
+	struct s3fb_info *par = data;
+
+	return !!(s3fb_ddc_read(par) & DDC_SCL_IN);
+}
+
+static int s3fb_ddc_getsda(void *data)
+{
+	struct s3fb_info *par = data;
+
+	return !!(s3fb_ddc_read(par) & DDC_SDA_IN);
+}
+
+static int __devinit s3fb_setup_ddc_bus(struct fb_info *info)
+{
+	struct s3fb_info *par = info->par;
+
+	strlcpy(par->ddc_adapter.name, info->fix.id,
+		sizeof(par->ddc_adapter.name));
+	par->ddc_adapter.owner		= THIS_MODULE;
+	par->ddc_adapter.class		= I2C_CLASS_DDC;
+	par->ddc_adapter.algo_data	= &par->ddc_algo;
+	par->ddc_adapter.dev.parent	= info->device;
+	par->ddc_algo.setsda		= s3fb_ddc_setsda;
+	par->ddc_algo.setscl		= s3fb_ddc_setscl;
+	par->ddc_algo.getsda		= s3fb_ddc_getsda;
+	par->ddc_algo.getscl		= s3fb_ddc_getscl;
+	par->ddc_algo.udelay		= 10;
+	par->ddc_algo.timeout		= 20;
+	par->ddc_algo.data		= par;
+
+	i2c_set_adapdata(&par->ddc_adapter, par);
+
+	/*
+	 * some Virge cards have external MUX to switch chip I2C bus between
+	 * DDC and extension pins - switch it do DDC
+	 */
+/*	vga_wseq(par->state.vgabase, 0x08, 0x06); - not needed, already unlocked */
+	if (par->chip == CHIP_357_VIRGE_GX2 ||
+	    par->chip == CHIP_359_VIRGE_GX2P)
+		svga_wseq_mask(par->state.vgabase, 0x0d, 0x01, 0x03);
+	else
+		svga_wseq_mask(par->state.vgabase, 0x0d, 0x00, 0x03);
+	/* some Virge need this or the DDC is ignored */
+	svga_wcrt_mask(par->state.vgabase, 0x5c, 0x03, 0x03);
+
+	return i2c_bit_add_bus(&par->ddc_adapter);
+}
+#endif /* CONFIG_FB_S3_DDC */
+
+
+/* ------------------------------------------------------------------------- */
+
 /* Set font in S3 fast text mode */
 
 static void s3fb_settile_fast(struct fb_info *info, struct fb_tilemap *map)
@@ -994,6 +1119,7 @@
 	struct s3fb_info *par;
 	int rc;
 	u8 regval, cr38, cr39;
+	bool found = false;
 
 	/* Ignore secondary VGA device because there is no VGA arbitration */
 	if (! svga_primary_device(dev)) {
@@ -1110,12 +1236,69 @@
 	info->fix.ypanstep = 0;
 	info->fix.accel = FB_ACCEL_NONE;
 	info->pseudo_palette = (void*) (par->pseudo_palette);
+	info->var.bits_per_pixel = 8;
+
+#ifdef CONFIG_FB_S3_DDC
+	/* Enable MMIO if needed */
+	if (s3fb_ddc_needs_mmio(par->chip)) {
+		par->mmio = ioremap(info->fix.smem_start + MMIO_OFFSET, MMIO_SIZE);
+		if (par->mmio)
+			svga_wcrt_mask(par->state.vgabase, 0x53, 0x08, 0x08);	/* enable MMIO */
+		else
+			dev_err(info->device, "unable to map MMIO at 0x%lx, disabling DDC",
+				info->fix.smem_start + MMIO_OFFSET);
+	}
+	if (!s3fb_ddc_needs_mmio(par->chip) || par->mmio)
+		if (s3fb_setup_ddc_bus(info) == 0) {
+			u8 *edid = fb_ddc_read(&par->ddc_adapter);
+			par->ddc_registered = true;
+			if (edid) {
+				fb_edid_to_monspecs(edid, &info->monspecs);
+				kfree(edid);
+				if (!info->monspecs.modedb)
+					dev_err(info->device, "error getting mode database\n");
+				else {
+					const struct fb_videomode *m;
+
+					fb_videomode_to_modelist(info->monspecs.modedb,
+								 info->monspecs.modedb_len,
+								 &info->modelist);
+					m = fb_find_best_display(&info->monspecs, &info->modelist);
+					if (m) {
+						fb_videomode_to_var(&info->var, m);
+						/* fill all other info->var's fields */
+						if (s3fb_check_var(&info->var, info) == 0)
+							found = true;
+					}
+				}
+			}
+		}
+#endif
+	if (!mode_option && !found)
+		mode_option = "640x480-8@60";
 
 	/* Prepare startup mode */
-	rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
-	if (! ((rc == 1) || (rc == 2))) {
-		rc = -EINVAL;
-		dev_err(info->device, "mode %s not found\n", mode_option);
+	if (mode_option) {
+		rc = fb_find_mode(&info->var, info, mode_option,
+				   info->monspecs.modedb, info->monspecs.modedb_len,
+				   NULL, info->var.bits_per_pixel);
+		if (!rc || rc == 4) {
+			rc = -EINVAL;
+			dev_err(info->device, "mode %s not found\n", mode_option);
+			fb_destroy_modedb(info->monspecs.modedb);
+			info->monspecs.modedb = NULL;
+			goto err_find_mode;
+		}
+	}
+
+	fb_destroy_modedb(info->monspecs.modedb);
+	info->monspecs.modedb = NULL;
+
+	/* maximize virtual vertical size for fast scrolling */
+	info->var.yres_virtual = info->fix.smem_len * 8 /
+			(info->var.bits_per_pixel * info->var.xres_virtual);
+	if (info->var.yres_virtual < info->var.yres) {
+		dev_err(info->device, "virtual vertical size smaller than real\n");
 		goto err_find_mode;
 	}
 
@@ -1164,6 +1347,12 @@
 	fb_dealloc_cmap(&info->cmap);
 err_alloc_cmap:
 err_find_mode:
+#ifdef CONFIG_FB_S3_DDC
+	if (par->ddc_registered)
+		i2c_del_adapter(&par->ddc_adapter);
+	if (par->mmio)
+		iounmap(par->mmio);
+#endif
 	pci_iounmap(dev, info->screen_base);
 err_iomap:
 	pci_release_regions(dev);
@@ -1180,12 +1369,11 @@
 static void __devexit s3_pci_remove(struct pci_dev *dev)
 {
 	struct fb_info *info = pci_get_drvdata(dev);
+	struct s3fb_info __maybe_unused *par = info->par;
 
 	if (info) {
 
 #ifdef CONFIG_MTRR
-		struct s3fb_info *par = info->par;
-
 		if (par->mtrr_reg >= 0) {
 			mtrr_del(par->mtrr_reg, 0, 0);
 			par->mtrr_reg = -1;
@@ -1195,6 +1383,13 @@
 		unregister_framebuffer(info);
 		fb_dealloc_cmap(&info->cmap);
 
+#ifdef CONFIG_FB_S3_DDC
+		if (par->ddc_registered)
+			i2c_del_adapter(&par->ddc_adapter);
+		if (par->mmio)
+			iounmap(par->mmio);
+#endif
+
 		pci_iounmap(dev, info->screen_base);
 		pci_release_regions(dev);
 /*		pci_disable_device(dev); */
diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c
index bb71fea..80fa87e 100644
--- a/drivers/video/savage/savagefb-i2c.c
+++ b/drivers/video/savage/savagefb-i2c.c
@@ -171,6 +171,8 @@
 
 	switch (par->chip) {
 	case S3_PROSAVAGE:
+	case S3_PROSAVAGEDDR:
+	case S3_TWISTER:
 		par->chan.reg         = CR_SERIAL2;
 		par->chan.ioaddr      = par->mmio.vbase;
 		par->chan.algo.setsda = prosavage_gpio_setsda;
diff --git a/drivers/video/savage/savagefb.h b/drivers/video/savage/savagefb.h
index 4e9490c..32549d1 100644
--- a/drivers/video/savage/savagefb.h
+++ b/drivers/video/savage/savagefb.h
@@ -36,7 +36,6 @@
 #define PCI_CHIP_SAVAGE_IX    0x8c13
 #define PCI_CHIP_PROSAVAGE_PM 0x8a25
 #define PCI_CHIP_PROSAVAGE_KM 0x8a26
- /* Twister is a code name; hope I get the real name soon. */
 #define PCI_CHIP_S3TWISTER_P  0x8d01
 #define PCI_CHIP_S3TWISTER_K  0x8d02
 #define PCI_CHIP_PROSAVAGE_DDR          0x8d03
@@ -52,14 +51,15 @@
 #define PCI_CHIP_SUPSAV_IXCDDR		0x8c2f
 
 
+#define S3_SAVAGE_SERIES(chip)    ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE2000))
 
 #define S3_SAVAGE3D_SERIES(chip)  ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX))
 
-#define S3_SAVAGE4_SERIES(chip)   ((chip==S3_SAVAGE4) || (chip==S3_PROSAVAGE))
+#define S3_SAVAGE4_SERIES(chip)   ((chip>=S3_SAVAGE4) || (chip<=S3_PROSAVAGEDDR))
 
 #define S3_SAVAGE_MOBILE_SERIES(chip)  ((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE))
 
-#define S3_SAVAGE_SERIES(chip)    ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE2000))
+#define S3_MOBILE_TWISTER_SERIES(chip) ((chip==S3_TWISTER) || (chip==S3_PROSAVAGEDDR))
 
 /* Chip tags.  These are used to group the adapters into
  * related families.
@@ -71,6 +71,8 @@
   S3_SAVAGE_MX,
   S3_SAVAGE4,
   S3_PROSAVAGE,
+  S3_TWISTER,
+  S3_PROSAVAGEDDR,
   S3_SUPERSAVAGE,
   S3_SAVAGE2000,
   S3_LAST
diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c
index a2dc1a7..3b7f2f5 100644
--- a/drivers/video/savage/savagefb_driver.c
+++ b/drivers/video/savage/savagefb_driver.c
@@ -328,7 +328,9 @@
 		savage_out32(0x48C18, savage_in32(0x48C18, par) | 0x0C, par);
 		break;
 	case S3_SAVAGE4:
+	case S3_TWISTER:
 	case S3_PROSAVAGE:
+	case S3_PROSAVAGEDDR:
 	case S3_SUPERSAVAGE:
 		/* Disable BCI */
 		savage_out32(0x48C18, savage_in32(0x48C18, par) & 0x3FF0, par);
@@ -1886,6 +1888,8 @@
 		break;
 
 	case S3_PROSAVAGE:
+	case S3_PROSAVAGEDDR:
+	case S3_TWISTER:
 		videoRam = RamSavageNB[(config1 & 0xE0) >> 5] * 1024;
 		break;
 
@@ -1963,7 +1967,8 @@
 		}
 	}
 
-	if (S3_SAVAGE_MOBILE_SERIES(par->chip) && !par->crtonly)
+	if ((S3_SAVAGE_MOBILE_SERIES(par->chip) ||
+	     S3_MOBILE_TWISTER_SERIES(par->chip)) && !par->crtonly)
 		par->display_type = DISP_LCD;
 	else if (dvi || (par->chip == S3_SAVAGE4 && par->dvi))
 		par->display_type = DISP_DFP;
@@ -2111,19 +2116,19 @@
 		snprintf(info->fix.id, 16, "ProSavageKM");
 		break;
 	case FB_ACCEL_S3TWISTER_P:
-		par->chip = S3_PROSAVAGE;
+		par->chip = S3_TWISTER;
 		snprintf(info->fix.id, 16, "TwisterP");
 		break;
 	case FB_ACCEL_S3TWISTER_K:
-		par->chip = S3_PROSAVAGE;
+		par->chip = S3_TWISTER;
 		snprintf(info->fix.id, 16, "TwisterK");
 		break;
 	case FB_ACCEL_PROSAVAGE_DDR:
-		par->chip = S3_PROSAVAGE;
+		par->chip = S3_PROSAVAGEDDR;
 		snprintf(info->fix.id, 16, "ProSavageDDR");
 		break;
 	case FB_ACCEL_PROSAVAGE_DDRK:
-		par->chip = S3_PROSAVAGE;
+		par->chip = S3_PROSAVAGEDDR;
 		snprintf(info->fix.id, 16, "ProSavage8");
 		break;
 	}
diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c
index 8fe1958..45e47d8 100644
--- a/drivers/video/sh7760fb.c
+++ b/drivers/video/sh7760fb.c
@@ -551,8 +551,7 @@
 		free_irq(par->irq, &par->vsync);
 	iounmap(par->base);
 out_res:
-	release_resource(par->ioarea);
-	kfree(par->ioarea);
+	release_mem_region(res->start, resource_size(res));
 out_fb:
 	framebuffer_release(info);
 	return ret;
@@ -570,8 +569,7 @@
 	if (par->irq >= 0)
 		free_irq(par->irq, par);
 	iounmap(par->base);
-	release_resource(par->ioarea);
-	kfree(par->ioarea);
+	release_mem_region(par->ioarea->start, resource_size(par->ioarea));
 	framebuffer_release(info);
 	platform_set_drvdata(dev, NULL);
 
diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c
index 2b9e56a..6ae40b6 100644
--- a/drivers/video/sh_mobile_hdmi.c
+++ b/drivers/video/sh_mobile_hdmi.c
@@ -1131,15 +1131,19 @@
 		pm_runtime_get_sync(hdmi->dev);
 
 		ret = sh_hdmi_read_edid(hdmi, &hdmi_rate, &parent_rate);
-		if (ret < 0)
+		if (ret < 0) {
+			pm_runtime_put(hdmi->dev);
 			goto out;
+		}
 
 		hdmi->hp_state = HDMI_HOTPLUG_EDID_DONE;
 
 		/* Reconfigure the clock */
 		ret = sh_hdmi_clk_configure(hdmi, hdmi_rate, parent_rate);
-		if (ret < 0)
+		if (ret < 0) {
+			pm_runtime_put(hdmi->dev);
 			goto out;
+		}
 
 		msleep(10);
 		sh_hdmi_configure(hdmi);
@@ -1336,6 +1340,7 @@
 ecodec:
 	free_irq(irq, hdmi);
 ereqirq:
+	pm_runtime_suspend(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	iounmap(hdmi->base);
 emap:
@@ -1372,6 +1377,7 @@
 	free_irq(irq, hdmi);
 	/* Wait for already scheduled work */
 	cancel_delayed_work_sync(&hdmi->edid_work);
+	pm_runtime_suspend(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	clk_disable(hdmi->hdmi_clk);
 	clk_put(hdmi->hdmi_clk);
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 9bcc61b..404c03b 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -27,6 +27,7 @@
 #include <asm/atomic.h>
 
 #include "sh_mobile_lcdcfb.h"
+#include "sh_mobile_meram.h"
 
 #define SIDE_B_OFFSET 0x1000
 #define MIRROR_OFFSET 0x2000
@@ -143,6 +144,7 @@
 	unsigned long saved_shared_regs[NR_SHARED_REGS];
 	int started;
 	int forced_bpp; /* 2 channel LCDC must share bpp setting */
+	struct sh_mobile_meram_info *meram_dev;
 };
 
 static bool banked(int reg_nr)
@@ -469,7 +471,6 @@
 	int bpp = 0;
 	unsigned long ldddsr;
 	int k, m;
-	int ret = 0;
 
 	/* enable clocks before accessing the hardware */
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
@@ -538,11 +539,12 @@
 		lcdc_write_chan(ch, LDPMR, 0);
 
 		board_cfg = &ch->cfg.board_cfg;
-		if (board_cfg->setup_sys)
-			ret = board_cfg->setup_sys(board_cfg->board_data, ch,
-						   &sh_mobile_lcdc_sys_bus_ops);
-		if (ret)
-			return ret;
+		if (board_cfg->setup_sys) {
+			int ret = board_cfg->setup_sys(board_cfg->board_data,
+						ch, &sh_mobile_lcdc_sys_bus_ops);
+			if (ret)
+				return ret;
+		}
 	}
 
 	/* word and long word swap */
@@ -564,6 +566,9 @@
 	}
 
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
+		unsigned long base_addr_y;
+		unsigned long base_addr_c = 0;
+		int pitch;
 		ch = &priv->ch[k];
 
 		if (!priv->ch[k].enabled)
@@ -598,16 +603,68 @@
 		}
 		lcdc_write_chan(ch, LDDFR, tmp);
 
-		/* point out our frame buffer */
-		lcdc_write_chan(ch, LDSA1R, ch->info->fix.smem_start);
-		if (ch->info->var.nonstd)
-			lcdc_write_chan(ch, LDSA2R,
-				ch->info->fix.smem_start +
+		base_addr_y = ch->info->fix.smem_start;
+		base_addr_c = base_addr_y +
 				ch->info->var.xres *
-				ch->info->var.yres_virtual);
+				ch->info->var.yres_virtual;
+		pitch = ch->info->fix.line_length;
+
+		/* test if we can enable meram */
+		if (ch->cfg.meram_cfg && priv->meram_dev &&
+				priv->meram_dev->ops) {
+			struct sh_mobile_meram_cfg *cfg;
+			struct sh_mobile_meram_info *mdev;
+			unsigned long icb_addr_y, icb_addr_c;
+			int icb_pitch;
+			int pf;
+
+			cfg = ch->cfg.meram_cfg;
+			mdev = priv->meram_dev;
+			/* we need to de-init configured ICBs before we
+			 * we can re-initialize them.
+			 */
+			if (ch->meram_enabled)
+				mdev->ops->meram_unregister(mdev, cfg);
+
+			ch->meram_enabled = 0;
+
+			if (ch->info->var.nonstd) {
+				if (ch->info->var.bits_per_pixel == 24)
+					pf = SH_MOBILE_MERAM_PF_NV24;
+				else
+					pf = SH_MOBILE_MERAM_PF_NV;
+			} else {
+				pf = SH_MOBILE_MERAM_PF_RGB;
+			}
+
+			ret = mdev->ops->meram_register(mdev, cfg, pitch,
+						ch->info->var.yres,
+						pf,
+						base_addr_y,
+						base_addr_c,
+						&icb_addr_y,
+						&icb_addr_c,
+						&icb_pitch);
+			if (!ret)  {
+				/* set LDSA1R value */
+				base_addr_y = icb_addr_y;
+				pitch = icb_pitch;
+
+				/* set LDSA2R value if required */
+				if (base_addr_c)
+					base_addr_c = icb_addr_c;
+
+				ch->meram_enabled = 1;
+			}
+		}
+
+		/* point out our frame buffer */
+		lcdc_write_chan(ch, LDSA1R, base_addr_y);
+		if (ch->info->var.nonstd)
+			lcdc_write_chan(ch, LDSA2R, base_addr_c);
 
 		/* set line size */
-		lcdc_write_chan(ch, LDMLSR, ch->info->fix.line_length);
+		lcdc_write_chan(ch, LDMLSR, pitch);
 
 		/* setup deferred io if SYS bus */
 		tmp = ch->cfg.sys_bus_cfg.deferred_io_msec;
@@ -692,6 +749,17 @@
 			board_cfg->display_off(board_cfg->board_data);
 			module_put(board_cfg->owner);
 		}
+
+		/* disable the meram */
+		if (ch->meram_enabled) {
+			struct sh_mobile_meram_cfg *cfg;
+			struct sh_mobile_meram_info *mdev;
+			cfg = ch->cfg.meram_cfg;
+			mdev = priv->meram_dev;
+			mdev->ops->meram_unregister(mdev, cfg);
+			ch->meram_enabled = 0;
+		}
+
 	}
 
 	/* stop the lcdc */
@@ -875,9 +943,29 @@
 	} else
 		base_addr_c = 0;
 
-	lcdc_write_chan_mirror(ch, LDSA1R, base_addr_y);
-	if (base_addr_c)
-		lcdc_write_chan_mirror(ch, LDSA2R, base_addr_c);
+	if (!ch->meram_enabled) {
+		lcdc_write_chan_mirror(ch, LDSA1R, base_addr_y);
+		if (base_addr_c)
+			lcdc_write_chan_mirror(ch, LDSA2R, base_addr_c);
+	} else {
+		struct sh_mobile_meram_cfg *cfg;
+		struct sh_mobile_meram_info *mdev;
+		unsigned long icb_addr_y, icb_addr_c;
+		int ret;
+
+		cfg = ch->cfg.meram_cfg;
+		mdev = priv->meram_dev;
+		ret = mdev->ops->meram_update(mdev, cfg,
+					base_addr_y, base_addr_c,
+					&icb_addr_y, &icb_addr_c);
+		if (ret)
+			return ret;
+
+		lcdc_write_chan_mirror(ch, LDSA1R, icb_addr_y);
+		if (icb_addr_c)
+			lcdc_write_chan_mirror(ch, LDSA2R, icb_addr_c);
+
+	}
 
 	if (lcdc_chan_is_sublcd(ch))
 		lcdc_write(ch->lcdc, _LDRCNTR, ldrcntr ^ LDRCNTR_SRS);
@@ -1288,7 +1376,6 @@
 	struct fb_info *info = event->info;
 	struct sh_mobile_lcdc_chan *ch = info->par;
 	struct sh_mobile_lcdc_board_cfg	*board_cfg = &ch->cfg.board_cfg;
-	int ret;
 
 	if (&ch->lcdc->notifier != nb)
 		return NOTIFY_DONE;
@@ -1302,7 +1389,6 @@
 			board_cfg->display_off(board_cfg->board_data);
 			module_put(board_cfg->owner);
 		}
-		pm_runtime_put(info->device);
 		sh_mobile_lcdc_stop(ch->lcdc);
 		break;
 	case FB_EVENT_RESUME:
@@ -1316,9 +1402,7 @@
 			module_put(board_cfg->owner);
 		}
 
-		ret = sh_mobile_lcdc_start(ch->lcdc);
-		if (!ret)
-			pm_runtime_get_sync(info->device);
+		sh_mobile_lcdc_start(ch->lcdc);
 	}
 
 	return NOTIFY_OK;
@@ -1420,6 +1504,8 @@
 		goto err1;
 	}
 
+	priv->meram_dev = pdata->meram_dev;
+
 	for (i = 0; i < j; i++) {
 		struct fb_var_screeninfo *var;
 		const struct fb_videomode *lcd_cfg, *max_cfg = NULL;
diff --git a/drivers/video/sh_mobile_lcdcfb.h b/drivers/video/sh_mobile_lcdcfb.h
index f16cb56..aeed668 100644
--- a/drivers/video/sh_mobile_lcdcfb.h
+++ b/drivers/video/sh_mobile_lcdcfb.h
@@ -39,6 +39,7 @@
 	int use_count;
 	int blank_status;
 	struct mutex open_lock;		/* protects the use counter */
+	int meram_enabled;
 };
 
 #endif
diff --git a/drivers/video/sh_mobile_meram.c b/drivers/video/sh_mobile_meram.c
new file mode 100644
index 0000000..9170c82
--- /dev/null
+++ b/drivers/video/sh_mobile_meram.c
@@ -0,0 +1,567 @@
+/*
+ * SuperH Mobile MERAM Driver for SuperH Mobile LCDC Driver
+ *
+ * Copyright (c) 2011	Damian Hobson-Garcia <dhobsong@igel.co.jp>
+ *                      Takanari Hayama <taki@igel.co.jp>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "sh_mobile_meram.h"
+
+/* meram registers */
+#define MExxCTL 0x0
+#define MExxBSIZE 0x4
+#define MExxMNCF 0x8
+#define MExxSARA 0x10
+#define MExxSARB 0x14
+#define MExxSBSIZE 0x18
+
+#define MERAM_MExxCTL_VAL(ctl, next_icb, addr)	\
+	((ctl) | (((next_icb) & 0x1f) << 11) | (((addr) & 0x7ff) << 16))
+#define	MERAM_MExxBSIZE_VAL(a, b, c) \
+	(((a) << 28) | ((b) << 16) | (c))
+
+#define MEVCR1 0x4
+#define MEACTS 0x10
+#define MEQSEL1 0x40
+#define MEQSEL2 0x44
+
+/* settings */
+#define MERAM_SEC_LINE 15
+#define MERAM_LINE_WIDTH 2048
+
+/*
+ * MERAM/ICB access functions
+ */
+
+#define MERAM_ICB_OFFSET(base, idx, off)	\
+	((base) + (0x400 + ((idx) * 0x20) + (off)))
+
+static inline void meram_write_icb(void __iomem *base, int idx, int off,
+	unsigned long val)
+{
+	iowrite32(val, MERAM_ICB_OFFSET(base, idx, off));
+}
+
+static inline unsigned long meram_read_icb(void __iomem *base, int idx, int off)
+{
+	return ioread32(MERAM_ICB_OFFSET(base, idx, off));
+}
+
+static inline void meram_write_reg(void __iomem *base, int off,
+		unsigned long val)
+{
+	iowrite32(val, base + off);
+}
+
+static inline unsigned long meram_read_reg(void __iomem *base, int off)
+{
+	return ioread32(base + off);
+}
+
+/*
+ * register ICB
+ */
+
+#define MERAM_CACHE_START(p)	 ((p) >> 16)
+#define MERAM_CACHE_END(p)	 ((p) & 0xffff)
+#define MERAM_CACHE_SET(o, s)	 ((((o) & 0xffff) << 16) | \
+				  (((o) + (s) - 1) & 0xffff))
+
+/*
+ * check if there's no overlaps in MERAM allocation.
+ */
+
+static inline int meram_check_overlap(struct sh_mobile_meram_priv *priv,
+				      struct sh_mobile_meram_icb *new)
+{
+	int i;
+	int used_start, used_end, meram_start, meram_end;
+
+	/* valid ICB? */
+	if (new->marker_icb & ~0x1f || new->cache_icb & ~0x1f)
+		return 1;
+
+	if (test_bit(new->marker_icb, &priv->used_icb) ||
+			test_bit(new->cache_icb,  &priv->used_icb))
+		return  1;
+
+	for (i = 0; i < priv->used_meram_cache_regions; i++) {
+		used_start = MERAM_CACHE_START(priv->used_meram_cache[i]);
+		used_end   = MERAM_CACHE_END(priv->used_meram_cache[i]);
+		meram_start = new->meram_offset;
+		meram_end   = new->meram_offset + new->meram_size;
+
+		if ((meram_start >= used_start && meram_start < used_end) ||
+			(meram_end > used_start && meram_end < used_end))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * mark the specified ICB as used
+ */
+
+static inline void meram_mark(struct sh_mobile_meram_priv *priv,
+			      struct sh_mobile_meram_icb *new)
+{
+	int n;
+
+	if (new->marker_icb < 0 || new->cache_icb < 0)
+		return;
+
+	__set_bit(new->marker_icb, &priv->used_icb);
+	__set_bit(new->cache_icb, &priv->used_icb);
+
+	n = priv->used_meram_cache_regions;
+
+	priv->used_meram_cache[n] = MERAM_CACHE_SET(new->meram_offset,
+						    new->meram_size);
+
+	priv->used_meram_cache_regions++;
+}
+
+/*
+ * unmark the specified ICB as used
+ */
+
+static inline void meram_unmark(struct sh_mobile_meram_priv *priv,
+				struct sh_mobile_meram_icb *icb)
+{
+	int i;
+	unsigned long pattern;
+
+	if (icb->marker_icb < 0 || icb->cache_icb < 0)
+		return;
+
+	__clear_bit(icb->marker_icb, &priv->used_icb);
+	__clear_bit(icb->cache_icb, &priv->used_icb);
+
+	pattern = MERAM_CACHE_SET(icb->meram_offset, icb->meram_size);
+	for (i = 0; i < priv->used_meram_cache_regions; i++) {
+		if (priv->used_meram_cache[i] == pattern) {
+			while (i < priv->used_meram_cache_regions - 1) {
+				priv->used_meram_cache[i] =
+					priv->used_meram_cache[i + 1] ;
+				i++;
+			}
+			priv->used_meram_cache[i] = 0;
+			priv->used_meram_cache_regions--;
+			break;
+		}
+	}
+}
+
+/*
+ * is this a YCbCr(NV12, NV16 or NV24) colorspace
+ */
+static inline int is_nvcolor(int cspace)
+{
+	if (cspace == SH_MOBILE_MERAM_PF_NV ||
+			cspace == SH_MOBILE_MERAM_PF_NV24)
+		return 1;
+	return 0;
+}
+
+/*
+ * set the next address to fetch
+ */
+static inline void meram_set_next_addr(struct sh_mobile_meram_priv *priv,
+				       struct sh_mobile_meram_cfg *cfg,
+				       unsigned long base_addr_y,
+				       unsigned long base_addr_c)
+{
+	unsigned long target;
+
+	target = (cfg->current_reg) ? MExxSARA : MExxSARB;
+	cfg->current_reg ^= 1;
+
+	/* set the next address to fetch */
+	meram_write_icb(priv->base, cfg->icb[0].cache_icb,  target,
+			base_addr_y);
+	meram_write_icb(priv->base, cfg->icb[0].marker_icb, target,
+			base_addr_y + cfg->icb[0].cache_unit);
+
+	if (is_nvcolor(cfg->pixelformat)) {
+		meram_write_icb(priv->base, cfg->icb[1].cache_icb,  target,
+				base_addr_c);
+		meram_write_icb(priv->base, cfg->icb[1].marker_icb, target,
+				base_addr_c + cfg->icb[1].cache_unit);
+	}
+}
+
+/*
+ * get the next ICB address
+ */
+static inline void meram_get_next_icb_addr(struct sh_mobile_meram_info *pdata,
+					   struct sh_mobile_meram_cfg *cfg,
+					   unsigned long *icb_addr_y,
+					   unsigned long *icb_addr_c)
+{
+	unsigned long icb_offset;
+
+	if (pdata->addr_mode == SH_MOBILE_MERAM_MODE0)
+		icb_offset = 0x80000000 | (cfg->current_reg << 29);
+	else
+		icb_offset = 0xc0000000 | (cfg->current_reg << 23);
+
+	*icb_addr_y = icb_offset | (cfg->icb[0].marker_icb << 24);
+	if ((*icb_addr_c) && is_nvcolor(cfg->pixelformat))
+		*icb_addr_c = icb_offset | (cfg->icb[1].marker_icb << 24);
+}
+
+#define MERAM_CALC_BYTECOUNT(x, y) \
+	(((x) * (y) + (MERAM_LINE_WIDTH - 1)) & ~(MERAM_LINE_WIDTH - 1))
+
+/*
+ * initialize MERAM
+ */
+
+static int meram_init(struct sh_mobile_meram_priv *priv,
+		      struct sh_mobile_meram_icb *icb,
+		      int xres, int yres, int *out_pitch)
+{
+	unsigned long total_byte_count = MERAM_CALC_BYTECOUNT(xres, yres);
+	unsigned long bnm;
+	int lcdc_pitch, xpitch, line_cnt;
+	int save_lines;
+
+	/* adjust pitch to 1024, 2048, 4096 or 8192 */
+	lcdc_pitch = (xres - 1) | 1023;
+	lcdc_pitch = lcdc_pitch | (lcdc_pitch >> 1);
+	lcdc_pitch = lcdc_pitch | (lcdc_pitch >> 2);
+	lcdc_pitch += 1;
+
+	/* derive settings */
+	if (lcdc_pitch == 8192 && yres >= 1024) {
+		lcdc_pitch = xpitch = MERAM_LINE_WIDTH;
+		line_cnt = total_byte_count >> 11;
+		*out_pitch = xres;
+		save_lines = (icb->meram_size / 16 / MERAM_SEC_LINE);
+		save_lines *= MERAM_SEC_LINE;
+	} else {
+		xpitch = xres;
+		line_cnt = yres;
+		*out_pitch = lcdc_pitch;
+		save_lines = icb->meram_size / (lcdc_pitch >> 10) / 2;
+		save_lines &= 0xff;
+	}
+	bnm = (save_lines - 1) << 16;
+
+	/* TODO: we better to check if we have enough MERAM buffer size */
+
+	/* set up ICB */
+	meram_write_icb(priv->base, icb->cache_icb,  MExxBSIZE,
+			MERAM_MExxBSIZE_VAL(0x0, line_cnt - 1, xpitch - 1));
+	meram_write_icb(priv->base, icb->marker_icb, MExxBSIZE,
+			MERAM_MExxBSIZE_VAL(0xf, line_cnt - 1, xpitch - 1));
+
+	meram_write_icb(priv->base, icb->cache_icb,  MExxMNCF, bnm);
+	meram_write_icb(priv->base, icb->marker_icb, MExxMNCF, bnm);
+
+	meram_write_icb(priv->base, icb->cache_icb,  MExxSBSIZE, xpitch);
+	meram_write_icb(priv->base, icb->marker_icb, MExxSBSIZE, xpitch);
+
+	/* save a cache unit size */
+	icb->cache_unit = xres * save_lines;
+
+	/*
+	 * Set MERAM for framebuffer
+	 *
+	 * 0x70f:  WD = 0x3, WS=0x1, CM=0x1, MD=FB mode
+	 * we also chain the cache_icb and the marker_icb.
+	 * we also split the allocated MERAM buffer between two ICBs.
+	 */
+	meram_write_icb(priv->base, icb->cache_icb, MExxCTL,
+			MERAM_MExxCTL_VAL(0x70f, icb->marker_icb,
+					  icb->meram_offset));
+	meram_write_icb(priv->base, icb->marker_icb, MExxCTL,
+			MERAM_MExxCTL_VAL(0x70f, icb->cache_icb,
+					  icb->meram_offset +
+					  icb->meram_size / 2));
+
+	return 0;
+}
+
+static void meram_deinit(struct sh_mobile_meram_priv *priv,
+			struct sh_mobile_meram_icb *icb)
+{
+	/* disable ICB */
+	meram_write_icb(priv->base, icb->cache_icb,  MExxCTL, 0);
+	meram_write_icb(priv->base, icb->marker_icb, MExxCTL, 0);
+	icb->cache_unit = 0;
+}
+
+/*
+ * register the ICB
+ */
+
+static int sh_mobile_meram_register(struct sh_mobile_meram_info *pdata,
+				    struct sh_mobile_meram_cfg *cfg,
+				    int xres, int yres, int pixelformat,
+				    unsigned long base_addr_y,
+				    unsigned long base_addr_c,
+				    unsigned long *icb_addr_y,
+				    unsigned long *icb_addr_c,
+				    int *pitch)
+{
+	struct platform_device *pdev;
+	struct sh_mobile_meram_priv *priv;
+	int n, out_pitch;
+	int error = 0;
+
+	if (!pdata || !pdata->priv || !pdata->pdev || !cfg)
+		return -EINVAL;
+
+	if (pixelformat != SH_MOBILE_MERAM_PF_NV &&
+	    pixelformat != SH_MOBILE_MERAM_PF_NV24 &&
+	    pixelformat != SH_MOBILE_MERAM_PF_RGB)
+		return -EINVAL;
+
+	priv = pdata->priv;
+	pdev = pdata->pdev;
+
+	dev_dbg(&pdev->dev, "registering %dx%d (%s) (y=%08lx, c=%08lx)",
+		xres, yres, (!pixelformat) ? "yuv" : "rgb",
+		base_addr_y, base_addr_c);
+
+	mutex_lock(&priv->lock);
+
+	/* we can't handle wider than 8192px */
+	if (xres > 8192) {
+		dev_err(&pdev->dev, "width exceeding the limit (> 8192).");
+		error = -EINVAL;
+		goto err;
+	}
+
+	if (priv->used_meram_cache_regions + 2 > SH_MOBILE_MERAM_ICB_NUM) {
+		dev_err(&pdev->dev, "no more ICB available.");
+		error = -EINVAL;
+		goto err;
+	}
+
+	/* do we have at least one ICB config? */
+	if (cfg->icb[0].marker_icb < 0 || cfg->icb[0].cache_icb < 0) {
+		dev_err(&pdev->dev, "at least one ICB is required.");
+		error = -EINVAL;
+		goto err;
+	}
+
+	/* make sure that there's no overlaps */
+	if (meram_check_overlap(priv, &cfg->icb[0])) {
+		dev_err(&pdev->dev, "conflicting config detected.");
+		error = -EINVAL;
+		goto err;
+	}
+	n = 1;
+
+	/* do the same if we have the second ICB set */
+	if (cfg->icb[1].marker_icb >= 0 && cfg->icb[1].cache_icb >= 0) {
+		if (meram_check_overlap(priv, &cfg->icb[1])) {
+			dev_err(&pdev->dev, "conflicting config detected.");
+			error = -EINVAL;
+			goto err;
+		}
+		n = 2;
+	}
+
+	if (is_nvcolor(pixelformat) && n != 2) {
+		dev_err(&pdev->dev, "requires two ICB sets for planar Y/C.");
+		error =  -EINVAL;
+		goto err;
+	}
+
+	/* we now register the ICB */
+	cfg->pixelformat = pixelformat;
+	meram_mark(priv, &cfg->icb[0]);
+	if (is_nvcolor(pixelformat))
+		meram_mark(priv, &cfg->icb[1]);
+
+	/* initialize MERAM */
+	meram_init(priv, &cfg->icb[0], xres, yres, &out_pitch);
+	*pitch = out_pitch;
+	if (pixelformat == SH_MOBILE_MERAM_PF_NV)
+		meram_init(priv, &cfg->icb[1], xres, (yres + 1) / 2,
+			&out_pitch);
+	else if (pixelformat == SH_MOBILE_MERAM_PF_NV24)
+		meram_init(priv, &cfg->icb[1], 2 * xres, (yres + 1) / 2,
+			&out_pitch);
+
+	cfg->current_reg = 1;
+	meram_set_next_addr(priv, cfg, base_addr_y, base_addr_c);
+	meram_get_next_icb_addr(pdata, cfg, icb_addr_y, icb_addr_c);
+
+	dev_dbg(&pdev->dev, "registered - can access via y=%08lx, c=%08lx",
+		*icb_addr_y, *icb_addr_c);
+
+err:
+	mutex_unlock(&priv->lock);
+	return error;
+}
+
+static int sh_mobile_meram_unregister(struct sh_mobile_meram_info *pdata,
+				      struct sh_mobile_meram_cfg *cfg)
+{
+	struct sh_mobile_meram_priv *priv;
+
+	if (!pdata || !pdata->priv || !cfg)
+		return -EINVAL;
+
+	priv = pdata->priv;
+
+	mutex_lock(&priv->lock);
+
+	/* deinit & unmark */
+	if (is_nvcolor(cfg->pixelformat)) {
+		meram_deinit(priv, &cfg->icb[1]);
+		meram_unmark(priv, &cfg->icb[1]);
+	}
+	meram_deinit(priv, &cfg->icb[0]);
+	meram_unmark(priv, &cfg->icb[0]);
+
+	mutex_unlock(&priv->lock);
+
+	return 0;
+}
+
+static int sh_mobile_meram_update(struct sh_mobile_meram_info *pdata,
+				  struct sh_mobile_meram_cfg *cfg,
+				  unsigned long base_addr_y,
+				  unsigned long base_addr_c,
+				  unsigned long *icb_addr_y,
+				  unsigned long *icb_addr_c)
+{
+	struct sh_mobile_meram_priv *priv;
+
+	if (!pdata || !pdata->priv || !cfg)
+		return -EINVAL;
+
+	priv = pdata->priv;
+
+	mutex_lock(&priv->lock);
+
+	meram_set_next_addr(priv, cfg, base_addr_y, base_addr_c);
+	meram_get_next_icb_addr(pdata, cfg, icb_addr_y, icb_addr_c);
+
+	mutex_unlock(&priv->lock);
+
+	return 0;
+}
+
+static struct sh_mobile_meram_ops sh_mobile_meram_ops = {
+	.module			= THIS_MODULE,
+	.meram_register		= sh_mobile_meram_register,
+	.meram_unregister	= sh_mobile_meram_unregister,
+	.meram_update		= sh_mobile_meram_update,
+};
+
+/*
+ * initialize MERAM
+ */
+
+static int sh_mobile_meram_remove(struct platform_device *pdev);
+
+static int __devinit sh_mobile_meram_probe(struct platform_device *pdev)
+{
+	struct sh_mobile_meram_priv *priv;
+	struct sh_mobile_meram_info *pdata = pdev->dev.platform_data;
+	struct resource *res;
+	int error;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data defined\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "cannot get platform resources\n");
+		return -ENOENT;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&pdev->dev, "cannot allocate device data\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	/* initialize private data */
+	mutex_init(&priv->lock);
+	priv->base = ioremap_nocache(res->start, resource_size(res));
+	if (!priv->base) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		error = -EFAULT;
+		goto err;
+	}
+	pdata->ops = &sh_mobile_meram_ops;
+	pdata->priv = priv;
+	pdata->pdev = pdev;
+
+	/* initialize ICB addressing mode */
+	if (pdata->addr_mode == SH_MOBILE_MERAM_MODE1)
+		meram_write_reg(priv->base, MEVCR1, 1 << 29);
+
+	dev_info(&pdev->dev, "sh_mobile_meram initialized.");
+
+	return 0;
+
+err:
+	sh_mobile_meram_remove(pdev);
+
+	return error;
+}
+
+
+static int sh_mobile_meram_remove(struct platform_device *pdev)
+{
+	struct sh_mobile_meram_priv *priv = platform_get_drvdata(pdev);
+
+	if (priv->base)
+		iounmap(priv->base);
+
+	mutex_destroy(&priv->lock);
+
+	kfree(priv);
+
+	return 0;
+}
+
+static struct platform_driver sh_mobile_meram_driver = {
+	.driver	= {
+		.name		= "sh_mobile_meram",
+		.owner		= THIS_MODULE,
+	},
+	.probe		= sh_mobile_meram_probe,
+	.remove		= sh_mobile_meram_remove,
+};
+
+static int __init sh_mobile_meram_init(void)
+{
+	return platform_driver_register(&sh_mobile_meram_driver);
+}
+
+static void __exit sh_mobile_meram_exit(void)
+{
+	platform_driver_unregister(&sh_mobile_meram_driver);
+}
+
+module_init(sh_mobile_meram_init);
+module_exit(sh_mobile_meram_exit);
+
+MODULE_DESCRIPTION("SuperH Mobile MERAM driver");
+MODULE_AUTHOR("Damian Hobson-Garcia / Takanari Hayama");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/sh_mobile_meram.h b/drivers/video/sh_mobile_meram.h
new file mode 100644
index 0000000..82c54fb
--- /dev/null
+++ b/drivers/video/sh_mobile_meram.h
@@ -0,0 +1,41 @@
+#ifndef __sh_mobile_meram_h__
+#define __sh_mobile_meram_h__
+
+#include <linux/mutex.h>
+#include <video/sh_mobile_meram.h>
+
+/*
+ * MERAM private
+ */
+
+#define MERAM_ICB_Y 0x1
+#define MERAM_ICB_C 0x2
+
+/* MERAM cache size */
+#define SH_MOBILE_MERAM_ICB_NUM		32
+
+#define SH_MOBILE_MERAM_CACHE_OFFSET(p)	((p) >> 16)
+#define SH_MOBILE_MERAM_CACHE_SIZE(p)	((p) & 0xffff)
+
+struct sh_mobile_meram_priv {
+	void __iomem	*base;
+	struct mutex	lock;
+	unsigned long	used_icb;
+	int		used_meram_cache_regions;
+	unsigned long	used_meram_cache[SH_MOBILE_MERAM_ICB_NUM];
+};
+
+int sh_mobile_meram_alloc_icb(const struct sh_mobile_meram_cfg *cfg,
+		   int xres,
+		   int yres,
+		   unsigned int base_addr,
+		   int yuv_mode,
+		   int *marker_icb,
+		   int *out_pitch);
+
+void sh_mobile_meram_free_icb(int marker_icb);
+
+#define SH_MOBILE_MERAM_START(ind, ab) \
+	(0xC0000000 | ((ab & 0x1) << 23) | ((ind & 0x1F) << 24))
+
+#endif /* !__sh_mobile_meram_h__ */
diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 56ef6b3..87f0be1 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -1625,22 +1625,22 @@
 	return 0; /* everything is setup */
 
  err_mem_res:
-	release_resource(info->fbmem_res);
-	kfree(info->fbmem_res);
+	release_mem_region(info->fbmem_res->start,
+			   resource_size(info->fbmem_res));
 
  err_regs2d_map:
 	iounmap(info->regs2d);
 
  err_regs2d_res:
-	release_resource(info->regs2d_res);
-	kfree(info->regs2d_res);
+	release_mem_region(info->regs2d_res->start,
+			   resource_size(info->regs2d_res));
 
  err_regs_map:
 	iounmap(info->regs);
 
  err_regs_res:
-	release_resource(info->regs_res);
-	kfree(info->regs_res);
+	release_mem_region(info->regs_res->start,
+			   resource_size(info->regs_res));
 
  err_release:
 	return ret;
@@ -1652,16 +1652,16 @@
 	sm501_unit_power(info->dev->parent, SM501_GATE_DISPLAY, 0);
 
 	iounmap(info->fbmem);
-	release_resource(info->fbmem_res);
-	kfree(info->fbmem_res);
+	release_mem_region(info->fbmem_res->start,
+			   resource_size(info->fbmem_res));
 
 	iounmap(info->regs2d);
-	release_resource(info->regs2d_res);
-	kfree(info->regs2d_res);
+	release_mem_region(info->regs2d_res->start,
+			   resource_size(info->regs2d_res));
 
 	iounmap(info->regs);
-	release_resource(info->regs_res);
-	kfree(info->regs_res);
+	release_mem_region(info->regs_res->start,
+			   resource_size(info->regs_res));
 }
 
 static int sm501fb_init_fb(struct fb_info *fb,
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 695066b..52b0f3e 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include <linux/delay.h>
+#include <linux/prefetch.h>
 #include <video/udlfb.h>
 #include "edid.h"
 
@@ -1587,10 +1588,19 @@
 		goto error;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++)
-		device_create_file(info->dev, &fb_device_attrs[i]);
+	for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++) {
+		retval = device_create_file(info->dev, &fb_device_attrs[i]);
+		if (retval) {
+			pr_err("device_create_file failed %d\n", retval);
+			goto err_del_attrs;
+		}
+	}
 
-	device_create_bin_file(info->dev, &edid_attr);
+	retval = device_create_bin_file(info->dev, &edid_attr);
+	if (retval) {
+		pr_err("device_create_bin_file failed %d\n", retval);
+		goto err_del_attrs;
+	}
 
 	pr_info("DisplayLink USB device /dev/fb%d attached. %dx%d resolution."
 			" Using %dK framebuffer memory\n", info->node,
@@ -1599,6 +1609,10 @@
 			info->fix.smem_len * 2 : info->fix.smem_len) >> 10);
 	return 0;
 
+err_del_attrs:
+	for (i -= 1; i >= 0; i--)
+		device_remove_file(info->dev, &fb_device_attrs[i]);
+
 error:
 	if (dev) {
 
diff --git a/fs/Kconfig b/fs/Kconfig
index f3aa9b0..979992d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -121,9 +121,25 @@
 
 	  See <file:Documentation/filesystems/tmpfs.txt> for details.
 
+config TMPFS_XATTR
+	bool "Tmpfs extended attributes"
+	depends on TMPFS
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  Currently this enables support for the trusted.* and
+	  security.* namespaces.
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on tmpfs.
+
 config TMPFS_POSIX_ACL
 	bool "Tmpfs POSIX Access Control Lists"
-	depends on TMPFS
+	depends on TMPFS_XATTR
 	select GENERIC_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bf9c7a7..1f2b199 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1238,6 +1238,8 @@
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1264,15 +1266,16 @@
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims.  Any write
-		 * holder makes the write_holder state stick until all
-		 * are released.  This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested.  Any
+		 * write holder makes the write_holder state stick until
+		 * all are released.  This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/dcache.c b/fs/dcache.c
index 18b2a1f..37f72ee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,7 +1220,7 @@
 EXPORT_SYMBOL(shrink_dcache_parent);
 
 /*
- * Scan `nr' dentries and return the number which remain.
+ * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
  *
  * We need to avoid reentering the filesystem if the caller is performing a
  * GFP_NOFS allocation attempt.  One example deadlock is:
@@ -1231,8 +1231,12 @@
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
+
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c8..c00e055 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@
 static void drop_slab(void)
 {
 	int nr_objects;
+	struct shrink_control shrink = {
+		.gfp_mask = GFP_KERNEL,
+	};
 
 	do {
-		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+		nr_objects = shrink_slab(&shrink, 1000, 1000);
 	} while (nr_objects > 10);
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index c1cf372..936f577 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -200,7 +200,7 @@
 
 #ifdef CONFIG_STACK_GROWSUP
 	if (write) {
-		ret = expand_stack_downwards(bprm->vma, pos);
+		ret = expand_downwards(bprm->vma, pos);
 		if (ret < 0)
 			return NULL;
 	}
@@ -600,7 +600,7 @@
 	unsigned long length = old_end - old_start;
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 
 	BUG_ON(new_start > new_end);
 
@@ -626,12 +626,12 @@
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(tlb, new_end, old_end, new_end,
+		free_pgd_range(&tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -640,10 +640,10 @@
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(tlb, old_start, old_end, new_end,
+		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
-	tlb_finish_mmu(tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, new_end, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f1..30afdfa 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@
 	_enter("{OBJ%x OP%x,%u}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
 
-	fscache_set_op_state(op, "EnQ");
-
 	ASSERT(list_empty(&op->pend_link));
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
-	fscache_set_op_state(op, "Run");
-
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@
 
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
-	fscache_set_op_state(op, "SubmitX");
-
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@
 
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
-	fscache_set_op_state(op, "Submit");
-
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@
 	if (!atomic_dec_and_test(&op->usage))
 		return;
 
-	fscache_set_op_state(op, "Put");
-
 	_debug("PUT OP");
 	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
 		BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c..a2a5d19 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@
 	fscache_stat(&fscache_n_attr_changed_calls);
 
 	if (fscache_object_is_active(object)) {
-		fscache_set_op_state(op, "CallFS");
 		fscache_stat(&fscache_n_cop_attr_changed);
 		ret = object->cache->ops->attr_changed(object);
 		fscache_stat_d(&fscache_n_cop_attr_changed);
-		fscache_set_op_state(op, "Done");
 		if (ret < 0)
 			fscache_abort_object(object);
 	}
@@ -190,7 +188,6 @@
 
 	fscache_operation_init(op, fscache_attr_changed_op, NULL);
 	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
-	fscache_set_op_name(op, "Attr");
 
 	spin_lock(&cookie->lock);
 
@@ -257,7 +254,6 @@
 	op->context	= context;
 	op->start_time	= jiffies;
 	INIT_LIST_HEAD(&op->to_do);
-	fscache_set_op_name(&op->op, "Retr");
 	return op;
 }
 
@@ -368,7 +364,6 @@
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
-	fscache_set_op_name(&op->op, "RetrRA1");
 
 	spin_lock(&cookie->lock);
 
@@ -487,7 +482,6 @@
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
-	fscache_set_op_name(&op->op, "RetrRAN");
 
 	spin_lock(&cookie->lock);
 
@@ -589,7 +583,6 @@
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
-	fscache_set_op_name(&op->op, "RetrAL1");
 
 	spin_lock(&cookie->lock);
 
@@ -662,8 +655,6 @@
 
 	_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
 
-	fscache_set_op_state(&op->op, "GetPage");
-
 	spin_lock(&object->lock);
 	cookie = object->cookie;
 
@@ -698,15 +689,12 @@
 	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 
-	fscache_set_op_state(&op->op, "Store");
 	fscache_stat(&fscache_n_store_pages);
 	fscache_stat(&fscache_n_cop_write_page);
 	ret = object->cache->ops->write_page(op, page);
 	fscache_stat_d(&fscache_n_cop_write_page);
-	fscache_set_op_state(&op->op, "EndWrite");
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
-		fscache_set_op_state(&op->op, "Abort");
 		fscache_abort_object(object);
 	} else {
 		fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@
 	fscache_operation_init(&op->op, fscache_write_op,
 			       fscache_release_write_op);
 	op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
-	fscache_set_op_name(&op->op, "Write1");
 
 	ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
 	if (ret < 0)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a2a6abb..2792a79 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1346,11 +1346,14 @@
 }
 
 
-static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink,
+				    struct shrink_control *sc)
 {
 	struct gfs2_glock *gl;
 	int may_demote;
 	int nr_skipped = 0;
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 	LIST_HEAD(skipped);
 
 	if (nr == 0)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d986..42e8d23 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
@@ -77,19 +78,20 @@
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct gfs2_quota_data *qd;
 	struct gfs2_sbd *sdp;
+	int nr_to_scan = sc->nr_to_scan;
 
-	if (nr == 0)
+	if (nr_to_scan == 0)
 		goto out;
 
-	if (!(gfp_mask & __GFP_FS))
+	if (!(sc->gfp_mask & __GFP_FS))
 		return -1;
 
 	spin_lock(&qd_lru_lock);
-	while (nr && !list_empty(&qd_lru_list)) {
+	while (nr_to_scan && !list_empty(&qd_lru_list)) {
 		qd = list_entry(qd_lru_list.next,
 				struct gfs2_quota_data, qd_reclaim);
 		sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@
 		spin_unlock(&qd_lru_lock);
 		kmem_cache_free(gfs2_quotad_cachep, qd);
 		spin_lock(&qd_lru_lock);
-		nr--;
+		nr_to_scan--;
 	}
 	spin_unlock(&qd_lru_lock);
 
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236c..90bf1c3 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
 
 struct gfs2_inode;
 struct gfs2_sbd;
+struct shrink_control;
 
 #define NO_QUOTA_CHANGE ((u32)-1)
 
@@ -51,7 +52,8 @@
 	return ret;
 }
 
-extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
+				 struct shrink_control *sc);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1c..e7a0357 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@
 	pgoff = offset >> PAGE_SHIFT;
 
 	i_size_write(inode, offset);
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	if (!prio_tree_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	truncate_hugepages(inode, offset);
 	return 0;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 05f4fa5..990d284 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -326,12 +326,11 @@
 	memset(mapping, 0, sizeof(*mapping));
 	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
 	spin_lock_init(&mapping->tree_lock);
-	spin_lock_init(&mapping->i_mmap_lock);
+	mutex_init(&mapping->i_mmap_mutex);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-	mutex_init(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(address_space_init_once);
 
@@ -752,8 +751,12 @@
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
+
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be..8c32ef3 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink,
+			      struct shrink_control *sc);
 
 static struct shrinker mb_cache_shrinker = {
 	.shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@
  * gets low.
  *
  * @shrink: (ignored)
- * @nr_to_scan: Number of objects to scan
- * @gfp_mask: (ignored)
+ * @sc: shrink_control passed from reclaim
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
 {
 	LIST_HEAD(free_list);
 	struct mb_cache *cache;
 	struct mb_cache_entry *entry, *tmp;
 	int count = 0;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	mb_debug("trying to free %d entries", nr_to_scan);
 	spin_lock(&mb_cache_spinlock);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4c..202f370 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@
 #endif
 	struct ncp_entry_info finfo;
 
-	data.wdog_pid = NULL;
+	memset(&data, 0, sizeof(data));
 	server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
 	if (!server)
 		return -ENOMEM;
@@ -496,7 +496,6 @@
 				struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
 
 				data.flags = md->flags;
-				data.int_flags = 0;
 				data.mounted_uid = md->mounted_uid;
 				data.wdog_pid = find_get_pid(md->wdog_pid);
 				data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@
 				data.file_mode = md->file_mode;
 				data.dir_mode = md->dir_mode;
 				data.info_fd = -1;
-				data.mounted_vol[0] = 0;
 			}
 			break;
 		default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672..424e477 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@
 	}
 }
 
-int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink,
+			      struct shrink_control *sc)
 {
 	LIST_HEAD(head);
 	struct nfs_inode *nfsi, *next;
 	struct nfs_access_entry *cache;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
 		return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce..2df6ca7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
-					int nr_to_scan, gfp_t gfp_mask);
+					struct shrink_control *sc);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97..8ed4d343 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@
 				   struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->discard_alignment);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n",
+			queue_limit_discard_alignment(&disk->queue->limits,
+							p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@
 	p->start_sect = start;
 	p->alignment_offset =
 		queue_limit_alignment_offset(&disk->queue->limits, start);
-	p->discard_alignment =
-		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3..3763b43 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
 
+struct proc_maps_private {
+	struct pid *pid;
+	struct task_struct *task;
+#ifdef CONFIG_MMU
+	struct vm_area_struct *tail_vma;
+#endif
+};
+
 void proc_init_inodecache(void);
 
 static inline struct pid *proc_pid(struct inode *inode)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 318d865..2c9db29 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -858,7 +858,192 @@
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 
 #ifdef CONFIG_NUMA
-extern int show_numa_map(struct seq_file *m, void *v);
+
+struct numa_maps {
+	struct vm_area_struct *vma;
+	unsigned long pages;
+	unsigned long anon;
+	unsigned long active;
+	unsigned long writeback;
+	unsigned long mapcount_max;
+	unsigned long dirty;
+	unsigned long swapcache;
+	unsigned long node[MAX_NUMNODES];
+};
+
+struct numa_maps_private {
+	struct proc_maps_private proc_maps;
+	struct numa_maps md;
+};
+
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+{
+	int count = page_mapcount(page);
+
+	md->pages++;
+	if (pte_dirty || PageDirty(page))
+		md->dirty++;
+
+	if (PageSwapCache(page))
+		md->swapcache++;
+
+	if (PageActive(page) || PageUnevictable(page))
+		md->active++;
+
+	if (PageWriteback(page))
+		md->writeback++;
+
+	if (PageAnon(page))
+		md->anon++;
+
+	if (count > md->mapcount_max)
+		md->mapcount_max = count;
+
+	md->node[page_to_nid(page)]++;
+}
+
+static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+		unsigned long end, struct mm_walk *walk)
+{
+	struct numa_maps *md;
+	spinlock_t *ptl;
+	pte_t *orig_pte;
+	pte_t *pte;
+
+	md = walk->private;
+	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	do {
+		struct page *page;
+		int nid;
+
+		if (!pte_present(*pte))
+			continue;
+
+		page = vm_normal_page(md->vma, addr, *pte);
+		if (!page)
+			continue;
+
+		if (PageReserved(page))
+			continue;
+
+		nid = page_to_nid(page);
+		if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
+			continue;
+
+		gather_stats(page, md, pte_dirty(*pte));
+
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(orig_pte, ptl);
+	return 0;
+}
+#ifdef CONFIG_HUGETLB_PAGE
+static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+		unsigned long addr, unsigned long end, struct mm_walk *walk)
+{
+	struct numa_maps *md;
+	struct page *page;
+
+	if (pte_none(*pte))
+		return 0;
+
+	page = pte_page(*pte);
+	if (!page)
+		return 0;
+
+	md = walk->private;
+	gather_stats(page, md, pte_dirty(*pte));
+	return 0;
+}
+
+#else
+static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+		unsigned long addr, unsigned long end, struct mm_walk *walk)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Display pages allocated per node and memory policy via /proc.
+ */
+static int show_numa_map(struct seq_file *m, void *v)
+{
+	struct numa_maps_private *numa_priv = m->private;
+	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
+	struct vm_area_struct *vma = v;
+	struct numa_maps *md = &numa_priv->md;
+	struct file *file = vma->vm_file;
+	struct mm_struct *mm = vma->vm_mm;
+	struct mm_walk walk = {};
+	struct mempolicy *pol;
+	int n;
+	char buffer[50];
+
+	if (!mm)
+		return 0;
+
+	/* Ensure we start with an empty set of numa_maps statistics. */
+	memset(md, 0, sizeof(*md));
+
+	md->vma = vma;
+
+	walk.hugetlb_entry = gather_hugetbl_stats;
+	walk.pmd_entry = gather_pte_stats;
+	walk.private = md;
+	walk.mm = mm;
+
+	pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+	mpol_to_str(buffer, sizeof(buffer), pol, 0);
+	mpol_cond_put(pol);
+
+	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
+
+	if (file) {
+		seq_printf(m, " file=");
+		seq_path(m, &file->f_path, "\n\t= ");
+	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+		seq_printf(m, " heap");
+	} else if (vma->vm_start <= mm->start_stack &&
+			vma->vm_end >= mm->start_stack) {
+		seq_printf(m, " stack");
+	}
+
+	walk_page_range(vma->vm_start, vma->vm_end, &walk);
+
+	if (!md->pages)
+		goto out;
+
+	if (md->anon)
+		seq_printf(m, " anon=%lu", md->anon);
+
+	if (md->dirty)
+		seq_printf(m, " dirty=%lu", md->dirty);
+
+	if (md->pages != md->anon && md->pages != md->dirty)
+		seq_printf(m, " mapped=%lu", md->pages);
+
+	if (md->mapcount_max > 1)
+		seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+	if (md->swapcache)
+		seq_printf(m, " swapcache=%lu", md->swapcache);
+
+	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+		seq_printf(m, " active=%lu", md->active);
+
+	if (md->writeback)
+		seq_printf(m, " writeback=%lu", md->writeback);
+
+	for_each_node_state(n, N_HIGH_MEMORY)
+		if (md->node[n])
+			seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+	seq_putc(m, '\n');
+
+	if (m->count < m->size)
+		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
+	return 0;
+}
 
 static const struct seq_operations proc_pid_numa_maps_op = {
         .start  = m_start,
@@ -869,7 +1054,20 @@
 
 static int numa_maps_open(struct inode *inode, struct file *file)
 {
-	return do_maps_open(inode, file, &proc_pid_numa_maps_op);
+	struct numa_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->proc_maps.pid = proc_pid(inode);
+		ret = seq_open(file, &proc_pid_numa_maps_op);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
+	}
+	return ret;
 }
 
 const struct file_operations proc_numa_maps_operations = {
@@ -878,4 +1076,4 @@
 	.llseek		= seq_lseek,
 	.release	= seq_release_private,
 };
-#endif
+#endif /* CONFIG_NUMA */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f..5b572c8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@
  * This is called from kswapd when we think we need some
  * more memory
  */
-static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink,
+				 struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+
 	if (nr) {
 		spin_lock(&dq_list_lock);
 		prune_dqcache(nr);
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978..aa866d3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@
 	.get = generic_pipe_buf_get,
 };
 
+static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
+{
+	smp_mb();
+	if (waitqueue_active(&pipe->wait))
+		wake_up_interruptible(&pipe->wait);
+	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+}
+
 /**
  * splice_to_pipe - fill passed data into a pipe
  * @pipe:	pipe to fill
@@ -247,12 +255,8 @@
 
 	pipe_unlock(pipe);
 
-	if (do_wakeup) {
-		smp_mb();
-		if (waitqueue_active(&pipe->wait))
-			wake_up_interruptible(&pipe->wait);
-		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-	}
+	if (do_wakeup)
+		wakeup_pipe_readers(pipe);
 
 	while (page_nr < spd_pages)
 		spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@
 	/*
 	 * If we put data in the output pipe, wakeup any potential readers.
 	 */
-	if (ret > 0) {
-		smp_mb();
-		if (waitqueue_active(&opipe->wait))
-			wake_up_interruptible(&opipe->wait);
-		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-	}
+	if (ret > 0)
+		wakeup_pipe_readers(opipe);
+
 	if (input_wakeup)
 		wakeup_pipe_writers(ipipe);
 
@@ -1976,12 +1977,8 @@
 	/*
 	 * If we put data in the output pipe, wakeup any potential readers.
 	 */
-	if (ret > 0) {
-		smp_mb();
-		if (waitqueue_active(&opipe->wait))
-			wake_up_interruptible(&opipe->wait);
-		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-	}
+	if (ret > 0)
+		wakeup_pipe_readers(opipe);
 
 	return ret;
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52b2b5d..5e68099 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1422,12 +1422,12 @@
 int
 xfs_buftarg_shrink(
 	struct shrinker		*shrink,
-	int			nr_to_scan,
-	gfp_t			mask)
+	struct shrink_control	*sc)
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
 	struct xfs_buf		*bp;
+	int nr_to_scan = sc->nr_to_scan;
 	LIST_HEAD(dispose);
 
 	if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index cb1bb20..8ecad5f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -1032,13 +1032,14 @@
 static int
 xfs_reclaim_inode_shrink(
 	struct shrinker	*shrink,
-	int		nr_to_scan,
-	gfp_t		gfp_mask)
+	struct shrink_control *sc)
 {
 	struct xfs_mount *mp;
 	struct xfs_perag *pag;
 	xfs_agnumber_t	ag;
 	int		reclaimable;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa..b94dace 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@
 
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int	xfs_qm_shake(struct shrinker *, int, gfp_t);
+STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
 static struct shrinker xfs_qm_shaker = {
 	.shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@
 STATIC int
 xfs_qm_shake(
 	struct shrinker	*shrink,
-	int		nr_to_scan,
-	gfp_t		gfp_mask)
+	struct shrink_control *sc)
 {
 	int	ndqused, nfree, n;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if (!kmem_shake_allow(gfp_mask))
 		return 0;
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 57b5c3c..87bc536 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -24,7 +24,10 @@
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
+	do { \
+		memcpy(dst, src, len); \
+		flush_icache_user_range(vma, page, vaddr, len); \
+	} while (0)
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 
diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index 587566f..61fa862 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -78,7 +78,7 @@
 	[RLIMIT_CORE]		= {              0,  RLIM_INFINITY },	\
 	[RLIMIT_RSS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 	[RLIMIT_NPROC]		= {              0,              0 },	\
-	[RLIMIT_NOFILE]		= {       INR_OPEN,       INR_OPEN },	\
+	[RLIMIT_NOFILE]		= {   INR_OPEN_CUR,   INR_OPEN_MAX },	\
 	[RLIMIT_MEMLOCK]	= {    MLOCK_LIMIT,    MLOCK_LIMIT },	\
 	[RLIMIT_AS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 	[RLIMIT_LOCKS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e43f976..e58fa77 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -5,6 +5,8 @@
  * Copyright 2001 Red Hat, Inc.
  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  *
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -17,97 +19,111 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 /*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore needs some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, free the page. Since the disabling of
+ * IRQs delays the completion of the TLB flush we can never observe an already
+ * freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by some
+ * other means, this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, this
+ * allocation is deep inside the MM code and can thus easily fail on memory
+ * pressure. To guarantee progress we fall back to single table freeing, see
+ * the implementation of tlb_remove_table_one().
+ *
  */
-#ifdef CONFIG_SMP
-  #ifdef ARCH_FREE_PTR_NR
-    #define FREE_PTR_NR   ARCH_FREE_PTR_NR
-  #else
-    #define FREE_PTE_NR	506
-  #endif
-  #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
-#else
-  #define FREE_PTE_NR	1
-  #define tlb_fast_mode(tlb) 1
+struct mmu_table_batch {
+	struct rcu_head		rcu;
+	unsigned int		nr;
+	void			*tables[0];
+};
+
+#define MAX_TABLE_BATCH		\
+	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
 #endif
 
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define MMU_GATHER_BUNDLE	8
+
+struct mmu_gather_batch {
+	struct mmu_gather_batch	*next;
+	unsigned int		nr;
+	unsigned int		max;
+	struct page		*pages[0];
+};
+
+#define MAX_GATHER_BATCH	\
+	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
+
 /* struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		nr;	/* set to ~0U means fast mode */
-	unsigned int		need_flush;/* Really unmapped some ptes? */
-	unsigned int		fullmm; /* non-zero means full mm flush */
-	struct page *		pages[FREE_PTE_NR];
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	struct mmu_table_batch	*batch;
+#endif
+	unsigned int		need_flush : 1,	/* Did free PTEs */
+				fast_mode  : 1; /* No batching   */
+
+	unsigned int		fullmm;
+
+	struct mmu_gather_batch *active;
+	struct mmu_gather_batch	local;
+	struct page		*__pages[MMU_GATHER_BUNDLE];
 };
 
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+#define HAVE_GENERIC_MMU_GATHER
 
-/* tlb_gather_mmu
- *	Return a pointer to an initialized struct mmu_gather.
- */
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline int tlb_fast_mode(struct mmu_gather *tlb)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
-	tlb->mm = mm;
-
-	/* Use fast mode if only one CPU is online */
-	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
-
-	tlb->fullmm = full_mm_flush;
-
-	return tlb;
+#ifdef CONFIG_SMP
+	return tlb->fast_mode;
+#else
+	/*
+	 * For UP we don't need to worry about TLB flush
+	 * and page free order so much..
+	 */
+	return 1;
+#endif
 }
 
-static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-	if (!tlb->need_flush)
-		return;
-	tlb->need_flush = 0;
-	tlb_flush(tlb);
-	if (!tlb_fast_mode(tlb)) {
-		free_pages_and_swap_cache(tlb->pages, tlb->nr);
-		tlb->nr = 0;
-	}
-}
-
-/* tlb_finish_mmu
- *	Called at the end of the shootdown operation to free up any resources
- *	that were required.
- */
-static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-	tlb_flush_mmu(tlb, start, end);
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-
-	put_cpu_var(mmu_gathers);
-}
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
+void tlb_flush_mmu(struct mmu_gather *tlb);
+void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 
 /* tlb_remove_page
- *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
- *	handling the additional races in SMP caused by other CPUs caching valid
- *	mappings in their TLBs.
+ *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
+ *	required.
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	tlb->need_flush = 1;
-	if (tlb_fast_mode(tlb)) {
-		free_page_and_swap_cache(page);
-		return;
-	}
-	tlb->pages[tlb->nr++] = page;
-	if (tlb->nr >= FREE_PTE_NR)
-		tlb_flush_mmu(tlb, 0, 0);
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /**
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index cb1ded2..ccfedb4 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -302,6 +302,7 @@
 header-y += ppp_defs.h
 header-y += pps.h
 header-y += prctl.h
+header-y += ptp_clock.h
 header-y += ptrace.h
 header-y += qnx4_fs.h
 header-y += qnxtypes.h
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index daf8c48..dcafe0b 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -55,7 +55,8 @@
  * bitmap_parse(buf, buflen, dst, nbits)	Parse bitmap dst from kernel buf
  * bitmap_parse_user(ubuf, ulen, dst, nbits)	Parse bitmap dst from user buf
  * bitmap_scnlistprintf(buf, len, src, nbits)	Print bitmap src as list to buf
- * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from list
+ * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from kernel buf
+ * bitmap_parselist_user(buf, dst, nbits)	Parse bitmap dst from user buf
  * bitmap_find_free_region(bitmap, bits, order)	Find and allocate bit region
  * bitmap_release_region(bitmap, pos, order)	Free specified bit region
  * bitmap_allocate_region(bitmap, pos, order)	Allocate specified bit region
@@ -129,6 +130,8 @@
 			const unsigned long *src, int nbits);
 extern int bitmap_parselist(const char *buf, unsigned long *maskp,
 			int nmaskbits);
+extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+			unsigned long *dst, int nbits);
 extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
 		const unsigned long *old, const unsigned long *new, int bits);
 extern int bitmap_bitremap(int oldbit,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index be50d9e..2a7cea53 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -151,7 +151,6 @@
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
-	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -192,6 +191,5 @@
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2ad95fa..ae9091a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -257,7 +257,7 @@
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
-	signed char		discard_zeroes_data;
+	unsigned char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -364,6 +364,8 @@
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +845,7 @@
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1066,13 +1069,16 @@
 {
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+	if (!lim->max_discard_sectors)
+		return 0;
+
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
 		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-	if (q->limits.discard_zeroes_data == 1)
+	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
 		return 1;
 
 	return 0;
@@ -1111,6 +1117,11 @@
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 01eca17..ab344a5 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -99,24 +99,31 @@
 				      unsigned long align,
 				      unsigned long goal);
 
+#ifdef CONFIG_NO_BOOTMEM
+/* We are using top down, so it is safe to use 0 here */
+#define BOOTMEM_LOW_LIMIT 0
+#else
+#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS)
+#endif
+
 #define alloc_bootmem(x) \
-	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_align(x, align) \
-	__alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_nopanic(x) \
-	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_nopanic(x) \
-	__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
diff --git a/include/linux/c2port.h b/include/linux/c2port.h
index 2a5cd86..a2f7d74 100644
--- a/include/linux/c2port.h
+++ b/include/linux/c2port.h
@@ -60,9 +60,6 @@
  * Exported functions
  */
 
-#define to_class_dev(obj) container_of((obj), struct class_device, kobj)
-#define to_c2port_device(obj) container_of((obj), struct c2port_device, class)
-
 extern struct c2port_device *c2port_device_register(char *name,
 					struct c2port_ops *ops, void *devdata);
 extern void c2port_device_unregister(struct c2port_device *dev);
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb4c1eb7..59e4028 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -34,8 +34,12 @@
     __asm__ ("" : "=r"(__ptr) : "0"(ptr));		\
     (typeof(ptr)) (__ptr + (off)); })
 
+#ifdef __CHECKER__
+#define __must_be_array(arr) 0
+#else
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#endif
 
 /*
  * Force always-inline if the user requests it so via the .config,
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 64b7c00..dfadc96 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -51,7 +51,7 @@
 #if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
-#if __GNUC_MINOR__ >= 4
+#if __GNUC_MINOR__ >= 4 && !defined(__CHECKER__)
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bae6fe2..b24ac564 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -547,6 +547,21 @@
 }
 
 /**
+ * cpumask_parselist_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parselist_user(const char __user *buf, int len,
+				     struct cpumask *dstp)
+{
+	return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
+							nr_cpumask_bits);
+}
+
+/**
  * cpulist_scnprintf - print a cpumask into a string as comma-separated list
  * @buf: the buffer to sprintf into
  * @len: the length of the buffer
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index cec467f..9e5f560 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -38,7 +38,7 @@
 
 /* Although the Linux source code makes a difference between
    generic endianness and the bitfields' endianness, there is no
-   architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness
+   architecture as of Linux-2.6.24-rc4 where the bitfields' endianness
    does not match the generic endianness. */
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.10"
+#define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 96
@@ -195,7 +195,7 @@
 	C_WF_REPORT_PARAMS, /* we have a socket */
 	C_CONNECTED,      /* we have introduced each other */
 	C_STARTING_SYNC_S,  /* starting full sync by admin request. */
-	C_STARTING_SYNC_T,  /* stariing full sync by admin request. */
+	C_STARTING_SYNC_T,  /* starting full sync by admin request. */
 	C_WF_BITMAP_S,
 	C_WF_BITMAP_T,
 	C_WF_SYNC_UUID,
@@ -236,7 +236,7 @@
  * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com>
  * even though we transmit as "cpu_to_be32(state)",
  * the offsets of the bitfields still need to be swapped
- * on different endianess.
+ * on different endianness.
  */
 	struct {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -266,7 +266,7 @@
 		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
 		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
 #else
-# error "this endianess is not supported"
+# error "this endianness is not supported"
 #endif
 	};
 	unsigned int i;
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index f14a165..0695431 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h
@@ -30,7 +30,7 @@
 	int tag_and_len ## member;
 #include "linux/drbd_nl.h"
 
-/* declate tag-list-sizes */
+/* declare tag-list-sizes */
 static const int tag_list_sizes[] = {
 #define NL_PACKET(name, number, fields) 2 fields ,
 #define NL_INTEGER(pn, pr, member)      + 4 + 4
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdf9495..3f9d3251 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -23,7 +23,8 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-#define INR_OPEN 1024		/* Initial setting for nfile rlimits */
+#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
@@ -634,8 +635,7 @@
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
-	spinlock_t		i_mmap_lock;	/* protect tree, count, list */
-	unsigned int		truncate_count;	/* Cover race condition with truncate */
+	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
 	unsigned long		nrpages;	/* number of total pages */
 	pgoff_t			writeback_index;/* writeback starts here */
 	const struct address_space_operations *a_ops;	/* methods */
@@ -644,7 +644,6 @@
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
-	struct mutex		unmap_mutex;    /* to protect unmapping */
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 76427e6..af095b5 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -100,17 +100,6 @@
 
 	/* operation releaser */
 	fscache_operation_release_t release;
-
-#ifdef CONFIG_WORKQUEUE_DEBUGFS
-	struct work_struct put_work;	/* work to delay operation put */
-	const char *name;		/* operation name */
-	const char *state;		/* operation state */
-#define fscache_set_op_name(OP, N)	do { (OP)->name  = (N); } while(0)
-#define fscache_set_op_state(OP, S)	do { (OP)->state = (S); } while(0)
-#else
-#define fscache_set_op_name(OP, N)	do { } while(0)
-#define fscache_set_op_state(OP, S)	do { } while(0)
-#endif
 };
 
 extern atomic_t fscache_op_debug_id;
@@ -137,7 +126,6 @@
 	op->processor = processor;
 	op->release = release;
 	INIT_LIST_HEAD(&op->pend_link);
-	fscache_set_op_state(op, "Init");
 }
 
 /*
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 9869ef3..5bbebda 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -9,6 +9,8 @@
  */
 
 
+#ifndef __GENALLOC_H__
+#define __GENALLOC_H__
 /*
  *  General purpose special memory pool descriptor.
  */
@@ -24,13 +26,34 @@
 struct gen_pool_chunk {
 	spinlock_t lock;
 	struct list_head next_chunk;	/* next chunk in pool */
+	phys_addr_t phys_addr;		/* physical starting address of memory chunk */
 	unsigned long start_addr;	/* starting address of memory chunk */
 	unsigned long end_addr;		/* ending address of memory chunk */
 	unsigned long bits[0];		/* bitmap for allocating memory chunk */
 };
 
 extern struct gen_pool *gen_pool_create(int, int);
-extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int);
+extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long);
+extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t,
+			     size_t, int);
+/**
+ * gen_pool_add - add a new chunk of special memory to the pool
+ * @pool: pool to add new memory chunk to
+ * @addr: starting address of memory chunk to add to pool
+ * @size: size in bytes of the memory chunk to add to pool
+ * @nid: node id of the node the chunk structure and bitmap should be
+ *       allocated on, or -1
+ *
+ * Add a new chunk of special memory to the specified pool.
+ *
+ * Returns 0 on success or a -ve errno on failure.
+ */
+static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
+			       size_t size, int nid)
+{
+	return gen_pool_add_virt(pool, addr, -1, size, nid);
+}
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
+#endif /* __GENALLOC_H__ */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index d764a42..b78956b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -100,7 +100,6 @@
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
-	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
@@ -127,6 +126,7 @@
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 56d8fc8..cb40892 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -249,14 +249,7 @@
 
 	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
 					 ((1 << ZONES_SHIFT) - 1);
-
-	if (__builtin_constant_p(bit))
-		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
-	else {
-#ifdef CONFIG_DEBUG_VM
-		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
-#endif
-	}
+	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 	return z;
 }
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8847c8c..48c32eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -92,12 +92,8 @@
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		spin_unlock_wait(&(__anon_vma)->root->lock);		\
-		/*							\
-		 * spin_unlock_wait() is just a loop in C and so the	\
-		 * CPU can reorder anything around it.			\
-		 */							\
-		smp_mb();						\
+		anon_vma_lock(__anon_vma);				\
+		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
 	} while (0)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f37ba71..fb0e732 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -248,6 +248,37 @@
 int __must_check kstrtou8(const char *s, unsigned int base, u8 *res);
 int __must_check kstrtos8(const char *s, unsigned int base, s8 *res);
 
+int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res);
+int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res);
+int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res);
+int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res);
+int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res);
+int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res);
+int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res);
+int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res);
+int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res);
+
+static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res)
+{
+	return kstrtoull_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res)
+{
+	return kstrtoll_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res)
+{
+	return kstrtouint_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res)
+{
+	return kstrtoint_from_user(s, count, base, res);
+}
+
 extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
@@ -638,6 +669,13 @@
 	char _f[20-2*sizeof(long)-sizeof(int)];	/* Padding: libc5 uses this.. */
 };
 
+#ifdef __CHECKER__
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)
+#define BUILD_BUG_ON_ZERO(e)
+#define BUILD_BUG_ON_NULL(e)
+#define BUILD_BUG_ON(condition)
+#else /* __CHECKER__ */
+
 /* Force a compilation error if a constant expression is not a power of 2 */
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
@@ -674,6 +712,7 @@
 		if (condition) __build_bug_on_failed = 1;	\
 	} while(0)
 #endif
+#endif	/* __CHECKER__ */
 
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index f158eb1..b8d6fff 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -25,7 +25,7 @@
 };
 
 enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED,
-	PCA9532_TYPE_N2100_BEEP };
+	PCA9532_TYPE_N2100_BEEP, PCA9532_TYPE_GPIO };
 
 struct pca9532_led {
 	u8 id;
@@ -41,6 +41,7 @@
 	struct pca9532_led leds[16];
 	u8 pwm[2];
 	u8 psc[2];
+	int gpio_base;
 };
 
 #endif /* __LINUX_PCA9532_H */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 61e0340..5884def 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -207,5 +207,7 @@
 					unsigned long *delay_off);
 };
 
+struct platform_device *gpio_led_register_device(
+		int id, const struct gpio_led_platform_data *pdata);
 
 #endif		/* __LINUX_LEDS_H_INCLUDED */
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 4aef1dd..ef820a3c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -487,12 +487,15 @@
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+#  define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 # else
 #  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+#  define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 # endif
 # define mutex_release(l, n, i)			lock_release(l, n, i)
 #else
 # define mutex_acquire(l, s, t, i)		do { } while (0)
+# define mutex_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define mutex_release(l, n, i)			do { } while (0)
 #endif
 
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 6a4fab7..7a71ffa 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -139,9 +139,9 @@
  * .list is on one of three lists:
  *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
  *     lru: unused but ready to be reused or recycled
- *          (ts_refcnt == 0, lc_number != LC_FREE),
+ *          (lc_refcnt == 0, lc_number != LC_FREE),
  *    free: unused but ready to be recycled
- *          (ts_refcnt == 0, lc_number == LC_FREE),
+ *          (lc_refcnt == 0, lc_number == LC_FREE),
  *
  * an element is said to be "in the active set",
  * if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
@@ -160,8 +160,8 @@
 	struct hlist_node colision;
 	struct list_head list;		 /* LRU list or free list */
 	unsigned refcnt;
-	/* back "pointer" into ts_cache->element[index],
-	 * for paranoia, and for "ts_element_to_index" */
+	/* back "pointer" into lc_cache->element[index],
+	 * for paranoia, and for "lc_element_to_index" */
 	unsigned lc_index;
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independend u64 */
@@ -190,8 +190,8 @@
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
-	 * This also limits the maximum value of ts_element.ts_index, allowing the
-	 * 8 high bits of .ts_index to be overloaded with flags in the future. */
+	 * This also limits the maximum value of lc_element.lc_index, allowing the
+	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
 	/* statistics */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 62a10c2..7525e38 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,6 +2,8 @@
 #define _LINUX_MEMBLOCK_H
 #ifdef __KERNEL__
 
+#define MEMBLOCK_ERROR	0
+
 #ifdef CONFIG_HAVE_MEMBLOCK
 /*
  * Logical memory blocks.
@@ -20,7 +22,6 @@
 #include <asm/memblock.h>
 
 #define INIT_MEMBLOCK_REGIONS	128
-#define MEMBLOCK_ERROR		0
 
 struct memblock_region {
 	phys_addr_t base;
@@ -160,6 +161,12 @@
 #define __initdata_memblock
 #endif
 
+#else
+static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
+{
+	return MEMBLOCK_ERROR;
+}
+
 #endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 31ac26c..7978eec 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -199,6 +199,9 @@
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
+struct mempolicy *get_vma_policy(struct task_struct *tsk,
+		struct vm_area_struct *vma, unsigned long addr);
+
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
 extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
@@ -228,10 +231,10 @@
 
 #ifdef CONFIG_TMPFS
 extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+#endif
 
 extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 			int no_context);
-#endif
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -368,13 +371,13 @@
 {
 	return 1;	/* error */
 }
+#endif
 
 static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 				int no_context)
 {
 	return 0;
 }
-#endif
 
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6507dde..8eb969e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -153,6 +153,7 @@
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
 #define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
 /*
  * This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -604,10 +605,6 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#ifndef PFN_SECTION_SHIFT
-#define PFN_SECTION_SHIFT 0
-#endif
-
 /*
  * Define the bit shifts to access each section.  For non-existent
  * sections we define the shift as 0; that plus a 0 mask ensures
@@ -681,6 +678,12 @@
 }
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+static inline void set_page_section(struct page *page, unsigned long section)
+{
+	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
+}
+
 static inline unsigned long page_to_section(struct page *page)
 {
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
@@ -699,18 +702,14 @@
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 
-static inline void set_page_section(struct page *page, unsigned long section)
-{
-	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
-	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
-}
-
 static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 	set_page_section(page, pfn_to_section_nr(pfn));
+#endif
 }
 
 /*
@@ -862,26 +861,18 @@
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 /*
- * Flags passed to show_mem() and __show_free_areas() to suppress output in
+ * Flags passed to show_mem() and show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
 
-extern void show_free_areas(void);
-extern void __show_free_areas(unsigned int flags);
+extern void show_free_areas(unsigned int flags);
+extern bool skip_free_areas_node(unsigned int flags, int nid);
 
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
 int shmem_zero_setup(struct vm_area_struct *);
 
-#ifndef CONFIG_MMU
-extern unsigned long shmem_get_unmapped_area(struct file *file,
-					     unsigned long addr,
-					     unsigned long len,
-					     unsigned long pgoff,
-					     unsigned long flags);
-#endif
-
 extern int can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
@@ -894,8 +885,6 @@
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
-	unsigned long truncate_count;		/* Compare vm_truncate_count */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -905,7 +894,7 @@
 		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
@@ -1056,17 +1045,19 @@
 /*
  * per-process(per-mm_struct) statistics.
  */
-#if defined(SPLIT_RSS_COUNTING)
-/*
- * The mm counters are not protected by its page_table_lock,
- * so must be incremented atomically.
- */
 static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
 {
 	atomic_long_set(&mm->rss_stat.count[member], value);
 }
 
+#if defined(SPLIT_RSS_COUNTING)
 unsigned long get_mm_counter(struct mm_struct *mm, int member);
+#else
+static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	return atomic_long_read(&mm->rss_stat.count[member]);
+}
+#endif
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
@@ -1083,38 +1074,6 @@
 	atomic_long_dec(&mm->rss_stat.count[member]);
 }
 
-#else  /* !USE_SPLIT_PTLOCKS */
-/*
- * The mm counters are protected by its page_table_lock,
- * so can be incremented directly.
- */
-static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	mm->rss_stat.count[member] = value;
-}
-
-static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
-	return mm->rss_stat.count[member];
-}
-
-static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	mm->rss_stat.count[member] += value;
-}
-
-static inline void inc_mm_counter(struct mm_struct *mm, int member)
-{
-	mm->rss_stat.count[member]++;
-}
-
-static inline void dec_mm_counter(struct mm_struct *mm, int member)
-{
-	mm->rss_stat.count[member]--;
-}
-
-#endif /* !USE_SPLIT_PTLOCKS */
-
 static inline unsigned long get_mm_rss(struct mm_struct *mm)
 {
 	return get_mm_counter(mm, MM_FILEPAGES) +
@@ -1163,13 +1122,24 @@
 #endif
 
 /*
+ * This struct is used to pass information from page reclaim to the shrinkers.
+ * We consolidate the values for easier extention later.
+ */
+struct shrink_control {
+	gfp_t gfp_mask;
+
+	/* How many slab objects shrinker() should scan and try to reclaim */
+	unsigned long nr_to_scan;
+};
+
+/*
  * A callback you can register to apply pressure to ageable caches.
  *
- * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'.  It should
- * look through the least-recently-used 'nr_to_scan' entries and
- * attempt to free them up.  It should return the number of objects
- * which remain in the cache.  If it returns -1, it means it cannot do
- * any scanning at this time (eg. there is a risk of deadlock).
+ * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
+ * and a 'gfpmask'.  It should look through the least-recently-used
+ * 'nr_to_scan' entries and attempt to free them up.  It should return
+ * the number of objects which remain in the cache.  If it returns -1, it means
+ * it cannot do any scanning at this time (eg. there is a risk of deadlock).
  *
  * The 'gfpmask' refers to the allocation we are currently trying to
  * fulfil.
@@ -1178,7 +1148,7 @@
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, struct shrink_control *sc);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
@@ -1380,7 +1350,7 @@
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
-extern void calculate_zone_inactive_ratio(struct zone *zone);
+extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(unsigned int flags);
@@ -1388,6 +1358,8 @@
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
 
+extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
+
 extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
@@ -1517,15 +1489,17 @@
 			struct address_space *mapping,
 			struct file *filp);
 
-/* Do stack extension */
+/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+
+/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */
+extern int expand_downwards(struct vm_area_struct *vma,
+		unsigned long address);
 #if VM_GROWSUP
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #else
   #define expand_upwards(vma, address) do { } while (0)
 #endif
-extern int expand_stack_downwards(struct vm_area_struct *vma,
-				  unsigned long address);
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -1627,8 +1601,9 @@
 
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages);
+unsigned long shrink_slab(struct shrink_control *shrink,
+			  unsigned long nr_pages_scanned,
+			  unsigned long lru_pages);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02aa561..071d459 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,7 +175,6 @@
 					   units, *not* PAGE_CACHE_SIZE */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
-	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
 	struct vm_region *vm_region;	/* NOMMU mapping region */
@@ -205,19 +204,16 @@
 
 #if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU)
 #define SPLIT_RSS_COUNTING
-struct mm_rss_stat {
-	atomic_long_t count[NR_MM_COUNTERS];
-};
 /* per-thread cached information, */
 struct task_rss_stat {
 	int events;	/* for synchronization threshold */
 	int count[NR_MM_COUNTERS];
 };
-#else  /* !USE_SPLIT_PTLOCKS */
+#endif /* USE_SPLIT_PTLOCKS */
+
 struct mm_rss_stat {
-	unsigned long count[NR_MM_COUNTERS];
+	atomic_long_t count[NR_MM_COUNTERS];
 };
-#endif /* !USE_SPLIT_PTLOCKS */
 
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
@@ -266,8 +262,6 @@
 
 	struct linux_binfmt *binfmt;
 
-	cpumask_t cpu_vm_mask;
-
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
@@ -317,9 +311,14 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
+
+	cpumask_var_t cpu_vm_mask_var;
 };
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
-#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
+{
+	return mm->cpu_vm_mask_var;
+}
 
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cc2e7df..1d1b1e1 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -150,7 +150,7 @@
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
+ * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e56f835..217bcf6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -928,9 +928,6 @@
 #define pfn_to_nid(pfn)		(0)
 #endif
 
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
-
 #ifdef CONFIG_SPARSEMEM
 
 /*
@@ -956,6 +953,12 @@
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
 
+#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+
+#define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
+#define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
+
 struct page;
 struct page_cgroup;
 struct mem_section {
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index c75471db..a940fe4 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -132,6 +132,7 @@
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
 extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
@@ -140,6 +141,13 @@
 #define mutex_lock(lock) mutex_lock_nested(lock, 0)
 #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
+
+#define mutex_lock_nest_lock(lock, nest_lock)				\
+do {									\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map);		\
+	_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);		\
+} while (0)
+
 #else
 extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
@@ -148,6 +156,7 @@
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
 #endif
 
 /*
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5e3aa83..4952fb8 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,6 +40,8 @@
 	CONSTRAINT_MEMCG,
 };
 
+extern int test_set_oom_score_adj(int new_val);
+
 extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 			const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c119506..716875e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -219,6 +219,12 @@
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
+static inline struct page *page_cache_alloc_readahead(struct address_space *x)
+{
+	return __page_cache_alloc(mapping_gfp_mask(x) |
+				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
+}
+
 typedef int filler_t(void *, struct page *);
 
 extern struct page * find_get_page(struct address_space *mapping,
@@ -357,6 +363,15 @@
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
 
+extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+
+static inline int wait_on_page_locked_killable(struct page *page)
+{
+	if (PageLocked(page))
+		return wait_on_page_bit_killable(page, PG_locked);
+	return 0;
+}
+
 /* 
  * Wait for a page to be unlocked.
  *
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 46f6ba5..5edc9014 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -75,7 +75,7 @@
 	barrier();		/* Prevent reloads of fbc->count */
 	if (ret >= 0)
 		return ret;
-	return 1;
+	return 0;
 }
 
 static inline int percpu_counter_initialized(struct percpu_counter *fbc)
@@ -133,6 +133,10 @@
 	return fbc->count;
 }
 
+/*
+ * percpu_counter is intended to track positive numbers. In the UP case the
+ * number should never be negative.
+ */
 static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
 {
 	return fbc->count;
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 808227d..959c141 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -82,6 +82,7 @@
 			unsigned long expires;
 		} mmtimer;
 		struct alarm alarmtimer;
+		struct rcu_head rcu;
 	} it;
 };
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index ee048e7..0101d55 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -1,6 +1,8 @@
 #ifndef __KERNEL_PRINTK__
 #define __KERNEL_PRINTK__
 
+#include <linux/init.h>
+
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
@@ -113,6 +115,7 @@
 extern int kptr_restrict;
 
 void log_buf_kexec_setup(void);
+void __init setup_log_buf(int early);
 #else
 static inline __attribute__ ((format (printf, 1, 0)))
 int vprintk(const char *s, va_list args)
@@ -137,6 +140,10 @@
 static inline void log_buf_kexec_setup(void)
 {
 }
+
+static inline void setup_log_buf(int early)
+{
+}
 #endif
 
 extern void dump_stack(void) __cold;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index eaf4350..3686cd6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -288,12 +288,4 @@
 	return pde->parent->data;
 }
 
-struct proc_maps_private {
-	struct pid *pid;
-	struct task_struct *task;
-#ifdef CONFIG_MMU
-	struct vm_area_struct *tail_vma;
-#endif
-};
-
 #endif /* _LINUX_PROC_FS_H */
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 943a85a..e07e274 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -25,6 +25,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
 #include <linux/filter.h>
 #ifdef __KERNEL__
 #include <linux/in.h>
@@ -58,6 +59,12 @@
 #define OFF_NEXT	6
 #define OFF_UDP_DST	2
 
+#define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
+#define OFF_PTP_SEQUENCE_ID	30
+#define OFF_PTP_CONTROL		32 /* PTPv1 only */
+
+#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2)
+
 #define IP6_HLEN	40
 #define UDP_HLEN	8
 
diff --git a/include/linux/ptp_clock.h b/include/linux/ptp_clock.h
new file mode 100644
index 0000000..94e981f
--- /dev/null
+++ b/include/linux/ptp_clock.h
@@ -0,0 +1,84 @@
+/*
+ * PTP 1588 clock support - user space interface
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PTP_CLOCK_H_
+#define _PTP_CLOCK_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* PTP_xxx bits, for the flags field within the request structures. */
+#define PTP_ENABLE_FEATURE (1<<0)
+#define PTP_RISING_EDGE    (1<<1)
+#define PTP_FALLING_EDGE   (1<<2)
+
+/*
+ * struct ptp_clock_time - represents a time value
+ *
+ * The sign of the seconds field applies to the whole value. The
+ * nanoseconds field is always unsigned. The reserved field is
+ * included for sub-nanosecond resolution, should the demand for
+ * this ever appear.
+ *
+ */
+struct ptp_clock_time {
+	__s64 sec;  /* seconds */
+	__u32 nsec; /* nanoseconds */
+	__u32 reserved;
+};
+
+struct ptp_clock_caps {
+	int max_adj;   /* Maximum frequency adjustment in parts per billon. */
+	int n_alarm;   /* Number of programmable alarms. */
+	int n_ext_ts;  /* Number of external time stamp channels. */
+	int n_per_out; /* Number of programmable periodic signals. */
+	int pps;       /* Whether the clock supports a PPS callback. */
+	int rsv[15];   /* Reserved for future use. */
+};
+
+struct ptp_extts_request {
+	unsigned int index;  /* Which channel to configure. */
+	unsigned int flags;  /* Bit field for PTP_xxx flags. */
+	unsigned int rsv[2]; /* Reserved for future use. */
+};
+
+struct ptp_perout_request {
+	struct ptp_clock_time start;  /* Absolute start time. */
+	struct ptp_clock_time period; /* Desired period, zero means disable. */
+	unsigned int index;           /* Which channel to configure. */
+	unsigned int flags;           /* Reserved for future use. */
+	unsigned int rsv[4];          /* Reserved for future use. */
+};
+
+#define PTP_CLK_MAGIC '='
+
+#define PTP_CLOCK_GETCAPS  _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps)
+#define PTP_EXTTS_REQUEST  _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request)
+#define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request)
+#define PTP_ENABLE_PPS     _IOW(PTP_CLK_MAGIC, 4, int)
+
+struct ptp_extts_event {
+	struct ptp_clock_time t; /* Time event occured. */
+	unsigned int index;      /* Which channel produced the event. */
+	unsigned int flags;      /* Reserved for future use. */
+	unsigned int rsv[2];     /* Reserved for future use. */
+};
+
+#endif
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
new file mode 100644
index 0000000..dd2e44f
--- /dev/null
+++ b/include/linux/ptp_clock_kernel.h
@@ -0,0 +1,139 @@
+/*
+ * PTP 1588 clock support
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PTP_CLOCK_KERNEL_H_
+#define _PTP_CLOCK_KERNEL_H_
+
+#include <linux/ptp_clock.h>
+
+
+struct ptp_clock_request {
+	enum {
+		PTP_CLK_REQ_EXTTS,
+		PTP_CLK_REQ_PEROUT,
+		PTP_CLK_REQ_PPS,
+	} type;
+	union {
+		struct ptp_extts_request extts;
+		struct ptp_perout_request perout;
+	};
+};
+
+/**
+ * struct ptp_clock_info - decribes a PTP hardware clock
+ *
+ * @owner:     The clock driver should set to THIS_MODULE.
+ * @name:      A short name to identify the clock.
+ * @max_adj:   The maximum possible frequency adjustment, in parts per billon.
+ * @n_alarm:   The number of programmable alarms.
+ * @n_ext_ts:  The number of external time stamp channels.
+ * @n_per_out: The number of programmable periodic signals.
+ * @pps:       Indicates whether the clock supports a PPS callback.
+ *
+ * clock operations
+ *
+ * @adjfreq:  Adjusts the frequency of the hardware clock.
+ *            parameter delta: Desired period change in parts per billion.
+ *
+ * @adjtime:  Shifts the time of the hardware clock.
+ *            parameter delta: Desired change in nanoseconds.
+ *
+ * @gettime:  Reads the current time from the hardware clock.
+ *            parameter ts: Holds the result.
+ *
+ * @settime:  Set the current time on the hardware clock.
+ *            parameter ts: Time value to set.
+ *
+ * @enable:   Request driver to enable or disable an ancillary feature.
+ *            parameter request: Desired resource to enable or disable.
+ *            parameter on: Caller passes one to enable or zero to disable.
+ *
+ * Drivers should embed their ptp_clock_info within a private
+ * structure, obtaining a reference to it using container_of().
+ *
+ * The callbacks must all return zero on success, non-zero otherwise.
+ */
+
+struct ptp_clock_info {
+	struct module *owner;
+	char name[16];
+	s32 max_adj;
+	int n_alarm;
+	int n_ext_ts;
+	int n_per_out;
+	int pps;
+	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
+	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
+	int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts);
+	int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts);
+	int (*enable)(struct ptp_clock_info *ptp,
+		      struct ptp_clock_request *request, int on);
+};
+
+struct ptp_clock;
+
+/**
+ * ptp_clock_register() - register a PTP hardware clock driver
+ *
+ * @info:  Structure describing the new clock.
+ */
+
+extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info);
+
+/**
+ * ptp_clock_unregister() - unregister a PTP hardware clock driver
+ *
+ * @ptp:  The clock to remove from service.
+ */
+
+extern int ptp_clock_unregister(struct ptp_clock *ptp);
+
+
+enum ptp_clock_events {
+	PTP_CLOCK_ALARM,
+	PTP_CLOCK_EXTTS,
+	PTP_CLOCK_PPS,
+};
+
+/**
+ * struct ptp_clock_event - decribes a PTP hardware clock event
+ *
+ * @type:  One of the ptp_clock_events enumeration values.
+ * @index: Identifies the source of the event.
+ * @timestamp: When the event occured.
+ */
+
+struct ptp_clock_event {
+	int type;
+	int index;
+	u64 timestamp;
+};
+
+/**
+ * ptp_clock_event() - notify the PTP layer about an event
+ *
+ * @ptp:    The clock obtained from ptp_clock_register().
+ * @event:  Message structure describing the event.
+ */
+
+extern void ptp_clock_event(struct ptp_clock *ptp,
+			    struct ptp_clock_event *event);
+
+#endif
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 830e65d..2148b12 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -26,7 +26,7 @@
  */
 struct anon_vma {
 	struct anon_vma *root;	/* Root of this anon_vma tree */
-	spinlock_t lock;	/* Serialize access to vma list */
+	struct mutex mutex;	/* Serialize access to vma list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
+	struct list_head same_anon_vma;	/* locked by anon_vma->mutex */
 };
 
 #ifdef CONFIG_MMU
@@ -93,24 +93,24 @@
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		spin_lock(&anon_vma->root->lock);
+		mutex_lock(&anon_vma->root->mutex);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		spin_unlock(&anon_vma->root->lock);
+		mutex_unlock(&anon_vma->root->mutex);
 }
 
 static inline void anon_vma_lock(struct anon_vma *anon_vma)
 {
-	spin_lock(&anon_vma->root->lock);
+	mutex_lock(&anon_vma->root->mutex);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	spin_unlock(&anon_vma->root->lock);
+	mutex_unlock(&anon_vma->root->mutex);
 }
 
 /*
@@ -218,20 +218,7 @@
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *__page_lock_anon_vma(struct page *page);
-
-static inline struct anon_vma *page_lock_anon_vma(struct page *page)
-{
-	struct anon_vma *anon_vma;
-
-	__cond_lock(RCU, anon_vma = __page_lock_anon_vma(page));
-
-	/* (void) is needed to make gcc happy */
-	(void) __cond_lock(&anon_vma->root->lock, anon_vma);
-
-	return anon_vma;
-}
-
+struct anon_vma *page_lock_anon_vma(struct page *page);
 void page_unlock_anon_vma(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf71e0..f18300e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1753,7 +1753,6 @@
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
-#define PF_OOM_ORIGIN	0x00080000	/* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
@@ -2177,6 +2176,7 @@
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }
+extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);
 
 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 399be5a..2b7fec8 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -9,6 +9,8 @@
 
 #define SHMEM_NR_DIRECT 16
 
+#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
+
 struct shmem_inode_info {
 	spinlock_t		lock;
 	unsigned long		flags;
@@ -17,8 +19,12 @@
 	unsigned long		next_index;	/* highest alloced index + 1 */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct page		*i_indirect;	/* top indirect blocks page */
-	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
+	union {
+		swp_entry_t	i_direct[SHMEM_NR_DIRECT]; /* first blocks */
+		char		inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
+	};
 	struct list_head	swaplist;	/* chain of maybes on swap */
+	struct list_head	xattr_list;	/* list of shmem_xattr */
 	struct inode		vfs_inode;
 };
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2b3831b..5135983 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -261,6 +261,7 @@
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
+void refresh_zone_stat_thresholds(void);
 
 int calculate_pressure_threshold(struct zone *zone);
 int calculate_normal_threshold(struct zone *zone);
@@ -313,6 +314,10 @@
 #define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
-#endif
+static inline void refresh_zone_stat_thresholds(void) { }
+
+#endif		/* CONFIG_SMP */
+
+extern const char * const vmstat_text[];
 
 #endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 6050783..aed54c5 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -13,10 +13,6 @@
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
 
-#ifdef  __KERNEL__
-
-#include <linux/types.h>
-
 /* Namespaces */
 #define XATTR_OS2_PREFIX "os2."
 #define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
@@ -53,6 +49,10 @@
 #define XATTR_CAPS_SUFFIX "capability"
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
 
+#ifdef  __KERNEL__
+
+#include <linux/types.h>
+
 struct inode;
 struct dentry;
 
diff --git a/arch/arm/plat-omap/include/plat/panel-generic-dpi.h b/include/video/omap-panel-generic-dpi.h
similarity index 86%
rename from arch/arm/plat-omap/include/plat/panel-generic-dpi.h
rename to include/video/omap-panel-generic-dpi.h
index 7906197..127e3f2 100644
--- a/arch/arm/plat-omap/include/plat/panel-generic-dpi.h
+++ b/include/video/omap-panel-generic-dpi.h
@@ -17,10 +17,10 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef __ARCH_ARM_PLAT_OMAP_PANEL_GENERIC_DPI_H
-#define __ARCH_ARM_PLAT_OMAP_PANEL_GENERIC_DPI_H
+#ifndef __OMAP_PANEL_GENERIC_DPI_H
+#define __OMAP_PANEL_GENERIC_DPI_H
 
-#include "display.h"
+struct omap_dss_device;
 
 /**
  * struct panel_generic_dpi_data - panel driver configuration data
@@ -34,4 +34,4 @@
 	void (*platform_disable)(struct omap_dss_device *dssdev);
 };
 
-#endif /* __ARCH_ARM_PLAT_OMAP_PANEL_GENERIC_DPI_H */
+#endif /* __OMAP_PANEL_GENERIC_DPI_H */
diff --git a/arch/arm/plat-omap/include/plat/nokia-dsi-panel.h b/include/video/omap-panel-nokia-dsi.h
similarity index 64%
rename from arch/arm/plat-omap/include/plat/nokia-dsi-panel.h
rename to include/video/omap-panel-nokia-dsi.h
index 01ab657..921ae93 100644
--- a/arch/arm/plat-omap/include/plat/nokia-dsi-panel.h
+++ b/include/video/omap-panel-nokia-dsi.h
@@ -1,14 +1,15 @@
-#ifndef __ARCH_ARM_PLAT_OMAP_NOKIA_DSI_PANEL_H
-#define __ARCH_ARM_PLAT_OMAP_NOKIA_DSI_PANEL_H
+#ifndef __OMAP_NOKIA_DSI_PANEL_H
+#define __OMAP_NOKIA_DSI_PANEL_H
 
-#include "display.h"
+struct omap_dss_device;
 
 /**
  * struct nokia_dsi_panel_data - Nokia DSI panel driver configuration
  * @name: panel name
  * @use_ext_te: use external TE
  * @ext_te_gpio: external TE GPIO
- * @use_esd_check: perform ESD checks
+ * @esd_interval: interval of ESD checks, 0 = disabled (ms)
+ * @ulps_timeout: time to wait before entering ULPS, 0 = disabled (ms)
  * @max_backlight_level: maximum backlight level
  * @set_backlight: pointer to backlight set function
  * @get_backlight: pointer to backlight get function
@@ -21,11 +22,12 @@
 	bool use_ext_te;
 	int ext_te_gpio;
 
-	bool use_esd_check;
+	unsigned esd_interval;
+	unsigned ulps_timeout;
 
 	int max_backlight_level;
 	int (*set_backlight)(struct omap_dss_device *dssdev, int level);
 	int (*get_backlight)(struct omap_dss_device *dssdev);
 };
 
-#endif /* __ARCH_ARM_PLAT_OMAP_NOKIA_DSI_PANEL_H */
+#endif /* __OMAP_NOKIA_DSI_PANEL_H */
diff --git a/arch/arm/plat-omap/include/plat/display.h b/include/video/omapdss.h
similarity index 84%
rename from arch/arm/plat-omap/include/plat/display.h
rename to include/video/omapdss.h
index 5e04ddc..892b97f 100644
--- a/arch/arm/plat-omap/include/plat/display.h
+++ b/include/video/omapdss.h
@@ -1,6 +1,4 @@
 /*
- * linux/include/asm-arm/arch-omap/display.h
- *
  * Copyright (C) 2008 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -17,8 +15,8 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef __ASM_ARCH_OMAP_DISPLAY_H
-#define __ASM_ARCH_OMAP_DISPLAY_H
+#ifndef __OMAP_OMAPDSS_H
+#define __OMAP_OMAPDSS_H
 
 #include <linux/list.h>
 #include <linux/kobject.h>
@@ -88,6 +86,11 @@
 	OMAP_DSS_COLOR_ARGB32	= 1 << 11, /* ARGB32 */
 	OMAP_DSS_COLOR_RGBA32	= 1 << 12, /* RGBA32 */
 	OMAP_DSS_COLOR_RGBX32	= 1 << 13, /* RGBx32 */
+	OMAP_DSS_COLOR_NV12		= 1 << 14, /* NV12 format: YUV 4:2:0 */
+	OMAP_DSS_COLOR_RGBA16		= 1 << 15, /* RGBA16 - 4444 */
+	OMAP_DSS_COLOR_RGBX16		= 1 << 16, /* RGBx16 - 4444 */
+	OMAP_DSS_COLOR_ARGB16_1555	= 1 << 17, /* ARGB16 - 1555 */
+	OMAP_DSS_COLOR_XRGB16_1555	= 1 << 18, /* xRGB16 - 1555 */
 };
 
 enum omap_lcd_display_type {
@@ -174,6 +177,17 @@
 	OMAP_DSS_OVL_MGR_CAP_DISPC = 1 << 0,
 };
 
+enum omap_dss_clk_source {
+	OMAP_DSS_CLK_SRC_FCK = 0,		/* OMAP2/3: DSS1_ALWON_FCLK
+						 * OMAP4: DSS_FCLK */
+	OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC,	/* OMAP3: DSI1_PLL_FCLK
+						 * OMAP4: PLL1_CLK1 */
+	OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI,	/* OMAP3: DSI2_PLL_FCLK
+						 * OMAP4: PLL1_CLK2 */
+	OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC,	/* OMAP4: PLL2_CLK1 */
+	OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DSI,	/* OMAP4: PLL2_CLK2 */
+};
+
 /* RFBI */
 
 struct rfbi_timings {
@@ -205,20 +219,30 @@
 int omap_rfbi_setup_te(enum omap_rfbi_te_mode mode,
 			     unsigned hs_pulse_time, unsigned vs_pulse_time,
 			     int hs_pol_inv, int vs_pol_inv, int extif_div);
+void rfbi_bus_lock(void);
+void rfbi_bus_unlock(void);
 
 /* DSI */
-void dsi_bus_lock(void);
-void dsi_bus_unlock(void);
-int dsi_vc_dcs_write(int channel, u8 *data, int len);
-int dsi_vc_dcs_write_0(int channel, u8 dcs_cmd);
-int dsi_vc_dcs_write_1(int channel, u8 dcs_cmd, u8 param);
-int dsi_vc_dcs_write_nosync(int channel, u8 *data, int len);
-int dsi_vc_dcs_read(int channel, u8 dcs_cmd, u8 *buf, int buflen);
-int dsi_vc_dcs_read_1(int channel, u8 dcs_cmd, u8 *data);
-int dsi_vc_dcs_read_2(int channel, u8 dcs_cmd, u8 *data1, u8 *data2);
-int dsi_vc_set_max_rx_packet_size(int channel, u16 len);
-int dsi_vc_send_null(int channel);
-int dsi_vc_send_bta_sync(int channel);
+void dsi_bus_lock(struct omap_dss_device *dssdev);
+void dsi_bus_unlock(struct omap_dss_device *dssdev);
+int dsi_vc_dcs_write(struct omap_dss_device *dssdev, int channel, u8 *data,
+		int len);
+int dsi_vc_dcs_write_0(struct omap_dss_device *dssdev, int channel,
+		u8 dcs_cmd);
+int dsi_vc_dcs_write_1(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 param);
+int dsi_vc_dcs_write_nosync(struct omap_dss_device *dssdev, int channel,
+		u8 *data, int len);
+int dsi_vc_dcs_read(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *buf, int buflen);
+int dsi_vc_dcs_read_1(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *data);
+int dsi_vc_dcs_read_2(struct omap_dss_device *dssdev, int channel, u8 dcs_cmd,
+		u8 *data1, u8 *data2);
+int dsi_vc_set_max_rx_packet_size(struct omap_dss_device *dssdev, int channel,
+		u16 len);
+int dsi_vc_send_null(struct omap_dss_device *dssdev, int channel);
+int dsi_vc_send_bta_sync(struct omap_dss_device *dssdev, int channel);
 
 /* Board specific data */
 struct omap_dss_board_info {
@@ -226,6 +250,7 @@
 	int num_devices;
 	struct omap_dss_device **devices;
 	struct omap_dss_device *default_device;
+	void (*dsi_mux_pads)(bool enable);
 };
 
 #if defined(CONFIG_OMAP2_DSS_MODULE) || defined(CONFIG_OMAP2_DSS)
@@ -280,6 +305,7 @@
 
 	u32 paddr;
 	void __iomem *vaddr;
+	u32 p_uv_addr;  /* for NV12 format */
 	u16 screen_width;
 	u16 width;
 	u16 height;
@@ -400,18 +426,12 @@
 			u8 data1_pol;
 			u8 data2_lane;
 			u8 data2_pol;
+			u8 data3_lane;
+			u8 data3_pol;
+			u8 data4_lane;
+			u8 data4_pol;
 
-			struct {
-				u16 regn;
-				u16 regm;
-				u16 regm_dispc;
-				u16 regm_dsi;
-
-				u16 lp_clk_div;
-
-				u16 lck_div;
-				u16 pck_div;
-			} div;
+			int module;
 
 			bool ext_te;
 			u8 ext_te_gpio;
@@ -424,6 +444,33 @@
 	} phy;
 
 	struct {
+		struct {
+			struct {
+				u16 lck_div;
+				u16 pck_div;
+				enum omap_dss_clk_source lcd_clk_src;
+			} channel;
+
+			enum omap_dss_clk_source dispc_fclk_src;
+		} dispc;
+
+		struct {
+			u16 regn;
+			u16 regm;
+			u16 regm_dispc;
+			u16 regm_dsi;
+
+			u16 lp_clk_div;
+			enum omap_dss_clk_source dsi_fclk_src;
+		} dsi;
+
+		struct {
+			u16 regn;
+			u16 regm2;
+		} hdmi;
+	} clocks;
+
+	struct {
 		struct omap_video_timings timings;
 
 		int acbi;	/* ac-bias pin transitions per interrupt */
@@ -503,6 +550,8 @@
 
 	void (*get_resolution)(struct omap_dss_device *dssdev,
 			u16 *xres, u16 *yres);
+	void (*get_dimensions)(struct omap_dss_device *dssdev,
+			u32 *width, u32 *height);
 	int (*get_recommended_bpp)(struct omap_dss_device *dssdev);
 
 	int (*check_timings)(struct omap_dss_device *dssdev,
@@ -519,9 +568,6 @@
 int omap_dss_register_driver(struct omap_dss_driver *);
 void omap_dss_unregister_driver(struct omap_dss_driver *);
 
-int omap_dss_register_device(struct omap_dss_device *);
-void omap_dss_unregister_device(struct omap_dss_device *);
-
 void omap_dss_get_device(struct omap_dss_device *dssdev);
 void omap_dss_put_device(struct omap_dss_device *dssdev);
 #define for_each_dss_dev(d) while ((d = omap_dss_get_next_device(d)) != NULL)
@@ -553,7 +599,8 @@
 #define to_dss_driver(x) container_of((x), struct omap_dss_driver, driver)
 #define to_dss_device(x) container_of((x), struct omap_dss_device, dev)
 
-void omapdss_dsi_vc_enable_hs(int channel, bool enable);
+void omapdss_dsi_vc_enable_hs(struct omap_dss_device *dssdev, int channel,
+		bool enable);
 int omapdss_dsi_enable_te(struct omap_dss_device *dssdev, bool enable);
 
 int omap_dsi_prepare_update(struct omap_dss_device *dssdev,
@@ -568,7 +615,8 @@
 void omap_dsi_release_vc(struct omap_dss_device *dssdev, int channel);
 
 int omapdss_dsi_display_enable(struct omap_dss_device *dssdev);
-void omapdss_dsi_display_disable(struct omap_dss_device *dssdev);
+void omapdss_dsi_display_disable(struct omap_dss_device *dssdev,
+		bool disconnect_lanes, bool enter_ulps);
 
 int omapdss_dpi_display_enable(struct omap_dss_device *dssdev);
 void omapdss_dpi_display_disable(struct omap_dss_device *dssdev);
@@ -587,5 +635,7 @@
 int omap_rfbi_update(struct omap_dss_device *dssdev,
 		u16 x, u16 y, u16 w, u16 h,
 		void (*callback)(void *), void *data);
+int omap_rfbi_configure(struct omap_dss_device *dssdev, int pixel_size,
+		int data_lines);
 
 #endif
diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
index 2c8d369..d964e68 100644
--- a/include/video/sh_mobile_lcdc.h
+++ b/include/video/sh_mobile_lcdc.h
@@ -2,6 +2,7 @@
 #define __ASM_SH_MOBILE_LCDC_H__
 
 #include <linux/fb.h>
+#include <video/sh_mobile_meram.h>
 
 enum {
 	RGB8,   /* 24bpp, 8:8:8 */
@@ -87,11 +88,13 @@
 	struct sh_mobile_lcdc_bl_info bl_info;
 	struct sh_mobile_lcdc_sys_bus_cfg sys_bus_cfg; /* only for SYSn I/F */
 	int nonstd;
+	struct sh_mobile_meram_cfg *meram_cfg;
 };
 
 struct sh_mobile_lcdc_info {
 	int clock_source;
 	struct sh_mobile_lcdc_chan_cfg ch[2];
+	struct sh_mobile_meram_info *meram_dev;
 };
 
 #endif /* __ASM_SH_MOBILE_LCDC_H__ */
diff --git a/include/video/sh_mobile_meram.h b/include/video/sh_mobile_meram.h
new file mode 100644
index 0000000..af602d6
--- /dev/null
+++ b/include/video/sh_mobile_meram.h
@@ -0,0 +1,68 @@
+#ifndef __VIDEO_SH_MOBILE_MERAM_H__
+#define __VIDEO_SH_MOBILE_MERAM_H__
+
+/* For sh_mobile_meram_info.addr_mode */
+enum {
+	SH_MOBILE_MERAM_MODE0 = 0,
+	SH_MOBILE_MERAM_MODE1
+};
+
+enum {
+	SH_MOBILE_MERAM_PF_NV = 0,
+	SH_MOBILE_MERAM_PF_RGB,
+	SH_MOBILE_MERAM_PF_NV24
+};
+
+
+struct sh_mobile_meram_priv;
+struct sh_mobile_meram_ops;
+
+struct sh_mobile_meram_info {
+	int				addr_mode;
+	struct sh_mobile_meram_ops	*ops;
+	struct sh_mobile_meram_priv	*priv;
+	struct platform_device		*pdev;
+};
+
+/* icb config */
+struct sh_mobile_meram_icb {
+	int marker_icb;		/* ICB # for Marker ICB */
+	int cache_icb;		/* ICB # for Cache ICB */
+	int meram_offset;	/* MERAM Buffer Offset to use */
+	int meram_size;		/* MERAM Buffer Size to use */
+
+	int cache_unit;		/* bytes to cache per ICB */
+};
+
+struct sh_mobile_meram_cfg {
+	struct sh_mobile_meram_icb	icb[2];
+	int				pixelformat;
+	int				current_reg;
+};
+
+struct module;
+struct sh_mobile_meram_ops {
+	struct module	*module;
+	/* register usage of meram */
+	int (*meram_register)(struct sh_mobile_meram_info *meram_dev,
+			      struct sh_mobile_meram_cfg *cfg,
+			      int xres, int yres, int pixelformat,
+			      unsigned long base_addr_y,
+			      unsigned long base_addr_c,
+			      unsigned long *icb_addr_y,
+			      unsigned long *icb_addr_c, int *pitch);
+
+	/* unregister usage of meram */
+	int (*meram_unregister)(struct sh_mobile_meram_info *meram_dev,
+				struct sh_mobile_meram_cfg *cfg);
+
+	/* update meram settings */
+	int (*meram_update)(struct sh_mobile_meram_info *meram_dev,
+			    struct sh_mobile_meram_cfg *cfg,
+			    unsigned long base_addr_y,
+			    unsigned long base_addr_c,
+			    unsigned long *icb_addr_y,
+			    unsigned long *icb_addr_c);
+};
+
+#endif /* __VIDEO_SH_MOBILE_MERAM_H__  */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index 61e523a..3d5d6db 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -45,6 +45,19 @@
 #define BLKIF_OP_WRITE_BARRIER     2
 
 /*
+ * Recognised if "feature-flush-cache" is present in backend xenbus
+ * info.  A flush will ask the underlying storage hardware to flush its
+ * non-volatile caches as appropriate.  The "feature-flush-cache" node
+ * contains a boolean indicating whether flush requests are likely to
+ * succeed or fail. Either way, a flush request may fail at any time
+ * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
+ * block-device hardware. The boolean simply indicates whether or not it
+ * is worthwhile for the frontend to attempt flushes.  If a backend does
+ * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
+ * "feature-flush-cache" node!
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+/*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
  * NB. This could be 12 if the ring indexes weren't stored in the same page.
diff --git a/init/calibrate.c b/init/calibrate.c
index 76ac919..cfd7000 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -38,6 +38,9 @@
 	unsigned long timer_rate_min, timer_rate_max;
 	unsigned long good_timer_sum = 0;
 	unsigned long good_timer_count = 0;
+	unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES];
+	int max = -1; /* index of measured_times with max/min values or not set */
+	int min = -1;
 	int i;
 
 	if (read_current_timer(&pre_start) < 0 )
@@ -90,18 +93,78 @@
 		 * If the upper limit and lower limit of the timer_rate is
 		 * >= 12.5% apart, redo calibration.
 		 */
-		if (pre_start != 0 && pre_end != 0 &&
+		printk(KERN_DEBUG "calibrate_delay_direct() timer_rate_max=%lu "
+			    "timer_rate_min=%lu pre_start=%lu pre_end=%lu\n",
+			  timer_rate_max, timer_rate_min, pre_start, pre_end);
+		if (start >= post_end)
+			printk(KERN_NOTICE "calibrate_delay_direct() ignoring "
+					"timer_rate as we had a TSC wrap around"
+					" start=%lu >=post_end=%lu\n",
+				start, post_end);
+		if (start < post_end && pre_start != 0 && pre_end != 0 &&
 		    (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
 			good_timer_count++;
 			good_timer_sum += timer_rate_max;
-		}
+			measured_times[i] = timer_rate_max;
+			if (max < 0 || timer_rate_max > measured_times[max])
+				max = i;
+			if (min < 0 || timer_rate_max < measured_times[min])
+				min = i;
+		} else
+			measured_times[i] = 0;
+
 	}
 
-	if (good_timer_count)
-		return (good_timer_sum/good_timer_count);
+	/*
+	 * Find the maximum & minimum - if they differ too much throw out the
+	 * one with the largest difference from the mean and try again...
+	 */
+	while (good_timer_count > 1) {
+		unsigned long estimate;
+		unsigned long maxdiff;
 
-	printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
-	       "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
+		/* compute the estimate */
+		estimate = (good_timer_sum/good_timer_count);
+		maxdiff = estimate >> 3;
+
+		/* if range is within 12% let's take it */
+		if ((measured_times[max] - measured_times[min]) < maxdiff)
+			return estimate;
+
+		/* ok - drop the worse value and try again... */
+		good_timer_sum = 0;
+		good_timer_count = 0;
+		if ((measured_times[max] - estimate) <
+				(estimate - measured_times[min])) {
+			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+					"min bogoMips estimate %d = %lu\n",
+				min, measured_times[min]);
+			measured_times[min] = 0;
+			min = max;
+		} else {
+			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+					"max bogoMips estimate %d = %lu\n",
+				max, measured_times[max]);
+			measured_times[max] = 0;
+			max = min;
+		}
+
+		for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
+			if (measured_times[i] == 0)
+				continue;
+			good_timer_count++;
+			good_timer_sum += measured_times[i];
+			if (measured_times[i] < measured_times[min])
+				min = i;
+			if (measured_times[i] > measured_times[max])
+				max = i;
+		}
+
+	}
+
+	printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
+	       "estimate for loops_per_jiffy.\nProbably due to long platform "
+		"interrupts. Consider using \"lpj=\" boot option.\n");
 	return 0;
 }
 #else
diff --git a/init/main.c b/init/main.c
index 48df882..d2f1e08 100644
--- a/init/main.c
+++ b/init/main.c
@@ -504,11 +504,14 @@
 	 * These use large bootmem allocations and must precede
 	 * kmem_cache_init()
 	 */
+	setup_log_buf(0);
 	pidhash_init();
 	vfs_caches_init_early();
 	sort_main_extable();
 	trap_init();
 	mm_init();
+	BUG_ON(mm_init_cpumask(&init_mm, 0));
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82..8e7e135 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -383,15 +383,14 @@
 			get_file(file);
 			if (tmp->vm_flags & VM_DENYWRITE)
 				atomic_dec(&inode->i_writecount);
-			spin_lock(&mapping->i_mmap_lock);
+			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping->i_mmap_writable++;
-			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_prio_tree_add(tmp, mpnt);
 			flush_dcache_mmap_unlock(mapping);
-			spin_unlock(&mapping->i_mmap_lock);
+			mutex_unlock(&mapping->i_mmap_mutex);
 		}
 
 		/*
@@ -486,6 +485,20 @@
 #endif
 }
 
+int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (oldmm)
+		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
+	else
+		memset(mm_cpumask(mm), 0, cpumask_size());
+#endif
+	return 0;
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
@@ -522,10 +535,20 @@
 	struct mm_struct * mm;
 
 	mm = allocate_mm();
-	if (mm) {
-		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	memset(mm, 0, sizeof(*mm));
+	mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	if (mm_init_cpumask(mm, NULL)) {
+		mm_free_pgd(mm);
+		free_mm(mm);
+		return NULL;
 	}
+
 	return mm;
 }
 
@@ -537,6 +560,7 @@
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
+	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -691,6 +715,9 @@
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
+	if (mm_init_cpumask(mm, oldmm))
+		goto fail_nocpumask;
+
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;
 
@@ -717,6 +744,9 @@
 	return NULL;
 
 fail_nocontext:
+	free_cpumask_var(mm->cpu_vm_mask_var);
+
+fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f..64e3df6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@
 
 #ifdef CONFIG_SMP
 
-static int irq_affinity_proc_show(struct seq_file *m, void *v)
+static int show_irq_affinity(int type, struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
 	const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@
 	if (irqd_is_setaffinity_pending(&desc->irq_data))
 		mask = desc->pending_mask;
 #endif
-	seq_cpumask(m, mask);
+	if (type)
+		seq_cpumask_list(m, mask);
+	else
+		seq_cpumask(m, mask);
 	seq_putc(m, '\n');
 	return 0;
 }
@@ -59,7 +62,18 @@
 #endif
 
 int no_irq_affinity;
-static ssize_t irq_affinity_proc_write(struct file *file,
+static int irq_affinity_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(0, m, v);
+}
+
+static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(1, m, v);
+}
+
+
+static ssize_t write_irq_affinity(int type, struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@
 	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
 		return -ENOMEM;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	if (type)
+		err = cpumask_parselist_user(buffer, count, new_value);
+	else
+		err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
 		goto free_cpumask;
 
@@ -100,11 +117,28 @@
 	return err;
 }
 
+static ssize_t irq_affinity_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	return write_irq_affinity(0, file, buffer, count, pos);
+}
+
+static ssize_t irq_affinity_list_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	return write_irq_affinity(1, file, buffer, count, pos);
+}
+
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
+}
+
 static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@
 	.release	= single_release,
 };
 
+static const struct file_operations irq_affinity_list_proc_fops = {
+	.open		= irq_affinity_list_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= irq_affinity_list_proc_write,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@
 	proc_create_data("affinity_hint", 0400, desc->dir,
 			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
 
+	/* create /proc/irq/<irq>/smp_affinity_list */
+	proc_create_data("smp_affinity_list", 0600, desc->dir,
+			 &irq_affinity_list_proc_fops, (void *)(long)irq);
+
 	proc_create_data("node", 0444, desc->dir,
 			 &irq_node_proc_fops, (void *)(long)irq);
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 2c938e2..d607ed5 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -131,14 +131,14 @@
  */
 static inline int __sched
 __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
-	       	unsigned long ip)
+		    struct lockdep_map *nest_lock, unsigned long ip)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
 	unsigned long flags;
 
 	preempt_disable();
-	mutex_acquire(&lock->dep_map, subclass, 0, ip);
+	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	/*
@@ -269,16 +269,25 @@
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
 
+void __sched
+_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
+{
+	might_sleep();
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
+}
+
+EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
+
 int __sched
 mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
 }
 EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
 
@@ -287,7 +296,7 @@
 {
 	might_sleep();
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
-				   subclass, _RET_IP_);
+				   subclass, NULL, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -393,7 +402,7 @@
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
@@ -401,7 +410,7 @@
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
@@ -409,7 +418,7 @@
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
 }
 #endif
 
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a1b5edf..4556182 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@
 	return tmr;
 }
 
+static void k_itimer_rcu_free(struct rcu_head *head)
+{
+	struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+
+	kmem_cache_free(posix_timers_cache, tmr);
+}
+
 #define IT_ID_SET	1
 #define IT_ID_NOT_SET	0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
-	kmem_cache_free(posix_timers_cache, tmr);
+	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
 }
 
 static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@
 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
-	/*
-	 * Watch out here.  We do a irqsave on the idr_lock and pass the
-	 * flags part over to the timer lock.  Must not let interrupts in
-	 * while we are moving the lock.
-	 */
-	spin_lock_irqsave(&idr_lock, *flags);
+
+	rcu_read_lock();
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
-		spin_lock(&timr->it_lock);
+		spin_lock_irqsave(&timr->it_lock, *flags);
 		if (timr->it_signal == current->signal) {
-			spin_unlock(&idr_lock);
+			rcu_read_unlock();
 			return timr;
 		}
-		spin_unlock(&timr->it_lock);
+		spin_unlock_irqrestore(&timr->it_lock, *flags);
 	}
-	spin_unlock_irqrestore(&idr_lock, *flags);
+	rcu_read_unlock();
 
 	return NULL;
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index da8ca81..3518539 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@@ -167,46 +168,74 @@
 }
 #endif
 
+/* requested log_buf_len from kernel cmdline */
+static unsigned long __initdata new_log_buf_len;
+
+/* save requested log_buf_len since it's too early to process it */
 static int __init log_buf_len_setup(char *str)
 {
 	unsigned size = memparse(str, &str);
-	unsigned long flags;
 
 	if (size)
 		size = roundup_pow_of_two(size);
-	if (size > log_buf_len) {
-		unsigned start, dest_idx, offset;
-		char *new_log_buf;
+	if (size > log_buf_len)
+		new_log_buf_len = size;
 
-		new_log_buf = alloc_bootmem(size);
-		if (!new_log_buf) {
-			printk(KERN_WARNING "log_buf_len: allocation failed\n");
-			goto out;
-		}
-
-		spin_lock_irqsave(&logbuf_lock, flags);
-		log_buf_len = size;
-		log_buf = new_log_buf;
-
-		offset = start = min(con_start, log_start);
-		dest_idx = 0;
-		while (start != log_end) {
-			log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
-			start++;
-			dest_idx++;
-		}
-		log_start -= offset;
-		con_start -= offset;
-		log_end -= offset;
-		spin_unlock_irqrestore(&logbuf_lock, flags);
-
-		printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
-	}
-out:
-	return 1;
+	return 0;
 }
+early_param("log_buf_len", log_buf_len_setup);
 
-__setup("log_buf_len=", log_buf_len_setup);
+void __init setup_log_buf(int early)
+{
+	unsigned long flags;
+	unsigned start, dest_idx, offset;
+	char *new_log_buf;
+	int free;
+
+	if (!new_log_buf_len)
+		return;
+
+	if (early) {
+		unsigned long mem;
+
+		mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
+		if (mem == MEMBLOCK_ERROR)
+			return;
+		new_log_buf = __va(mem);
+	} else {
+		new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
+	}
+
+	if (unlikely(!new_log_buf)) {
+		pr_err("log_buf_len: %ld bytes not available\n",
+			new_log_buf_len);
+		return;
+	}
+
+	spin_lock_irqsave(&logbuf_lock, flags);
+	log_buf_len = new_log_buf_len;
+	log_buf = new_log_buf;
+	new_log_buf_len = 0;
+	free = __LOG_BUF_LEN - log_end;
+
+	offset = start = min(con_start, log_start);
+	dest_idx = 0;
+	while (start != log_end) {
+		unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
+
+		log_buf[dest_idx] = __log_buf[log_idx_mask];
+		start++;
+		dest_idx++;
+	}
+	log_start -= offset;
+	con_start -= offset;
+	log_end -= offset;
+	spin_unlock_irqrestore(&logbuf_lock, flags);
+
+	pr_info("log_buf_len: %d\n", log_buf_len);
+	pr_info("early log buf free: %d(%d%%)\n",
+		free, (free * 100) / __LOG_BUF_LEN);
+}
 
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0efcdca..28afa4c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -670,6 +670,15 @@
 	bool
 	depends on STACKTRACE_SUPPORT
 
+config DEBUG_STACK_USAGE
+	bool "Stack utilization instrumentation"
+	depends on DEBUG_KERNEL
+	help
+	  Enables the display of the minimum amount of free stack which each
+	  task has ever had available in the sysrq-T and sysrq-P debug output.
+
+	  This option will slow down process creation somewhat.
+
 config DEBUG_KOBJECT
 	bool "kobject debugging"
 	depends on DEBUG_KERNEL
@@ -983,6 +992,17 @@
 	  To ensure that generic code follows the above rules, this
 	  option forces all percpu variables to be defined as weak.
 
+config DEBUG_PER_CPU_MAPS
+	bool "Debug access to per_cpu maps"
+	depends on DEBUG_KERNEL
+	depends on SMP
+	help
+	  Say Y to verify that the per_cpu map being accessed has
+	  been set up. This adds a fair amount of code to kernel memory
+	  and decreases performance.
+
+	  Say N if unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_FS
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 91e0ccf..41baf02 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -571,8 +571,11 @@
 EXPORT_SYMBOL(bitmap_scnlistprintf);
 
 /**
- * bitmap_parselist - convert list format ASCII string to bitmap
+ * __bitmap_parselist - convert list format ASCII string to bitmap
  * @bp: read nul-terminated user string from this buffer
+ * @buflen: buffer size in bytes.  If string is smaller than this
+ *    then it must be terminated with a \0.
+ * @is_user: location of buffer, 0 indicates kernel space
  * @maskp: write resulting mask here
  * @nmaskbits: number of bits in mask to be written
  *
@@ -587,20 +590,63 @@
  *    %-EINVAL: invalid character in string
  *    %-ERANGE: bit number specified too large for mask
  */
-int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+static int __bitmap_parselist(const char *buf, unsigned int buflen,
+		int is_user, unsigned long *maskp,
+		int nmaskbits)
 {
 	unsigned a, b;
+	int c, old_c, totaldigits;
+	const char __user *ubuf = buf;
+	int exp_digit, in_range;
 
+	totaldigits = c = 0;
 	bitmap_zero(maskp, nmaskbits);
 	do {
-		if (!isdigit(*bp))
-			return -EINVAL;
-		b = a = simple_strtoul(bp, (char **)&bp, BASEDEC);
-		if (*bp == '-') {
-			bp++;
-			if (!isdigit(*bp))
+		exp_digit = 1;
+		in_range = 0;
+		a = b = 0;
+
+		/* Get the next cpu# or a range of cpu#'s */
+		while (buflen) {
+			old_c = c;
+			if (is_user) {
+				if (__get_user(c, ubuf++))
+					return -EFAULT;
+			} else
+				c = *buf++;
+			buflen--;
+			if (isspace(c))
+				continue;
+
+			/*
+			 * If the last character was a space and the current
+			 * character isn't '\0', we've got embedded whitespace.
+			 * This is a no-no, so throw an error.
+			 */
+			if (totaldigits && c && isspace(old_c))
 				return -EINVAL;
-			b = simple_strtoul(bp, (char **)&bp, BASEDEC);
+
+			/* A '\0' or a ',' signal the end of a cpu# or range */
+			if (c == '\0' || c == ',')
+				break;
+
+			if (c == '-') {
+				if (exp_digit || in_range)
+					return -EINVAL;
+				b = 0;
+				in_range = 1;
+				exp_digit = 1;
+				continue;
+			}
+
+			if (!isdigit(c))
+				return -EINVAL;
+
+			b = b * 10 + (c - '0');
+			if (!in_range)
+				a = b;
+			exp_digit = 0;
+			totaldigits++;
 		}
 		if (!(a <= b))
 			return -EINVAL;
@@ -610,13 +656,52 @@
 			set_bit(a, maskp);
 			a++;
 		}
-		if (*bp == ',')
-			bp++;
-	} while (*bp != '\0' && *bp != '\n');
+	} while (buflen && c == ',');
 	return 0;
 }
+
+int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+	char *nl  = strchr(bp, '\n');
+	int len;
+
+	if (nl)
+		len = nl - bp;
+	else
+		len = strlen(bp);
+
+	return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
+}
 EXPORT_SYMBOL(bitmap_parselist);
 
+
+/**
+ * bitmap_parselist_user()
+ *
+ * @ubuf: pointer to user buffer containing string.
+ * @ulen: buffer size in bytes.  If string is smaller than this
+ *    then it must be terminated with a \0.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Wrapper for bitmap_parselist(), providing it with user buffer.
+ *
+ * We cannot have this as an inline function in bitmap.h because it needs
+ * linux/uaccess.h to get the access_ok() declaration and this causes
+ * cyclic dependencies.
+ */
+int bitmap_parselist_user(const char __user *ubuf,
+			unsigned int ulen, unsigned long *maskp,
+			int nmaskbits)
+{
+	if (!access_ok(VERIFY_READ, ubuf, ulen))
+		return -EFAULT;
+	return __bitmap_parselist((const char *)ubuf,
+					ulen, 1, maskp, nmaskbits);
+}
+EXPORT_SYMBOL(bitmap_parselist_user);
+
+
 /**
  * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 1923f14..577ddf8 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -39,17 +39,20 @@
 EXPORT_SYMBOL(gen_pool_create);
 
 /**
- * gen_pool_add - add a new chunk of special memory to the pool
+ * gen_pool_add_virt - add a new chunk of special memory to the pool
  * @pool: pool to add new memory chunk to
- * @addr: starting address of memory chunk to add to pool
+ * @virt: virtual starting address of memory chunk to add to pool
+ * @phys: physical starting address of memory chunk to add to pool
  * @size: size in bytes of the memory chunk to add to pool
  * @nid: node id of the node the chunk structure and bitmap should be
  *       allocated on, or -1
  *
  * Add a new chunk of special memory to the specified pool.
+ *
+ * Returns 0 on success or a -ve errno on failure.
  */
-int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
-		 int nid)
+int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
+		 size_t size, int nid)
 {
 	struct gen_pool_chunk *chunk;
 	int nbits = size >> pool->min_alloc_order;
@@ -58,11 +61,12 @@
 
 	chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid);
 	if (unlikely(chunk == NULL))
-		return -1;
+		return -ENOMEM;
 
 	spin_lock_init(&chunk->lock);
-	chunk->start_addr = addr;
-	chunk->end_addr = addr + size;
+	chunk->phys_addr = phys;
+	chunk->start_addr = virt;
+	chunk->end_addr = virt + size;
 
 	write_lock(&pool->lock);
 	list_add(&chunk->next_chunk, &pool->chunks);
@@ -70,7 +74,32 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(gen_pool_add);
+EXPORT_SYMBOL(gen_pool_add_virt);
+
+/**
+ * gen_pool_virt_to_phys - return the physical address of memory
+ * @pool: pool to allocate from
+ * @addr: starting address of memory
+ *
+ * Returns the physical address on success, or -1 on error.
+ */
+phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
+{
+	struct list_head *_chunk;
+	struct gen_pool_chunk *chunk;
+
+	read_lock(&pool->lock);
+	list_for_each(_chunk, &pool->chunks) {
+		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+
+		if (addr >= chunk->start_addr && addr < chunk->end_addr)
+			return chunk->phys_addr + addr - chunk->start_addr;
+	}
+	read_unlock(&pool->lock);
+
+	return -1;
+}
+EXPORT_SYMBOL(gen_pool_virt_to_phys);
 
 /**
  * gen_pool_destroy - destroy a special memory pool
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index a235f3c..2dbae88 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,6 +17,7 @@
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/uaccess.h>
 
 static inline char _tolower(const char c)
 {
@@ -222,3 +223,28 @@
 	return 0;
 }
 EXPORT_SYMBOL(kstrtos8);
+
+#define kstrto_from_user(f, g, type)					\
+int f(const char __user *s, size_t count, unsigned int base, type *res)	\
+{									\
+	/* sign, base 2 representation, newline, terminator */		\
+	char buf[1 + sizeof(type) * 8 + 1 + 1];				\
+									\
+	count = min(count, sizeof(buf) - 1);				\
+	if (copy_from_user(buf, s, count))				\
+		return -EFAULT;						\
+	buf[count] = '\0';						\
+	return g(buf, base, res);					\
+}									\
+EXPORT_SYMBOL(f)
+
+kstrto_from_user(kstrtoull_from_user,	kstrtoull,	unsigned long long);
+kstrto_from_user(kstrtoll_from_user,	kstrtoll,	long long);
+kstrto_from_user(kstrtoul_from_user,	kstrtoul,	unsigned long);
+kstrto_from_user(kstrtol_from_user,	kstrtol,	long);
+kstrto_from_user(kstrtouint_from_user,	kstrtouint,	unsigned int);
+kstrto_from_user(kstrtoint_from_user,	kstrtoint,	int);
+kstrto_from_user(kstrtou16_from_user,	kstrtou16,	u16);
+kstrto_from_user(kstrtos16_from_user,	kstrtos16,	s16);
+kstrto_from_user(kstrtou8_from_user,	kstrtou8,	u8);
+kstrto_from_user(kstrtos8_from_user,	kstrtos8,	s8);
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 270de9d..a07e726 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -84,7 +84,7 @@
 	if (e_count > LC_MAX_ACTIVE)
 		return NULL;
 
-	slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL);
+	slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!slot)
 		goto out_fail;
 	element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 90cbe4b..4407f8c 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -16,7 +16,7 @@
 		nonshared = 0, highmem = 0;
 
 	printk("Mem-Info:\n");
-	__show_free_areas(filter);
+	show_free_areas(filter);
 
 	for_each_online_pgdat(pgdat) {
 		unsigned long i, flags;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 1d659d7..c112056 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -898,7 +898,7 @@
 	case 'U':
 		return uuid_string(buf, end, ptr, spec, fmt);
 	case 'V':
-		return buf + vsnprintf(buf, end - buf,
+		return buf + vsnprintf(buf, end > buf ? end - buf : 0,
 				       ((struct va_format *)ptr)->fmt,
 				       *(((struct va_format *)ptr)->va));
 	case 'K':
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index befc875..f032e6e 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -63,10 +63,10 @@
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
-	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
+	unsigned long nr_dirty, nr_io, nr_more_io;
 	struct inode *inode;
 
-	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
+	nr_dirty = nr_io = nr_more_io = 0;
 	spin_lock(&inode_wb_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;
diff --git a/mm/filemap.c b/mm/filemap.c
index c641edf..68e782b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,16 +58,16 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock		(truncate_pagecache)
+ *  ->i_mmap_mutex		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->i_mutex
- *    ->i_mmap_lock		(truncate->unmap_mapping_range)
+ *    ->i_mmap_mutex		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_mmap_lock
+ *    ->i_mmap_mutex
  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
  *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
  *
@@ -84,7 +84,7 @@
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
- *  ->i_mmap_lock
+ *  ->i_mmap_mutex
  *    ->anon_vma.lock		(vma_adjust)
  *
  *  ->anon_vma.lock
@@ -106,7 +106,7 @@
  *
  *  (code doesn't rely on that order, so you could switch it around)
  *  ->tasklist_lock             (memory_failure, collect_procs_ao)
- *    ->i_mmap_lock
+ *    ->i_mmap_mutex
  */
 
 /*
@@ -562,6 +562,17 @@
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+
+	if (!test_bit(bit_nr, &page->flags))
+		return 0;
+
+	return __wait_on_bit(page_waitqueue(page), &wait,
+			     sleep_on_page_killable, TASK_KILLABLE);
+}
+
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
  * @page: Page defining the wait queue of interest
@@ -643,15 +654,32 @@
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
-	if (!(flags & FAULT_FLAG_ALLOW_RETRY)) {
-		__lock_page(page);
-		return 1;
-	} else {
-		if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			up_read(&mm->mmap_sem);
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		/*
+		 * CAUTION! In this case, mmap_sem is not released
+		 * even though return 0.
+		 */
+		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+			return 0;
+
+		up_read(&mm->mmap_sem);
+		if (flags & FAULT_FLAG_KILLABLE)
+			wait_on_page_locked_killable(page);
+		else
 			wait_on_page_locked(page);
-		}
 		return 0;
+	} else {
+		if (flags & FAULT_FLAG_KILLABLE) {
+			int ret;
+
+			ret = __lock_page_killable(page);
+			if (ret) {
+				up_read(&mm->mmap_sem);
+				return 0;
+			}
+		} else
+			__lock_page(page);
+		return 1;
 	}
 }
 
@@ -1528,15 +1556,17 @@
 	/* If we don't want any read-ahead, don't bother */
 	if (VM_RandomReadHint(vma))
 		return;
+	if (!ra->ra_pages)
+		return;
 
-	if (VM_SequentialReadHint(vma) ||
-			offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) {
+	if (VM_SequentialReadHint(vma)) {
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
 		return;
 	}
 
-	if (ra->mmap_miss < INT_MAX)
+	/* Avoid banging the cache line if not needed */
+	if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
 		ra->mmap_miss++;
 
 	/*
@@ -1550,12 +1580,10 @@
 	 * mmap read-around
 	 */
 	ra_pages = max_sane_readahead(ra->ra_pages);
-	if (ra_pages) {
-		ra->start = max_t(long, 0, offset - ra_pages/2);
-		ra->size = ra_pages;
-		ra->async_size = 0;
-		ra_submit(ra, mapping, file);
-	}
+	ra->start = max_t(long, 0, offset - ra_pages / 2);
+	ra->size = ra_pages;
+	ra->async_size = ra_pages / 4;
+	ra_submit(ra, mapping, file);
 }
 
 /*
@@ -1660,7 +1688,6 @@
 		return VM_FAULT_SIGBUS;
 	}
 
-	ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 83364df..93356cd 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -183,7 +183,7 @@
 		return;
 
 retry:
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
@@ -201,7 +201,7 @@
 			page_cache_release(page);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 
 	if (locked) {
 		mutex_unlock(&xip_sparse_mutex);
diff --git a/mm/fremap.c b/mm/fremap.c
index ec520c7..7f41230 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -211,13 +211,13 @@
 			}
 			goto out;
 		}
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
 		vma->vm_flags |= VM_NONLINEAR;
 		vma_prio_tree_remove(vma, &mapping->i_mmap);
 		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		flush_dcache_mmap_unlock(mapping);
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	}
 
 	if (vma->vm_flags & VM_LOCKED) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 83326ad..615d974 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1139,7 +1139,7 @@
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->lock to
+		 * and it won't wait on the anon_vma->root->mutex to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush_notify(vma, address, pmd);
@@ -1333,7 +1333,7 @@
 	return ret;
 }
 
-/* must be called with anon_vma->root->lock hold */
+/* must be called with anon_vma->root->mutex hold */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
@@ -1771,12 +1771,9 @@
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 #ifndef CONFIG_NUMA
+	up_read(&mm->mmap_sem);
 	VM_BUG_ON(!*hpage);
 	new_page = *hpage;
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
-		up_read(&mm->mmap_sem);
-		return;
-	}
 #else
 	VM_BUG_ON(*hpage);
 	/*
@@ -1791,22 +1788,26 @@
 	 */
 	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
 				      node, __GFP_OTHER_NODE);
+
+	/*
+	 * After allocating the hugepage, release the mmap_sem read lock in
+	 * preparation for taking it in write mode.
+	 */
+	up_read(&mm->mmap_sem);
 	if (unlikely(!new_page)) {
-		up_read(&mm->mmap_sem);
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
 		return;
 	}
-	count_vm_event(THP_COLLAPSE_ALLOC);
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
-		up_read(&mm->mmap_sem);
-		put_page(new_page);
-		return;
-	}
 #endif
 
-	/* after allocating the hugepage upgrade to mmap_sem write mode */
-	up_read(&mm->mmap_sem);
+	count_vm_event(THP_COLLAPSE_ALLOC);
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+#ifdef CONFIG_NUMA
+		put_page(new_page);
+#endif
+		return;
+	}
 
 	/*
 	 * Prevent all access to pagetables with the exception of
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bbb4a5b..5fd68b9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2205,7 +2205,7 @@
 	unsigned long sz = huge_page_size(h);
 
 	/*
-	 * A page gathering list, protected by per file i_mmap_lock. The
+	 * A page gathering list, protected by per file i_mmap_mutex. The
 	 * lock is used to avoid list corruption from multiple unmapping
 	 * of the same page since we are using page->lru.
 	 */
@@ -2274,9 +2274,9 @@
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
 	__unmap_hugepage_range(vma, start, end, ref_page);
-	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 }
 
 /*
@@ -2308,7 +2308,7 @@
 	 * this mapping should be shared between all the VMAs,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
@@ -2326,7 +2326,7 @@
 				address, address + huge_page_size(h),
 				page);
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 
 	return 1;
 }
@@ -2810,7 +2810,7 @@
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
-	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
 	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
 		ptep = huge_pte_offset(mm, address);
@@ -2825,7 +2825,7 @@
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
-	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 
 	flush_tlb_range(vma, start, end);
 }
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 1d29cdf..4019979 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -21,6 +21,5 @@
 	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask	= CPU_MASK_ALL,
 	INIT_MM_CONTEXT(init_mm)
 };
diff --git a/mm/internal.h b/mm/internal.h
index 9d0ced8e..d071d38 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -66,6 +66,10 @@
 	return page_private(page);
 }
 
+/* mm/util.c */
+void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct vm_area_struct *prev, struct rb_node *rb_parent);
+
 #ifdef CONFIG_MMU
 extern long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
diff --git a/mm/ksm.c b/mm/ksm.c
index 942dfc7..d708b3e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -35,6 +35,7 @@
 #include <linux/ksm.h>
 #include <linux/hash.h>
 #include <linux/freezer.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1894,9 +1895,11 @@
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
-			current->flags |= PF_OOM_ORIGIN;
+			int oom_score_adj;
+
+			oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 			err = unmerge_and_remove_all_rmap_items();
-			current->flags &= ~PF_OOM_ORIGIN;
+			test_set_oom_score_adj(oom_score_adj);
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 010f916..d5fd3dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5169,19 +5169,12 @@
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */
-	if (!(*s) || !strcmp(s, "=1"))
+	if (!strcmp(s, "1"))
 		really_do_swap_account = 1;
-	else if (!strcmp(s, "=0"))
+	else if (!strcmp(s, "0"))
 		really_do_swap_account = 0;
 	return 1;
 }
-__setup("swapaccount", enable_swap_account);
+__setup("swapaccount=", enable_swap_account);
 
-static int __init disable_swap_account(char *s)
-{
-	printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
-	enable_swap_account("=0");
-	return 1;
-}
-__setup("noswapaccount", disable_swap_account);
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2b9a5ee..5c8f7e0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -239,7 +239,11 @@
 	if (access) {
 		int nr;
 		do {
-			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			struct shrink_control shrink = {
+				.gfp_mask = GFP_KERNEL,
+			};
+
+			nr = shrink_slab(&shrink, 1000, 1000);
 			if (page_count(p) == 1)
 				break;
 		} while (nr > 10);
@@ -429,7 +433,7 @@
 	 */
 
 	read_lock(&tasklist_lock);
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	for_each_process(tsk) {
 		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
@@ -449,7 +453,7 @@
 				add_to_kill(tsk, page, vma, to_kill, tkc);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	read_unlock(&tasklist_lock);
 }
 
@@ -1440,16 +1444,12 @@
 	 */
 	ret = invalidate_inode_page(page);
 	unlock_page(page);
-
 	/*
-	 * Drop count because page migration doesn't like raised
-	 * counts. The page could get re-allocated, but if it becomes
-	 * LRU the isolation will just fail.
 	 * RED-PEN would be better to keep it isolated here, but we
 	 * would need to fix isolation locking first.
 	 */
-	put_page(page);
 	if (ret == 1) {
+		put_page(page);
 		ret = 0;
 		pr_info("soft_offline: %#lx: invalidated\n", pfn);
 		goto done;
@@ -1461,6 +1461,11 @@
 	 * handles a large number of cases for us.
 	 */
 	ret = isolate_lru_page(page);
+	/*
+	 * Drop page reference which is came from get_any_page()
+	 * successful isolate_lru_page() already took another one.
+	 */
+	put_page(page);
 	if (!ret) {
 		LIST_HEAD(pagelist);
 
diff --git a/mm/memory.c b/mm/memory.c
index 61e66f0..b73f677 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -182,7 +182,7 @@
 {
 	__sync_task_rss_stat(task, mm);
 }
-#else
+#else /* SPLIT_RSS_COUNTING */
 
 #define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
 #define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
@@ -191,7 +191,204 @@
 {
 }
 
+#endif /* SPLIT_RSS_COUNTING */
+
+#ifdef HAVE_GENERIC_MMU_GATHER
+
+static int tlb_next_batch(struct mmu_gather *tlb)
+{
+	struct mmu_gather_batch *batch;
+
+	batch = tlb->active;
+	if (batch->next) {
+		tlb->active = batch->next;
+		return 1;
+	}
+
+	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+	if (!batch)
+		return 0;
+
+	batch->next = NULL;
+	batch->nr   = 0;
+	batch->max  = MAX_GATHER_BATCH;
+
+	tlb->active->next = batch;
+	tlb->active = batch;
+
+	return 1;
+}
+
+/* tlb_gather_mmu
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
+ */
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+{
+	tlb->mm = mm;
+
+	tlb->fullmm     = fullmm;
+	tlb->need_flush = 0;
+	tlb->fast_mode  = (num_possible_cpus() == 1);
+	tlb->local.next = NULL;
+	tlb->local.nr   = 0;
+	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
+	tlb->active     = &tlb->local;
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb->batch = NULL;
 #endif
+}
+
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	struct mmu_gather_batch *batch;
+
+	if (!tlb->need_flush)
+		return;
+	tlb->need_flush = 0;
+	tlb_flush(tlb);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb_table_flush(tlb);
+#endif
+
+	if (tlb_fast_mode(tlb))
+		return;
+
+	for (batch = &tlb->local; batch; batch = batch->next) {
+		free_pages_and_swap_cache(batch->pages, batch->nr);
+		batch->nr = 0;
+	}
+	tlb->active = &tlb->local;
+}
+
+/* tlb_finish_mmu
+ *	Called at the end of the shootdown operation to free up any resources
+ *	that were required.
+ */
+void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	struct mmu_gather_batch *batch, *next;
+
+	tlb_flush_mmu(tlb);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+
+	for (batch = tlb->local.next; batch; batch = next) {
+		next = batch->next;
+		free_pages((unsigned long)batch, 0);
+	}
+	tlb->local.next = NULL;
+}
+
+/* __tlb_remove_page
+ *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
+ *	handling the additional races in SMP caused by other CPUs caching valid
+ *	mappings in their TLBs. Returns the number of free page slots left.
+ *	When out of page slots we must call tlb_flush_mmu().
+ */
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	struct mmu_gather_batch *batch;
+
+	tlb->need_flush = 1;
+
+	if (tlb_fast_mode(tlb)) {
+		free_page_and_swap_cache(page);
+		return 1; /* avoid calling tlb_flush_mmu() */
+	}
+
+	batch = tlb->active;
+	batch->pages[batch->nr++] = page;
+	if (batch->nr == batch->max) {
+		if (!tlb_next_batch(tlb))
+			return 0;
+	}
+	VM_BUG_ON(batch->nr > batch->max);
+
+	return batch->max - batch->nr;
+}
+
+#endif /* HAVE_GENERIC_MMU_GATHER */
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+	/*
+	 * This isn't an RCU grace period and hence the page-tables cannot be
+	 * assumed to be actually RCU-freed.
+	 *
+	 * It is however sufficient for software page-table walkers that rely on
+	 * IRQ disabling. See the comment near struct mmu_table_batch.
+	 */
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	__tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+	struct mmu_table_batch *batch;
+	int i;
+
+	batch = container_of(head, struct mmu_table_batch, rcu);
+
+	for (i = 0; i < batch->nr; i++)
+		__tlb_remove_table(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	if (*batch) {
+		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+		*batch = NULL;
+	}
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	tlb->need_flush = 1;
+
+	/*
+	 * When there's less then two users of this mm there cannot be a
+	 * concurrent page-table walk.
+	 */
+	if (atomic_read(&tlb->mm->mm_users) < 2) {
+		__tlb_remove_table(table);
+		return;
+	}
+
+	if (*batch == NULL) {
+		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+		if (*batch == NULL) {
+			tlb_remove_table_one(table);
+			return;
+		}
+		(*batch)->nr = 0;
+	}
+	(*batch)->tables[(*batch)->nr++] = table;
+	if ((*batch)->nr == MAX_TABLE_BATCH)
+		tlb_table_flush(tlb);
+}
+
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
 /*
  * If a p?d_bad entry is found while walking page tables, report
@@ -909,26 +1106,24 @@
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
-	pte_t *pte;
-	spinlock_t *ptl;
+	int force_flush = 0;
 	int rss[NR_MM_COUNTERS];
+	spinlock_t *ptl;
+	pte_t *pte;
 
+again:
 	init_rss_vec(rss);
-
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
-			(*zap_work)--;
 			continue;
 		}
 
-		(*zap_work) -= PAGE_SIZE;
-
 		if (pte_present(ptent)) {
 			struct page *page;
 
@@ -974,7 +1169,9 @@
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page);
+			if (force_flush)
+				break;
 			continue;
 		}
 		/*
@@ -995,19 +1192,31 @@
 				print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (addr != end)
+			goto again;
+	}
+
 	return addr;
 }
 
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1019,19 +1228,15 @@
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd)) {
-				(*zap_work)--;
+			} else if (zap_huge_pmd(tlb, vma, pmd))
 				continue;
-			}
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd)) {
-			(*zap_work)--;
+		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		}
-		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
-	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+		cond_resched();
+	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1039,7 +1244,7 @@
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1047,13 +1252,10 @@
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud)) {
-			(*zap_work)--;
+		if (pud_none_or_clear_bad(pud))
 			continue;
-		}
-		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
-	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1061,7 +1263,7 @@
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1075,13 +1277,10 @@
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd)) {
-			(*zap_work)--;
+		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		}
-		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
-	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
 
@@ -1121,17 +1320,12 @@
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	long zap_work = ZAP_BLOCK_SIZE;
-	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
-	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
-	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-	int fullmm = (*tlbp)->fullmm;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1152,11 +1346,6 @@
 			untrack_pfn_vma(vma, 0, 0);
 
 		while (start != end) {
-			if (!tlb_start_valid) {
-				tlb_start = start;
-				tlb_start_valid = 1;
-			}
-
 			if (unlikely(is_vm_hugetlb_page(vma))) {
 				/*
 				 * It is undesirable to test vma->vm_file as it
@@ -1169,39 +1358,15 @@
 				 * Since no pte has actually been setup, it is
 				 * safe to do nothing in this case.
 				 */
-				if (vma->vm_file) {
+				if (vma->vm_file)
 					unmap_hugepage_range(vma, start, end, NULL);
-					zap_work -= (end - start) /
-					pages_per_huge_page(hstate_vma(vma));
-				}
 
 				start = end;
 			} else
-				start = unmap_page_range(*tlbp, vma,
-						start, end, &zap_work, details);
-
-			if (zap_work > 0) {
-				BUG_ON(start != end);
-				break;
-			}
-
-			tlb_finish_mmu(*tlbp, tlb_start, start);
-
-			if (need_resched() ||
-				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock) {
-					*tlbp = NULL;
-					goto out;
-				}
-				cond_resched();
-			}
-
-			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
-			tlb_start_valid = 0;
-			zap_work = ZAP_BLOCK_SIZE;
+				start = unmap_page_range(tlb, vma, start, end, details);
 		}
 	}
-out:
+
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
@@ -1217,16 +1382,15 @@
 		unsigned long size, struct zap_details *details)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long end = address + size;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
-	if (tlb)
-		tlb_finish_mmu(tlb, address, end);
+	tlb_finish_mmu(&tlb, address, end);
 	return end;
 }
 
@@ -2535,96 +2699,11 @@
 	return ret;
 }
 
-/*
- * Helper functions for unmap_mapping_range().
- *
- * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
- *
- * We have to restart searching the prio_tree whenever we drop the lock,
- * since the iterator is only valid while the lock is held, and anyway
- * a later vma might be split and reinserted earlier while lock dropped.
- *
- * The list of nonlinear vmas could be handled more efficiently, using
- * a placeholder, but handle it in the same way until a need is shown.
- * It is important to search the prio_tree before nonlinear list: a vma
- * may become nonlinear and be shifted from prio_tree to nonlinear list
- * while the lock is dropped; but never shifted from list to prio_tree.
- *
- * In order to make forward progress despite restarting the search,
- * vm_truncate_count is used to mark a vma as now dealt with, so we can
- * quickly skip it next time around.  Since the prio_tree search only
- * shows us those vmas affected by unmapping the range in question, we
- * can't efficiently keep all vmas in step with mapping->truncate_count:
- * so instead reset them all whenever it wraps back to 0 (then go to 1).
- * mapping->truncate_count and vma->vm_truncate_count are protected by
- * i_mmap_lock.
- *
- * In order to make forward progress despite repeatedly restarting some
- * large vma, note the restart_addr from unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again.  It might
- * have been split or merged, shrunk or extended, but never shifted: so
- * restart_addr remains valid so long as it remains in the vma's range.
- * unmap_mapping_range forces truncate_count to leap over page-aligned
- * values so we can save vma's restart_addr in its truncate_count field.
- */
-#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
-
-static void reset_vma_truncate_counts(struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		vma->vm_truncate_count = 0;
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		vma->vm_truncate_count = 0;
-}
-
-static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	unsigned long restart_addr;
-	int need_break;
-
-	/*
-	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must have their ->fault function
-	 * return a locked page (and set VM_FAULT_LOCKED in the return).
-	 * This provides synchronisation against concurrent unmapping here.
-	 */
-
-again:
-	restart_addr = vma->vm_truncate_count;
-	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
-		start_addr = restart_addr;
-		if (start_addr >= end_addr) {
-			/* Top of vma has been split off since last time */
-			vma->vm_truncate_count = details->truncate_count;
-			return 0;
-		}
-	}
-
-	restart_addr = zap_page_range(vma, start_addr,
-					end_addr - start_addr, details);
-	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
-
-	if (restart_addr >= end_addr) {
-		/* We have now completed this vma: mark it so */
-		vma->vm_truncate_count = details->truncate_count;
-		if (!need_break)
-			return 0;
-	} else {
-		/* Note restart_addr in vma's truncate_count field */
-		vma->vm_truncate_count = restart_addr;
-		if (!need_break)
-			goto again;
-	}
-
-	spin_unlock(details->i_mmap_lock);
-	cond_resched();
-	spin_lock(details->i_mmap_lock);
-	return -EINTR;
+	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -2634,12 +2713,8 @@
 	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-restart:
 	vma_prio_tree_foreach(vma, &iter, root,
 			details->first_index, details->last_index) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
@@ -2651,11 +2726,10 @@
 		if (zea > vea)
 			zea = vea;
 
-		if (unmap_mapping_range_vma(vma,
+		unmap_mapping_range_vma(vma,
 			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
 			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
-				details) < 0)
-			goto restart;
+				details);
 	}
 }
 
@@ -2670,15 +2744,9 @@
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-restart:
 	list_for_each_entry(vma, head, shared.vm_set.list) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 		details->nonlinear_vma = vma;
-		if (unmap_mapping_range_vma(vma, vma->vm_start,
-					vma->vm_end, details) < 0)
-			goto restart;
+		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
 }
 
@@ -2717,26 +2785,14 @@
 	details.last_index = hba + hlen - 1;
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
-	details.i_mmap_lock = &mapping->i_mmap_lock;
 
-	mutex_lock(&mapping->unmap_mutex);
-	spin_lock(&mapping->i_mmap_lock);
 
-	/* Protect against endless unmapping loops */
-	mapping->truncate_count++;
-	if (unlikely(is_restart_addr(mapping->truncate_count))) {
-		if (mapping->truncate_count == 0)
-			reset_vma_truncate_counts(mapping);
-		mapping->truncate_count++;
-	}
-	details.truncate_count = mapping->truncate_count;
-
+	mutex_lock(&mapping->i_mmap_mutex);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
-	spin_unlock(&mapping->i_mmap_lock);
-	mutex_unlock(&mapping->unmap_mutex);
+	mutex_unlock(&mapping->i_mmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
@@ -2966,7 +3022,7 @@
 		if (prev && prev->vm_end == address)
 			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-		expand_stack(vma, address - PAGE_SIZE);
+		expand_downwards(vma, address - PAGE_SIZE);
 	}
 	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
 		struct vm_area_struct *next = vma->vm_next;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9ca1d60..9f64637 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -374,10 +374,6 @@
 		totalhigh_pages++;
 #endif
 
-#ifdef CONFIG_FLATMEM
-	max_mapnr = max(pfn, max_mapnr);
-#endif
-
 	ClearPageReserved(page);
 	init_page_count(page);
 	__free_page(page);
@@ -400,7 +396,7 @@
 }
 
 
-int online_pages(unsigned long pfn, unsigned long nr_pages)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 {
 	unsigned long onlined_pages = 0;
 	struct zone *zone;
@@ -459,8 +455,9 @@
 		zone_pcp_update(zone);
 
 	mutex_unlock(&zonelists_mutex);
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+
+	init_per_zone_wmark_min();
+
 	if (onlined_pages) {
 		kswapd_run(zone_to_nid(zone));
 		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
@@ -705,7 +702,7 @@
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
-		if (!page_count(page))
+		if (!get_page_unless_zero(page))
 			continue;
 		/*
 		 * We can skip free pages. And we can only deal with pages on
@@ -713,6 +710,7 @@
 		 */
 		ret = isolate_lru_page(page);
 		if (!ret) { /* Success */
+			put_page(page);
 			list_add_tail(&page->lru, &source);
 			move_pages--;
 			inc_zone_page_state(page, NR_ISOLATED_ANON +
@@ -724,6 +722,7 @@
 			       pfn);
 			dump_page(page);
 #endif
+			put_page(page);
 			/* Because we don't have big zone->lock. we should
 			   check this again here. */
 			if (page_count(page)) {
@@ -795,7 +794,7 @@
 	return offlined;
 }
 
-static int offline_pages(unsigned long start_pfn,
+static int __ref offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
 	unsigned long pfn, nr_pages, expire;
@@ -893,8 +892,8 @@
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
 	totalram_pages -= offlined_pages;
 
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+	init_per_zone_wmark_min();
+
 	if (!node_present_pages(node)) {
 		node_clear_state(node, N_HIGH_MEMORY);
 		kswapd_stop(node);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 959a8b8..e7fb9d2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -99,7 +99,6 @@
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
-#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)		/* Gather statistics */
 
 static struct kmem_cache *policy_cache;
 static struct kmem_cache *sn_cache;
@@ -457,7 +456,6 @@
 	},
 };
 
-static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
@@ -492,9 +490,7 @@
 		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
 			continue;
 
-		if (flags & MPOL_MF_STATS)
-			gather_stats(page, private, pte_dirty(*pte));
-		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 			migrate_page_add(page, private, flags);
 		else
 			break;
@@ -1489,7 +1485,7 @@
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct task_struct *task,
+struct mempolicy *get_vma_policy(struct task_struct *task,
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = task->mempolicy;
@@ -2529,159 +2525,3 @@
 	}
 	return p - buffer;
 }
-
-struct numa_maps {
-	unsigned long pages;
-	unsigned long anon;
-	unsigned long active;
-	unsigned long writeback;
-	unsigned long mapcount_max;
-	unsigned long dirty;
-	unsigned long swapcache;
-	unsigned long node[MAX_NUMNODES];
-};
-
-static void gather_stats(struct page *page, void *private, int pte_dirty)
-{
-	struct numa_maps *md = private;
-	int count = page_mapcount(page);
-
-	md->pages++;
-	if (pte_dirty || PageDirty(page))
-		md->dirty++;
-
-	if (PageSwapCache(page))
-		md->swapcache++;
-
-	if (PageActive(page) || PageUnevictable(page))
-		md->active++;
-
-	if (PageWriteback(page))
-		md->writeback++;
-
-	if (PageAnon(page))
-		md->anon++;
-
-	if (count > md->mapcount_max)
-		md->mapcount_max = count;
-
-	md->node[page_to_nid(page)]++;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
-static void check_huge_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end,
-		struct numa_maps *md)
-{
-	unsigned long addr;
-	struct page *page;
-	struct hstate *h = hstate_vma(vma);
-	unsigned long sz = huge_page_size(h);
-
-	for (addr = start; addr < end; addr += sz) {
-		pte_t *ptep = huge_pte_offset(vma->vm_mm,
-						addr & huge_page_mask(h));
-		pte_t pte;
-
-		if (!ptep)
-			continue;
-
-		pte = *ptep;
-		if (pte_none(pte))
-			continue;
-
-		page = pte_page(pte);
-		if (!page)
-			continue;
-
-		gather_stats(page, md, pte_dirty(*ptep));
-	}
-}
-#else
-static inline void check_huge_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end,
-		struct numa_maps *md)
-{
-}
-#endif
-
-/*
- * Display pages allocated per node and memory policy via /proc.
- */
-int show_numa_map(struct seq_file *m, void *v)
-{
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-	struct numa_maps *md;
-	struct file *file = vma->vm_file;
-	struct mm_struct *mm = vma->vm_mm;
-	struct mempolicy *pol;
-	int n;
-	char buffer[50];
-
-	if (!mm)
-		return 0;
-
-	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
-	if (!md)
-		return 0;
-
-	pol = get_vma_policy(priv->task, vma, vma->vm_start);
-	mpol_to_str(buffer, sizeof(buffer), pol, 0);
-	mpol_cond_put(pol);
-
-	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
-
-	if (file) {
-		seq_printf(m, " file=");
-		seq_path(m, &file->f_path, "\n\t= ");
-	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
-		seq_printf(m, " heap");
-	} else if (vma->vm_start <= mm->start_stack &&
-			vma->vm_end >= mm->start_stack) {
-		seq_printf(m, " stack");
-	}
-
-	if (is_vm_hugetlb_page(vma)) {
-		check_huge_range(vma, vma->vm_start, vma->vm_end, md);
-		seq_printf(m, " huge");
-	} else {
-		check_pgd_range(vma, vma->vm_start, vma->vm_end,
-			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
-	}
-
-	if (!md->pages)
-		goto out;
-
-	if (md->anon)
-		seq_printf(m," anon=%lu",md->anon);
-
-	if (md->dirty)
-		seq_printf(m," dirty=%lu",md->dirty);
-
-	if (md->pages != md->anon && md->pages != md->dirty)
-		seq_printf(m, " mapped=%lu", md->pages);
-
-	if (md->mapcount_max > 1)
-		seq_printf(m, " mapmax=%lu", md->mapcount_max);
-
-	if (md->swapcache)
-		seq_printf(m," swapcache=%lu", md->swapcache);
-
-	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
-		seq_printf(m," active=%lu", md->active);
-
-	if (md->writeback)
-		seq_printf(m," writeback=%lu", md->writeback);
-
-	for_each_node_state(n, N_HIGH_MEMORY)
-		if (md->node[n])
-			seq_printf(m, " N%d=%lu", n, md->node[n]);
-out:
-	seq_putc(m, '\n');
-	kfree(md);
-
-	if (m->count < m->size)
-		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
-	return 0;
-}
diff --git a/mm/migrate.c b/mm/migrate.c
index 34132f8..e4a5c91 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -721,15 +721,11 @@
 		 * Only page_lock_anon_vma() understands the subtleties of
 		 * getting a hold on an anon_vma from outside one of its mms.
 		 */
-		anon_vma = page_lock_anon_vma(page);
+		anon_vma = page_get_anon_vma(page);
 		if (anon_vma) {
 			/*
-			 * Take a reference count on the anon_vma if the
-			 * page is mapped so that it is guaranteed to
-			 * exist when the page is remapped later
+			 * Anon page
 			 */
-			get_anon_vma(anon_vma);
-			page_unlock_anon_vma(anon_vma);
 		} else if (PageSwapCache(page)) {
 			/*
 			 * We cannot be sure that the anon_vma of an unmapped
@@ -857,13 +853,8 @@
 		lock_page(hpage);
 	}
 
-	if (PageAnon(hpage)) {
-		anon_vma = page_lock_anon_vma(hpage);
-		if (anon_vma) {
-			get_anon_vma(anon_vma);
-			page_unlock_anon_vma(anon_vma);
-		}
-	}
+	if (PageAnon(hpage))
+		anon_vma = page_get_anon_vma(hpage);
 
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 772140c..ac2631b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -84,10 +84,14 @@
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
-int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
-int sysctl_overcommit_ratio = 50;	/* default is 50% */
+int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+int sysctl_overcommit_ratio __read_mostly = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-struct percpu_counter vm_committed_as;
+/*
+ * Make sure vm_committed_as in one cacheline and not cacheline shared with
+ * other variables. It can be updated by several CPUs frequently.
+ */
+struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -190,7 +194,7 @@
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_lock
+ * Requires inode->i_mapping->i_mmap_mutex
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
@@ -218,9 +222,9 @@
 
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		__remove_shared_vm_struct(vma, file, mapping);
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	}
 }
 
@@ -394,29 +398,6 @@
 	return vma;
 }
 
-static inline void
-__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct vm_area_struct *prev, struct rb_node *rb_parent)
-{
-	struct vm_area_struct *next;
-
-	vma->vm_prev = prev;
-	if (prev) {
-		next = prev->vm_next;
-		prev->vm_next = vma;
-	} else {
-		mm->mmap = vma;
-		if (rb_parent)
-			next = rb_entry(rb_parent,
-					struct vm_area_struct, vm_rb);
-		else
-			next = NULL;
-	}
-	vma->vm_next = next;
-	if (next)
-		next->vm_prev = vma;
-}
-
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
@@ -464,16 +445,14 @@
 	if (vma->vm_file)
 		mapping = vma->vm_file->f_mapping;
 
-	if (mapping) {
-		spin_lock(&mapping->i_mmap_lock);
-		vma->vm_truncate_count = mapping->truncate_count;
-	}
+	if (mapping)
+		mutex_lock(&mapping->i_mmap_mutex);
 
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	__vma_link_file(vma);
 
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 
 	mm->map_count++;
 	validate_mm(mm);
@@ -576,17 +555,8 @@
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
-		spin_lock(&mapping->i_mmap_lock);
-		if (importer &&
-		    vma->vm_truncate_count != next->vm_truncate_count) {
-			/*
-			 * unmap_mapping_range might be in progress:
-			 * ensure that the expanding vma is rescanned.
-			 */
-			importer->vm_truncate_count = 0;
-		}
+		mutex_lock(&mapping->i_mmap_mutex);
 		if (insert) {
-			insert->vm_truncate_count = vma->vm_truncate_count;
 			/*
 			 * Put into prio_tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
@@ -605,7 +575,7 @@
 	 * lock may be shared between many sibling processes.  Skipping
 	 * the lock for brk adjustments makes a difference sometimes.
 	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
+	if (vma->anon_vma && (importer || start != vma->vm_start)) {
 		anon_vma = vma->anon_vma;
 		anon_vma_lock(anon_vma);
 	}
@@ -652,7 +622,7 @@
 	if (anon_vma)
 		anon_vma_unlock(anon_vma);
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 
 	if (remove_next) {
 		if (file) {
@@ -699,9 +669,17 @@
 }
 
 static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
-					struct anon_vma *anon_vma2)
+					struct anon_vma *anon_vma2,
+					struct vm_area_struct *vma)
 {
-	return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+	/*
+	 * The list_is_singular() test is to avoid merging VMA cloned from
+	 * parents. This can improve scalability caused by anon_vma lock.
+	 */
+	if ((!anon_vma1 || !anon_vma2) && (!vma ||
+		list_is_singular(&vma->anon_vma_chain)))
+		return 1;
+	return anon_vma1 == anon_vma2;
 }
 
 /*
@@ -720,7 +698,7 @@
 	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
 	if (is_mergeable_vma(vma, file, vm_flags) &&
-	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		if (vma->vm_pgoff == vm_pgoff)
 			return 1;
 	}
@@ -739,7 +717,7 @@
 	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
 	if (is_mergeable_vma(vma, file, vm_flags) &&
-	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		pgoff_t vm_pglen;
 		vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
@@ -817,7 +795,7 @@
 				can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen) &&
 				is_mergeable_anon_vma(prev->anon_vma,
-						      next->anon_vma)) {
+						      next->anon_vma, NULL)) {
 							/* cases 1, 6 */
 			err = vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
@@ -1785,7 +1763,7 @@
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static int expand_downwards(struct vm_area_struct *vma,
+int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	int error;
@@ -1832,11 +1810,6 @@
 	return error;
 }
 
-int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-	return expand_downwards(vma, address);
-}
-
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1919,17 +1892,17 @@
 		unsigned long start, unsigned long end)
 {
 	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
-				 next? next->vm_start: 0);
-	tlb_finish_mmu(tlb, start, end);
+	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+				 next ? next->vm_start : 0);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2271,7 +2244,7 @@
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	unsigned long end;
@@ -2296,14 +2269,14 @@
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb = tlb_gather_mmu(mm, 1);
+	tlb_gather_mmu(&tlb, mm, 1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
-	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-	tlb_finish_mmu(tlb, 0, end);
+	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	tlb_finish_mmu(&tlb, 0, end);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
@@ -2317,7 +2290,7 @@
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_lock is taken here.
+ * then i_mmap_mutex is taken here.
  */
 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
@@ -2529,15 +2502,15 @@
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->lock. If some other vma in this mm shares
+		 * anon_vma->root->mutex. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->lock.
+		 * anon_vma->root->mutex.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->head.next))
@@ -2559,7 +2532,7 @@
 		 */
 		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
 			BUG();
-		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+		mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
 	}
 }
 
@@ -2586,7 +2559,7 @@
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -2642,7 +2615,7 @@
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->lock.
+		 * anon_vma->root->mutex.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->head.next))
@@ -2658,7 +2631,7 @@
 		 * AS_MM_ALL_LOCKS can't change to 0 from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
 					&mapping->flags))
 			BUG();
diff --git a/mm/mremap.c b/mm/mremap.c
index a7c1f9f..506fa44 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -93,8 +93,7 @@
 		 * and we propagate stale pages into the dst afterward.
 		 */
 		mapping = vma->vm_file->f_mapping;
-		spin_lock(&mapping->i_mmap_lock);
-		new_vma->vm_truncate_count = 0;
+		mutex_lock(&mapping->i_mmap_mutex);
 	}
 
 	/*
@@ -123,7 +122,7 @@
 	pte_unmap(new_pte - 1);
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
 }
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 9109049..6e93dc7 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -307,30 +307,7 @@
 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
-#ifdef MAX_DMA32_PFN
-	unsigned long end_pfn;
-
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	/* update goal according ...MAX_DMA32_PFN */
-	end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
-
-	if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
-	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
-		void *ptr;
-		unsigned long new_goal;
-
-		new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
-		ptr =  __alloc_memory_core_early(pgdat->node_id, size, align,
-						 new_goal, -1ULL);
-		if (ptr)
-			return ptr;
-	}
-#endif
-
 	return __alloc_bootmem_node(pgdat, size, align, goal);
-
 }
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/mm/nommu.c b/mm/nommu.c
index c4c542c..1fd0c51 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -680,9 +680,9 @@
  */
 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	struct vm_area_struct *pvma, **pp, *next;
+	struct vm_area_struct *pvma, *prev;
 	struct address_space *mapping;
-	struct rb_node **p, *parent;
+	struct rb_node **p, *parent, *rb_prev;
 
 	kenter(",%p", vma);
 
@@ -703,7 +703,7 @@
 	}
 
 	/* add the VMA to the tree */
-	parent = NULL;
+	parent = rb_prev = NULL;
 	p = &mm->mm_rb.rb_node;
 	while (*p) {
 		parent = *p;
@@ -713,17 +713,20 @@
 		 * (the latter is necessary as we may get identical VMAs) */
 		if (vma->vm_start < pvma->vm_start)
 			p = &(*p)->rb_left;
-		else if (vma->vm_start > pvma->vm_start)
+		else if (vma->vm_start > pvma->vm_start) {
+			rb_prev = parent;
 			p = &(*p)->rb_right;
-		else if (vma->vm_end < pvma->vm_end)
+		} else if (vma->vm_end < pvma->vm_end)
 			p = &(*p)->rb_left;
-		else if (vma->vm_end > pvma->vm_end)
+		else if (vma->vm_end > pvma->vm_end) {
+			rb_prev = parent;
 			p = &(*p)->rb_right;
-		else if (vma < pvma)
+		} else if (vma < pvma)
 			p = &(*p)->rb_left;
-		else if (vma > pvma)
+		else if (vma > pvma) {
+			rb_prev = parent;
 			p = &(*p)->rb_right;
-		else
+		} else
 			BUG();
 	}
 
@@ -731,20 +734,11 @@
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 
 	/* add VMA to the VMA list also */
-	for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
-		if (pvma->vm_start > vma->vm_start)
-			break;
-		if (pvma->vm_start < vma->vm_start)
-			continue;
-		if (pvma->vm_end < vma->vm_end)
-			break;
-	}
+	prev = NULL;
+	if (rb_prev)
+		prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
 
-	next = *pp;
-	*pp = vma;
-	vma->vm_next = next;
-	if (next)
-		next->vm_prev = vma;
+	__vma_link_list(mm, vma, prev, parent);
 }
 
 /*
@@ -752,7 +746,6 @@
  */
 static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
-	struct vm_area_struct **pp;
 	struct address_space *mapping;
 	struct mm_struct *mm = vma->vm_mm;
 
@@ -775,12 +768,14 @@
 
 	/* remove from the MM's tree and list */
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
-	for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
-		if (*pp == vma) {
-			*pp = vma->vm_next;
-			break;
-		}
-	}
+
+	if (vma->vm_prev)
+		vma->vm_prev->vm_next = vma->vm_next;
+	else
+		mm->mmap = vma->vm_next;
+
+	if (vma->vm_next)
+		vma->vm_next->vm_prev = vma->vm_prev;
 
 	vma->vm_mm = NULL;
 }
@@ -809,17 +804,15 @@
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
-	struct rb_node *n = mm->mm_rb.rb_node;
 
 	/* check the cache first */
 	vma = mm->mmap_cache;
 	if (vma && vma->vm_start <= addr && vma->vm_end > addr)
 		return vma;
 
-	/* trawl the tree (there may be multiple mappings in which addr
+	/* trawl the list (there may be multiple mappings in which addr
 	 * resides) */
-	for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
-		vma = rb_entry(n, struct vm_area_struct, vm_rb);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->vm_start > addr)
 			return NULL;
 		if (vma->vm_end > addr) {
@@ -859,7 +852,6 @@
 					     unsigned long len)
 {
 	struct vm_area_struct *vma;
-	struct rb_node *n = mm->mm_rb.rb_node;
 	unsigned long end = addr + len;
 
 	/* check the cache first */
@@ -867,10 +859,9 @@
 	if (vma && vma->vm_start == addr && vma->vm_end == end)
 		return vma;
 
-	/* trawl the tree (there may be multiple mappings in which addr
+	/* trawl the list (there may be multiple mappings in which addr
 	 * resides) */
-	for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
-		vma = rb_entry(n, struct vm_area_struct, vm_rb);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->vm_start < addr)
 			continue;
 		if (vma->vm_start > addr)
@@ -1133,7 +1124,7 @@
 			   unsigned long capabilities)
 {
 	struct page *pages;
-	unsigned long total, point, n, rlen;
+	unsigned long total, point, n;
 	void *base;
 	int ret, order;
 
@@ -1157,13 +1148,12 @@
 		 * make a private copy of the data and map that instead */
 	}
 
-	rlen = PAGE_ALIGN(len);
 
 	/* allocate some memory to hold the mapping
 	 * - note that this may not return a page-aligned address if the object
 	 *   we're allocating is smaller than a page
 	 */
-	order = get_order(rlen);
+	order = get_order(len);
 	kdebug("alloc order %d for %lx", order, len);
 
 	pages = alloc_pages(GFP_KERNEL, order);
@@ -1173,7 +1163,7 @@
 	total = 1 << order;
 	atomic_long_add(total, &mmap_pages_allocated);
 
-	point = rlen >> PAGE_SHIFT;
+	point = len >> PAGE_SHIFT;
 
 	/* we allocated a power-of-2 sized page set, so we may want to trim off
 	 * the excess */
@@ -1195,7 +1185,7 @@
 	base = page_address(pages);
 	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
 	region->vm_start = (unsigned long) base;
-	region->vm_end   = region->vm_start + rlen;
+	region->vm_end   = region->vm_start + len;
 	region->vm_top   = region->vm_start + (total << PAGE_SHIFT);
 
 	vma->vm_start = region->vm_start;
@@ -1211,22 +1201,22 @@
 
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
+		ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
 		set_fs(old_fs);
 
 		if (ret < 0)
 			goto error_free;
 
 		/* clear the last little bit */
-		if (ret < rlen)
-			memset(base + ret, 0, rlen - ret);
+		if (ret < len)
+			memset(base + ret, 0, len - ret);
 
 	}
 
 	return 0;
 
 error_free:
-	free_page_series(region->vm_start, region->vm_end);
+	free_page_series(region->vm_start, region->vm_top);
 	region->vm_start = vma->vm_start = 0;
 	region->vm_end   = vma->vm_end = 0;
 	region->vm_top   = 0;
@@ -1235,7 +1225,7 @@
 enomem:
 	printk("Allocation of length %lu from process %d (%s) failed\n",
 	       len, current->pid, current->comm);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 }
 
@@ -1268,6 +1258,7 @@
 
 	/* we ignore the address hint */
 	addr = 0;
+	len = PAGE_ALIGN(len);
 
 	/* we've determined that we can make the mapping, now translate what we
 	 * now know into VMA flags */
@@ -1385,15 +1376,15 @@
 		if (capabilities & BDI_CAP_MAP_DIRECT) {
 			addr = file->f_op->get_unmapped_area(file, addr, len,
 							     pgoff, flags);
-			if (IS_ERR((void *) addr)) {
+			if (IS_ERR_VALUE(addr)) {
 				ret = addr;
-				if (ret != (unsigned long) -ENOSYS)
+				if (ret != -ENOSYS)
 					goto error_just_free;
 
 				/* the driver refused to tell us where to site
 				 * the mapping so we'll have to attempt to copy
 				 * it */
-				ret = (unsigned long) -ENODEV;
+				ret = -ENODEV;
 				if (!(capabilities & BDI_CAP_MAP_COPY))
 					goto error_just_free;
 
@@ -1468,14 +1459,14 @@
 	printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
 	       " from process %d failed\n",
 	       len, current->pid);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 
 error_getting_region:
 	printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
 	       " from process %d failed\n",
 	       len, current->pid);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
@@ -1644,15 +1635,17 @@
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 {
 	struct vm_area_struct *vma;
-	struct rb_node *rb;
-	unsigned long end = start + len;
+	unsigned long end;
 	int ret;
 
 	kenter(",%lx,%zx", start, len);
 
+	len = PAGE_ALIGN(len);
 	if (len == 0)
 		return -EINVAL;
 
+	end = start + len;
+
 	/* find the first potentially overlapping VMA */
 	vma = find_vma(mm, start);
 	if (!vma) {
@@ -1677,9 +1670,8 @@
 			}
 			if (end == vma->vm_end)
 				goto erase_whole_vma;
-			rb = rb_next(&vma->vm_rb);
-			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
-		} while (rb);
+			vma = vma->vm_next;
+		} while (vma);
 		kleave(" = -EINVAL [split file]");
 		return -EINVAL;
 	} else {
@@ -1773,6 +1765,8 @@
 	struct vm_area_struct *vma;
 
 	/* insanity checks first */
+	old_len = PAGE_ALIGN(old_len);
+	new_len = PAGE_ALIGN(new_len);
 	if (old_len == 0 || new_len == 0)
 		return (unsigned long) -EINVAL;
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52e85c..e4b0991 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,6 +38,33 @@
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
+/**
+ * test_set_oom_score_adj() - set current's oom_score_adj and return old value
+ * @new_val: new oom_score_adj value
+ *
+ * Sets the oom_score_adj value for current to @new_val with proper
+ * synchronization and returns the old value.  Usually used to temporarily
+ * set a value, save the old value in the caller, and then reinstate it later.
+ */
+int test_set_oom_score_adj(int new_val)
+{
+	struct sighand_struct *sighand = current->sighand;
+	int old_val;
+
+	spin_lock_irq(&sighand->siglock);
+	old_val = current->signal->oom_score_adj;
+	if (new_val != old_val) {
+		if (new_val == OOM_SCORE_ADJ_MIN)
+			atomic_inc(&current->mm->oom_disable_count);
+		else if (old_val == OOM_SCORE_ADJ_MIN)
+			atomic_dec(&current->mm->oom_disable_count);
+		current->signal->oom_score_adj = new_val;
+	}
+	spin_unlock_irq(&sighand->siglock);
+
+	return old_val;
+}
+
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -155,15 +182,6 @@
 	}
 
 	/*
-	 * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
-	 * priority for oom killing.
-	 */
-	if (p->flags & PF_OOM_ORIGIN) {
-		task_unlock(p);
-		return 1000;
-	}
-
-	/*
 	 * The memory controller may have a limit of 0 bytes, so avoid a divide
 	 * by zero, if necessary.
 	 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d5498e..2a00f17 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include <linux/oom.h>
 #include <linux/notifier.h>
 #include <linux/topology.h>
@@ -39,6 +40,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/vmstat.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
@@ -1735,6 +1737,45 @@
 	return ret;
 }
 
+static DEFINE_RATELIMIT_STATE(nopage_rs,
+		DEFAULT_RATELIMIT_INTERVAL,
+		DEFAULT_RATELIMIT_BURST);
+
+void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+{
+	va_list args;
+	unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+		return;
+
+	/*
+	 * This documents exceptions given to allocations in certain
+	 * contexts that are allowed to allocate outside current's set
+	 * of allowed nodes.
+	 */
+	if (!(gfp_mask & __GFP_NOMEMALLOC))
+		if (test_thread_flag(TIF_MEMDIE) ||
+		    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+			filter &= ~SHOW_MEM_FILTER_NODES;
+	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+		filter &= ~SHOW_MEM_FILTER_NODES;
+
+	if (fmt) {
+		printk(KERN_WARNING);
+		va_start(args, fmt);
+		vprintk(fmt, args);
+		va_end(args);
+	}
+
+	pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+		   current->comm, order, gfp_mask);
+
+	dump_stack();
+	if (!should_suppress_show_mem())
+		show_mem(filter);
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 				unsigned long pages_reclaimed)
@@ -2065,6 +2106,7 @@
 		first_zones_zonelist(zonelist, high_zoneidx, NULL,
 					&preferred_zone);
 
+rebalance:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2072,7 +2114,6 @@
 	if (page)
 		goto got_pg;
 
-rebalance:
 	/* Allocate without watermarks if the context allows */
 	if (alloc_flags & ALLOC_NO_WATERMARKS) {
 		page = __alloc_pages_high_priority(gfp_mask, order,
@@ -2106,7 +2147,7 @@
 					sync_migration);
 	if (page)
 		goto got_pg;
-	sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
+	sync_migration = true;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2177,27 +2218,7 @@
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-		unsigned int filter = SHOW_MEM_FILTER_NODES;
-
-		/*
-		 * This documents exceptions given to allocations in certain
-		 * contexts that are allowed to allocate outside current's set
-		 * of allowed nodes.
-		 */
-		if (!(gfp_mask & __GFP_NOMEMALLOC))
-			if (test_thread_flag(TIF_MEMDIE) ||
-			    (current->flags & (PF_MEMALLOC | PF_EXITING)))
-				filter &= ~SHOW_MEM_FILTER_NODES;
-		if (in_interrupt() || !wait)
-			filter &= ~SHOW_MEM_FILTER_NODES;
-
-		pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
-			current->comm, order, gfp_mask);
-		dump_stack();
-		if (!should_suppress_show_mem())
-			show_mem(filter);
-	}
+	warn_alloc_failed(gfp_mask, order, NULL);
 	return page;
 got_pg:
 	if (kmemcheck_enabled)
@@ -2226,6 +2247,10 @@
 
 	if (should_fail_alloc_page(gfp_mask, order))
 		return NULL;
+#ifndef CONFIG_ZONE_DMA
+	if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
+		return NULL;
+#endif
 
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
@@ -2473,10 +2498,10 @@
 #endif
 
 /*
- * Determine whether the zone's node should be displayed or not, depending on
- * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
  */
-static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
 
@@ -2484,8 +2509,7 @@
 		goto out;
 
 	get_mems_allowed();
-	ret = !node_isset(zone->zone_pgdat->node_id,
-				cpuset_current_mems_allowed);
+	ret = !node_isset(nid, cpuset_current_mems_allowed);
 	put_mems_allowed();
 out:
 	return ret;
@@ -2500,13 +2524,13 @@
  * Suppresses nodes that are not allowed by current's cpuset if
  * SHOW_MEM_FILTER_NODES is passed.
  */
-void __show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter)
 {
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
@@ -2549,7 +2573,7 @@
 	for_each_populated_zone(zone) {
 		int i;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s"
@@ -2618,7 +2642,7 @@
 	for_each_populated_zone(zone) {
  		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s: ", zone->name);
@@ -2639,11 +2663,6 @@
 	show_swap_cache_info();
 }
 
-void show_free_areas(void)
-{
-	__show_free_areas(0);
-}
-
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;
@@ -3314,6 +3333,20 @@
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
+ * Check if a pageblock contains reserved pages
+ */
+static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+		if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
+			return 1;
+	}
+	return 0;
+}
+
+/*
  * Mark a number of pageblocks as MIGRATE_RESERVE. The number
  * of blocks reserved is based on min_wmark_pages(zone). The memory within
  * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
@@ -3322,7 +3355,7 @@
  */
 static void setup_zone_migrate_reserve(struct zone *zone)
 {
-	unsigned long start_pfn, pfn, end_pfn;
+	unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
 	struct page *page;
 	unsigned long block_migratetype;
 	int reserve;
@@ -3352,7 +3385,8 @@
 			continue;
 
 		/* Blocks with reserved pages will never free, skip them. */
-		if (PageReserved(page))
+		block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+		if (pageblock_is_reserved(pfn, block_end_pfn))
 			continue;
 
 		block_migratetype = get_pageblock_migratetype(page);
@@ -5100,7 +5134,7 @@
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-void calculate_zone_inactive_ratio(struct zone *zone)
+static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 {
 	unsigned int gb, ratio;
 
@@ -5114,7 +5148,7 @@
 	zone->inactive_ratio = ratio;
 }
 
-static void __init setup_per_zone_inactive_ratio(void)
+static void __meminit setup_per_zone_inactive_ratio(void)
 {
 	struct zone *zone;
 
@@ -5146,7 +5180,7 @@
  * 8192MB:	11584k
  * 16384MB:	16384k
  */
-static int __init init_per_zone_wmark_min(void)
+int __meminit init_per_zone_wmark_min(void)
 {
 	unsigned long lowmem_kbytes;
 
@@ -5158,6 +5192,7 @@
 	if (min_free_kbytes > 65536)
 		min_free_kbytes = 65536;
 	setup_per_zone_wmarks();
+	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
 	setup_per_zone_inactive_ratio();
 	return 0;
@@ -5508,10 +5543,8 @@
 	struct memory_isolate_notify arg;
 	int notifier_ret;
 	int ret = -EBUSY;
-	int zone_idx;
 
 	zone = page_zone(page);
-	zone_idx = zone_idx(zone);
 
 	spin_lock_irqsave(&zone->lock, flags);
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 2c0cc48..867f9dd 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -180,7 +180,7 @@
 		if (page)
 			continue;
 
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc_readahead(mapping);
 		if (!page)
 			break;
 		page->index = page_offset;
diff --git a/mm/rmap.c b/mm/rmap.c
index 522e4a9..3a39b51 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,8 +24,8 @@
  *   inode->i_alloc_sem (vmtruncate_range)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_lock
- *         anon_vma->lock
+ *       mapping->i_mmap_mutex
+ *         anon_vma->mutex
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
@@ -40,7 +40,7 @@
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock
- *   anon_vma->lock      (memory_failure, collect_procs_anon)
+ *   anon_vma->mutex      (memory_failure, collect_procs_anon)
  *     pte map lock
  */
 
@@ -86,6 +86,29 @@
 static inline void anon_vma_free(struct anon_vma *anon_vma)
 {
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
+
+	/*
+	 * Synchronize against page_lock_anon_vma() such that
+	 * we can safely hold the lock without the anon_vma getting
+	 * freed.
+	 *
+	 * Relies on the full mb implied by the atomic_dec_and_test() from
+	 * put_anon_vma() against the acquire barrier implied by
+	 * mutex_trylock() from page_lock_anon_vma(). This orders:
+	 *
+	 * page_lock_anon_vma()		VS	put_anon_vma()
+	 *   mutex_trylock()			  atomic_dec_and_test()
+	 *   LOCK				  MB
+	 *   atomic_read()			  mutex_is_locked()
+	 *
+	 * LOCK should suffice since the actual taking of the lock must
+	 * happen _before_ what follows.
+	 */
+	if (mutex_is_locked(&anon_vma->root->mutex)) {
+		anon_vma_lock(anon_vma);
+		anon_vma_unlock(anon_vma);
+	}
+
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
@@ -307,7 +330,7 @@
 {
 	struct anon_vma *anon_vma = data;
 
-	spin_lock_init(&anon_vma->lock);
+	mutex_init(&anon_vma->mutex);
 	atomic_set(&anon_vma->refcount, 0);
 	INIT_LIST_HEAD(&anon_vma->head);
 }
@@ -320,12 +343,26 @@
 }
 
 /*
- * Getting a lock on a stable anon_vma from a page off the LRU is
- * tricky: page_lock_anon_vma rely on RCU to guard against the races.
+ * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
+ *
+ * Since there is no serialization what so ever against page_remove_rmap()
+ * the best this function can do is return a locked anon_vma that might
+ * have been relevant to this page.
+ *
+ * The page might have been remapped to a different anon_vma or the anon_vma
+ * returned may already be freed (and even reused).
+ *
+ * All users of this function must be very careful when walking the anon_vma
+ * chain and verify that the page in question is indeed mapped in it
+ * [ something equivalent to page_mapped_in_vma() ].
+ *
+ * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
+ * that the anon_vma pointer from page->mapping is valid if there is a
+ * mapcount, we can dereference the anon_vma after observing those.
  */
-struct anon_vma *__page_lock_anon_vma(struct page *page)
+struct anon_vma *page_get_anon_vma(struct page *page)
 {
-	struct anon_vma *anon_vma, *root_anon_vma;
+	struct anon_vma *anon_vma = NULL;
 	unsigned long anon_mapping;
 
 	rcu_read_lock();
@@ -336,32 +373,97 @@
 		goto out;
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
-	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	spin_lock(&root_anon_vma->lock);
+	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+		anon_vma = NULL;
+		goto out;
+	}
 
 	/*
 	 * If this page is still mapped, then its anon_vma cannot have been
-	 * freed.  But if it has been unmapped, we have no security against
-	 * the anon_vma structure being freed and reused (for another anon_vma:
-	 * SLAB_DESTROY_BY_RCU guarantees that - so the spin_lock above cannot
-	 * corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting
-	 * anon_vma->root before page_unlock_anon_vma() is called to unlock.
+	 * freed.  But if it has been unmapped, we have no security against the
+	 * anon_vma structure being freed and reused (for another anon_vma:
+	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+	 * above cannot corrupt).
 	 */
-	if (page_mapped(page))
-		return anon_vma;
-
-	spin_unlock(&root_anon_vma->lock);
+	if (!page_mapped(page)) {
+		put_anon_vma(anon_vma);
+		anon_vma = NULL;
+	}
 out:
 	rcu_read_unlock();
-	return NULL;
+
+	return anon_vma;
+}
+
+/*
+ * Similar to page_get_anon_vma() except it locks the anon_vma.
+ *
+ * Its a little more complex as it tries to keep the fast path to a single
+ * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
+ * reference like with page_get_anon_vma() and then block on the mutex.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page)
+{
+	struct anon_vma *anon_vma = NULL;
+	unsigned long anon_mapping;
+
+	rcu_read_lock();
+	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
+	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
+		goto out;
+	if (!page_mapped(page))
+		goto out;
+
+	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
+	if (mutex_trylock(&anon_vma->root->mutex)) {
+		/*
+		 * If we observe a !0 refcount, then holding the lock ensures
+		 * the anon_vma will not go away, see __put_anon_vma().
+		 */
+		if (!atomic_read(&anon_vma->refcount)) {
+			anon_vma_unlock(anon_vma);
+			anon_vma = NULL;
+		}
+		goto out;
+	}
+
+	/* trylock failed, we got to sleep */
+	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+		anon_vma = NULL;
+		goto out;
+	}
+
+	if (!page_mapped(page)) {
+		put_anon_vma(anon_vma);
+		anon_vma = NULL;
+		goto out;
+	}
+
+	/* we pinned the anon_vma, its safe to sleep */
+	rcu_read_unlock();
+	anon_vma_lock(anon_vma);
+
+	if (atomic_dec_and_test(&anon_vma->refcount)) {
+		/*
+		 * Oops, we held the last refcount, release the lock
+		 * and bail -- can't simply use put_anon_vma() because
+		 * we'll deadlock on the anon_vma_lock() recursion.
+		 */
+		anon_vma_unlock(anon_vma);
+		__put_anon_vma(anon_vma);
+		anon_vma = NULL;
+	}
+
+	return anon_vma;
+
+out:
+	rcu_read_unlock();
+	return anon_vma;
 }
 
 void page_unlock_anon_vma(struct anon_vma *anon_vma)
-	__releases(&anon_vma->root->lock)
-	__releases(RCU)
 {
 	anon_vma_unlock(anon_vma);
-	rcu_read_unlock();
 }
 
 /*
@@ -646,14 +748,14 @@
 	 * The page lock not only makes sure that page->mapping cannot
 	 * suddenly be NULLified by truncation, it makes sure that the
 	 * structure at mapping cannot be freed and reused yet,
-	 * so we can safely take mapping->i_mmap_lock.
+	 * so we can safely take mapping->i_mmap_mutex.
 	 */
 	BUG_ON(!PageLocked(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 
 	/*
-	 * i_mmap_lock does not stabilize mapcount at all, but mapcount
+	 * i_mmap_mutex does not stabilize mapcount at all, but mapcount
 	 * is more likely to be accurate if we note it after spinning.
 	 */
 	mapcount = page_mapcount(page);
@@ -675,7 +777,7 @@
 			break;
 	}
 
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return referenced;
 }
 
@@ -762,7 +864,7 @@
 
 	BUG_ON(PageAnon(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
@@ -771,7 +873,7 @@
 			ret += page_mkclean_one(page, vma, address);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1119,7 +1221,7 @@
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
+	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
@@ -1345,7 +1447,7 @@
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1391,7 +1493,7 @@
 	mapcount = page_mapcount(page);
 	if (!mapcount)
 		goto out;
-	cond_resched_lock(&mapping->i_mmap_lock);
+	cond_resched();
 
 	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
 	if (max_nl_cursor == 0)
@@ -1413,7 +1515,7 @@
 			}
 			vma->vm_private_data = (void *) max_nl_cursor;
 		}
-		cond_resched_lock(&mapping->i_mmap_lock);
+		cond_resched();
 		max_nl_cursor += CLUSTER_SIZE;
 	} while (max_nl_cursor <= max_nl_size);
 
@@ -1425,7 +1527,7 @@
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
 		vma->vm_private_data = NULL;
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1544,7 +1646,7 @@
 
 	if (!mapping)
 		return ret;
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1558,7 +1660,7 @@
 	 * never contain migration ptes.  Decide what to do about this
 	 * limitation to linear when we need rmap_walk() on nonlinear.
 	 */
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index ba4ad28..69edb45 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -99,6 +99,13 @@
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+struct shmem_xattr {
+	struct list_head list;	/* anchored by shmem_inode_info->xattr_list */
+	char *name;		/* xattr name */
+	size_t size;
+	char value[0];
+};
+
 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
@@ -822,6 +829,7 @@
 static void shmem_evict_inode(struct inode *inode)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_xattr *xattr, *nxattr;
 
 	if (inode->i_mapping->a_ops == &shmem_aops) {
 		truncate_inode_pages(inode->i_mapping, 0);
@@ -834,6 +842,11 @@
 			mutex_unlock(&shmem_swaplist_mutex);
 		}
 	}
+
+	list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
+		kfree(xattr->name);
+		kfree(xattr);
+	}
 	BUG_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	end_writeback(inode);
@@ -1615,6 +1628,7 @@
 		spin_lock_init(&info->lock);
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
+		INIT_LIST_HEAD(&info->xattr_list);
 		cache_no_acl(inode);
 
 		switch (mode & S_IFMT) {
@@ -2014,9 +2028,9 @@
 
 	info = SHMEM_I(inode);
 	inode->i_size = len-1;
-	if (len <= (char *)inode - (char *)info) {
+	if (len <= SHMEM_SYMLINK_INLINE_LEN) {
 		/* do it inline */
-		memcpy(info, symname, len);
+		memcpy(info->inline_symlink, symname, len);
 		inode->i_op = &shmem_symlink_inline_operations;
 	} else {
 		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -2042,7 +2056,7 @@
 
 static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
 {
-	nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
+	nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
 	return NULL;
 }
 
@@ -2066,63 +2080,253 @@
 	}
 }
 
+#ifdef CONFIG_TMPFS_XATTR
+/*
+ * Superblocks without xattr inode operations may get some security.* xattr
+ * support from the LSM "for free". As soon as we have any other xattrs
+ * like ACLs, we also need to implement the security.* handlers at
+ * filesystem level, though.
+ */
+
+static int shmem_xattr_get(struct dentry *dentry, const char *name,
+			   void *buffer, size_t size)
+{
+	struct shmem_inode_info *info;
+	struct shmem_xattr *xattr;
+	int ret = -ENODATA;
+
+	info = SHMEM_I(dentry->d_inode);
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		if (strcmp(name, xattr->name))
+			continue;
+
+		ret = xattr->size;
+		if (buffer) {
+			if (size < xattr->size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, xattr->value, xattr->size);
+		}
+		break;
+	}
+	spin_unlock(&info->lock);
+	return ret;
+}
+
+static int shmem_xattr_set(struct dentry *dentry, const char *name,
+			   const void *value, size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_xattr *xattr;
+	struct shmem_xattr *new_xattr = NULL;
+	size_t len;
+	int err = 0;
+
+	/* value == NULL means remove */
+	if (value) {
+		/* wrap around? */
+		len = sizeof(*new_xattr) + size;
+		if (len <= sizeof(*new_xattr))
+			return -ENOMEM;
+
+		new_xattr = kmalloc(len, GFP_KERNEL);
+		if (!new_xattr)
+			return -ENOMEM;
+
+		new_xattr->name = kstrdup(name, GFP_KERNEL);
+		if (!new_xattr->name) {
+			kfree(new_xattr);
+			return -ENOMEM;
+		}
+
+		new_xattr->size = size;
+		memcpy(new_xattr->value, value, size);
+	}
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		if (!strcmp(name, xattr->name)) {
+			if (flags & XATTR_CREATE) {
+				xattr = new_xattr;
+				err = -EEXIST;
+			} else if (new_xattr) {
+				list_replace(&xattr->list, &new_xattr->list);
+			} else {
+				list_del(&xattr->list);
+			}
+			goto out;
+		}
+	}
+	if (flags & XATTR_REPLACE) {
+		xattr = new_xattr;
+		err = -ENODATA;
+	} else {
+		list_add(&new_xattr->list, &info->xattr_list);
+		xattr = NULL;
+	}
+out:
+	spin_unlock(&info->lock);
+	if (xattr)
+		kfree(xattr->name);
+	kfree(xattr);
+	return err;
+}
+
+
+static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	&generic_acl_access_handler,
+	&generic_acl_default_handler,
+#endif
+	NULL
+};
+
+static int shmem_xattr_validate(const char *name)
+{
+	struct { const char *prefix; size_t len; } arr[] = {
+		{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
+		{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arr); i++) {
+		size_t preflen = arr[i].len;
+		if (strncmp(name, arr[i].prefix, preflen) == 0) {
+			if (!name[preflen])
+				return -EINVAL;
+			return 0;
+		}
+	}
+	return -EOPNOTSUPP;
+}
+
+static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
+			      void *buffer, size_t size)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_getxattr(dentry, name, buffer, size);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	return shmem_xattr_get(dentry, name, buffer, size);
+}
+
+static int shmem_setxattr(struct dentry *dentry, const char *name,
+			  const void *value, size_t size, int flags)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_setxattr(dentry, name, value, size, flags);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	if (size == 0)
+		value = "";  /* empty EA, do not remove */
+
+	return shmem_xattr_set(dentry, name, value, size, flags);
+
+}
+
+static int shmem_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_removexattr(dentry, name);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
+}
+
+static bool xattr_is_trusted(const char *name)
+{
+	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+}
+
+static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	bool trusted = capable(CAP_SYS_ADMIN);
+	struct shmem_xattr *xattr;
+	struct shmem_inode_info *info;
+	size_t used = 0;
+
+	info = SHMEM_I(dentry->d_inode);
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		size_t len;
+
+		/* skip "trusted." attributes for unprivileged callers */
+		if (!trusted && xattr_is_trusted(xattr->name))
+			continue;
+
+		len = strlen(xattr->name) + 1;
+		used += len;
+		if (buffer) {
+			if (size < used) {
+				used = -ERANGE;
+				break;
+			}
+			memcpy(buffer, xattr->name, len);
+			buffer += len;
+		}
+	}
+	spin_unlock(&info->lock);
+
+	return used;
+}
+#endif /* CONFIG_TMPFS_XATTR */
+
 static const struct inode_operations shmem_symlink_inline_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link_inline,
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 };
 
 static const struct inode_operations shmem_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
 	.put_link	= shmem_put_link,
-};
-
-#ifdef CONFIG_TMPFS_POSIX_ACL
-/*
- * Superblocks without xattr inode operations will get security.* xattr
- * support from the VFS "for free". As soon as we have any other xattrs
- * like ACLs, we also need to implement the security.* handlers at
- * filesystem level, though.
- */
-
-static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
-					size_t list_len, const char *name,
-					size_t name_len, int handler_flags)
-{
-	return security_inode_listsecurity(dentry->d_inode, list, list_len);
-}
-
-static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
-		void *buffer, size_t size, int handler_flags)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return xattr_getsecurity(dentry->d_inode, name, buffer, size);
-}
-
-static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags, int handler_flags)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_setsecurity(dentry->d_inode, name, value,
-					  size, flags);
-}
-
-static const struct xattr_handler shmem_xattr_security_handler = {
-	.prefix = XATTR_SECURITY_PREFIX,
-	.list   = shmem_xattr_security_list,
-	.get    = shmem_xattr_security_get,
-	.set    = shmem_xattr_security_set,
-};
-
-static const struct xattr_handler *shmem_xattr_handlers[] = {
-	&generic_acl_access_handler,
-	&generic_acl_default_handler,
-	&shmem_xattr_security_handler,
-	NULL
-};
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
 #endif
+};
 
 static struct dentry *shmem_get_parent(struct dentry *child)
 {
@@ -2402,8 +2606,10 @@
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
 	sb->s_time_gran = 1;
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
 	sb->s_xattr = shmem_xattr_handlers;
+#endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
@@ -2501,11 +2707,13 @@
 static const struct inode_operations shmem_inode_operations = {
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 
@@ -2523,23 +2731,27 @@
 	.mknod		= shmem_mknod,
 	.rename		= shmem_rename,
 #endif
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	.setattr	= shmem_notify_change,
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
 
 static const struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	.setattr	= shmem_notify_change,
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
diff --git a/mm/slub.c b/mm/slub.c
index 4ea7f1a..4aad32d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1884,7 +1884,8 @@
 	deactivate_slab(s, c);
 	c->page = NULL;
 	c->node = NUMA_NO_NODE;
-	goto unlock_out;
+	local_irq_restore(flags);
+	return object;
 }
 
 /*
diff --git a/mm/swap.c b/mm/swap.c
index 5602f1a..3a442f1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -272,14 +272,10 @@
 		memcg_reclaim_stat->recent_rotated[file]++;
 }
 
-/*
- * FIXME: speed this up?
- */
-void activate_page(struct page *page)
+static void __activate_page(struct page *page, void *arg)
 {
 	struct zone *zone = page_zone(page);
 
-	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
 		int lru = page_lru_base_type(page);
@@ -292,8 +288,45 @@
 
 		update_page_reclaim_stat(zone, page, file, 1);
 	}
+}
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+
+static void activate_page_drain(int cpu)
+{
+	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+
+	if (pagevec_count(pvec))
+		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+}
+
+void activate_page(struct page *page)
+{
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+		page_cache_get(page);
+		if (!pagevec_add(pvec, page))
+			pagevec_lru_move_fn(pvec, __activate_page, NULL);
+		put_cpu_var(activate_page_pvecs);
+	}
+}
+
+#else
+static inline void activate_page_drain(int cpu)
+{
+}
+
+void activate_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	__activate_page(page, NULL);
 	spin_unlock_irq(&zone->lru_lock);
 }
+#endif
 
 /*
  * Mark a page as having seen activity.
@@ -464,6 +497,8 @@
 	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
+	activate_page_drain(cpu);
 }
 
 /**
@@ -476,6 +511,13 @@
  */
 void deactivate_page(struct page *page)
 {
+	/*
+	 * In a workload with many unevictable page such as mprotect, unevictable
+	 * page deactivation for accelerating reclaim is pointless.
+	 */
+	if (PageUnevictable(page))
+		return;
+
 	if (likely(get_page_unless_zero(page))) {
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8c6b3ce..d537d29 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,7 @@
 #include <linux/syscalls.h>
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
+#include <linux/oom.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -1555,6 +1556,7 @@
 	struct address_space *mapping;
 	struct inode *inode;
 	char *pathname;
+	int oom_score_adj;
 	int i, type, prev;
 	int err;
 
@@ -1613,9 +1615,9 @@
 	p->flags &= ~SWP_WRITEOK;
 	spin_unlock(&swap_lock);
 
-	current->flags |= PF_OOM_ORIGIN;
+	oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 	err = try_to_unuse(type);
-	current->flags &= ~PF_OOM_ORIGIN;
+	test_set_oom_score_adj(oom_score_adj);
 
 	if (err) {
 		/*
diff --git a/mm/util.c b/mm/util.c
index e7b103a..88ea1bd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -6,6 +6,8 @@
 #include <linux/sched.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
 
@@ -215,6 +217,28 @@
 }
 EXPORT_SYMBOL(strndup_user);
 
+void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct vm_area_struct *prev, struct rb_node *rb_parent)
+{
+	struct vm_area_struct *next;
+
+	vma->vm_prev = prev;
+	if (prev) {
+		next = prev->vm_next;
+		prev->vm_next = vma;
+	} else {
+		mm->mmap = vma;
+		if (rb_parent)
+			next = rb_entry(rb_parent,
+					struct vm_area_struct, vm_rb);
+		else
+			next = NULL;
+	}
+	vma->vm_next = next;
+	if (next)
+		next->vm_prev = vma;
+}
+
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d60302..b5ccf31 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -375,7 +375,7 @@
 	/* find starting point for our search */
 	if (free_vmap_cache) {
 		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
-		addr = ALIGN(first->va_end + PAGE_SIZE, align);
+		addr = ALIGN(first->va_end, align);
 		if (addr < vstart)
 			goto nocache;
 		if (addr + size - 1 < addr)
@@ -406,10 +406,10 @@
 	}
 
 	/* from the starting point, walk areas until a suitable hole is found */
-	while (addr + size >= first->va_start && addr + size <= vend) {
+	while (addr + size > first->va_start && addr + size <= vend) {
 		if (addr + cached_hole_size < first->va_start)
 			cached_hole_size = first->va_start - addr;
-		addr = ALIGN(first->va_end + PAGE_SIZE, align);
+		addr = ALIGN(first->va_end, align);
 		if (addr + size - 1 < addr)
 			goto overflow;
 
@@ -1534,6 +1534,7 @@
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node, void *caller)
 {
+	const int order = 0;
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
@@ -1560,11 +1561,12 @@
 
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
+		gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
 
 		if (node < 0)
-			page = alloc_page(gfp_mask);
+			page = alloc_page(tmp_mask);
 		else
-			page = alloc_pages_node(node, gfp_mask, 0);
+			page = alloc_pages_node(node, tmp_mask, order);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
@@ -1579,6 +1581,9 @@
 	return area->addr;
 
 fail:
+	warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, "
+			  "allocated %ld of %ld bytes\n",
+			  (area->nr_pages*PAGE_SIZE), area->size);
 	vfree(area->addr);
 	return NULL;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c917720..7e01161 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -202,6 +202,14 @@
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
+static inline int do_shrinker_shrink(struct shrinker *shrinker,
+				     struct shrink_control *sc,
+				     unsigned long nr_to_scan)
+{
+	sc->nr_to_scan = nr_to_scan;
+	return (*shrinker->shrink)(shrinker, sc);
+}
+
 #define SHRINK_BATCH 128
 /*
  * Call the shrink functions to age shrinkable caches
@@ -222,25 +230,29 @@
  *
  * Returns the number of slab objects which we shrunk.
  */
-unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages)
+unsigned long shrink_slab(struct shrink_control *shrink,
+			  unsigned long nr_pages_scanned,
+			  unsigned long lru_pages)
 {
 	struct shrinker *shrinker;
 	unsigned long ret = 0;
 
-	if (scanned == 0)
-		scanned = SWAP_CLUSTER_MAX;
+	if (nr_pages_scanned == 0)
+		nr_pages_scanned = SWAP_CLUSTER_MAX;
 
-	if (!down_read_trylock(&shrinker_rwsem))
-		return 1;	/* Assume we'll be able to shrink next time */
+	if (!down_read_trylock(&shrinker_rwsem)) {
+		/* Assume we'll be able to shrink next time */
+		ret = 1;
+		goto out;
+	}
 
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
 		unsigned long max_pass;
 
-		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
-		delta = (4 * scanned) / shrinker->seeks;
+		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+		delta = (4 * nr_pages_scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
 		shrinker->nr += delta;
@@ -267,9 +279,9 @@
 			int shrink_ret;
 			int nr_before;
 
-			nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
-			shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
-								gfp_mask);
+			nr_before = do_shrinker_shrink(shrinker, shrink, 0);
+			shrink_ret = do_shrinker_shrink(shrinker, shrink,
+							this_scan);
 			if (shrink_ret == -1)
 				break;
 			if (shrink_ret < nr_before)
@@ -283,6 +295,8 @@
 		shrinker->nr += total_scan;
 	}
 	up_read(&shrinker_rwsem);
+out:
+	cond_resched();
 	return ret;
 }
 
@@ -1202,13 +1216,16 @@
 {
 	int ret = -EBUSY;
 
+	VM_BUG_ON(!page_count(page));
+
 	if (PageLRU(page)) {
 		struct zone *zone = page_zone(page);
 
 		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
+		if (PageLRU(page)) {
 			int lru = page_lru(page);
 			ret = 0;
+			get_page(page);
 			ClearPageLRU(page);
 
 			del_page_from_lru_list(zone, page, lru);
@@ -2027,7 +2044,8 @@
  * 		else, the number of pages reclaimed
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-					struct scan_control *sc)
+					struct scan_control *sc,
+					struct shrink_control *shrink)
 {
 	int priority;
 	unsigned long total_scanned = 0;
@@ -2061,7 +2079,7 @@
 				lru_pages += zone_reclaimable_pages(zone);
 			}
 
-			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
+			shrink_slab(shrink, sc->nr_scanned, lru_pages);
 			if (reclaim_state) {
 				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -2133,12 +2151,15 @@
 		.mem_cgroup = NULL,
 		.nodemask = nodemask,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
 				sc.may_writepage,
 				gfp_mask);
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
@@ -2198,17 +2219,20 @@
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.nodemask = NULL, /* we don't care the placement */
+		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
 	};
 
-	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
-			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
 
 	trace_mm_vmscan_memcg_reclaim_begin(0,
 					    sc.may_writepage,
 					    sc.gfp_mask);
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -2287,7 +2311,7 @@
 	 * must be balanced
 	 */
 	if (order)
-		return pgdat_balanced(pgdat, balanced, classzone_idx);
+		return !pgdat_balanced(pgdat, balanced, classzone_idx);
 	else
 		return !all_zones_ok;
 }
@@ -2336,6 +2360,9 @@
 		.order = order,
 		.mem_cgroup = NULL,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 loop_again:
 	total_scanned = 0;
 	sc.nr_reclaimed = 0;
@@ -2435,8 +2462,7 @@
 					end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
+			nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
@@ -2788,7 +2814,10 @@
 		.swappiness = vm_swappiness,
 		.order = 0,
 	};
-	struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
+	struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
 	struct task_struct *p = current;
 	unsigned long nr_reclaimed;
 
@@ -2797,7 +2826,7 @@
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
@@ -2972,6 +3001,9 @@
 		.swappiness = vm_swappiness,
 		.order = order,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 	unsigned long nr_slab_pages0, nr_slab_pages1;
 
 	cond_resched();
@@ -3013,7 +3045,7 @@
 			unsigned long lru_pages = zone_reclaimable_pages(zone);
 
 			/* No reclaimable slab or very low memory pressure */
-			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+			if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
 				break;
 
 			/* Freed enough memory */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 897ea9e..20c18b7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -157,7 +157,7 @@
 /*
  * Refresh the thresholds for each zone.
  */
-static void refresh_zone_stat_thresholds(void)
+void refresh_zone_stat_thresholds(void)
 {
 	struct zone *zone;
 	int cpu;
@@ -659,6 +659,138 @@
 }
 #endif
 
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS)
+#ifdef CONFIG_ZONE_DMA
+#define TEXT_FOR_DMA(xx) xx "_dma",
+#else
+#define TEXT_FOR_DMA(xx)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define TEXT_FOR_DMA32(xx) xx "_dma32",
+#else
+#define TEXT_FOR_DMA32(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define TEXT_FOR_HIGHMEM(xx) xx "_high",
+#else
+#define TEXT_FOR_HIGHMEM(xx)
+#endif
+
+#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
+					TEXT_FOR_HIGHMEM(xx) xx "_movable",
+
+const char * const vmstat_text[] = {
+	/* Zoned VM counters */
+	"nr_free_pages",
+	"nr_inactive_anon",
+	"nr_active_anon",
+	"nr_inactive_file",
+	"nr_active_file",
+	"nr_unevictable",
+	"nr_mlock",
+	"nr_anon_pages",
+	"nr_mapped",
+	"nr_file_pages",
+	"nr_dirty",
+	"nr_writeback",
+	"nr_slab_reclaimable",
+	"nr_slab_unreclaimable",
+	"nr_page_table_pages",
+	"nr_kernel_stack",
+	"nr_unstable",
+	"nr_bounce",
+	"nr_vmscan_write",
+	"nr_writeback_temp",
+	"nr_isolated_anon",
+	"nr_isolated_file",
+	"nr_shmem",
+	"nr_dirtied",
+	"nr_written",
+
+#ifdef CONFIG_NUMA
+	"numa_hit",
+	"numa_miss",
+	"numa_foreign",
+	"numa_interleave",
+	"numa_local",
+	"numa_other",
+#endif
+	"nr_anon_transparent_hugepages",
+	"nr_dirty_threshold",
+	"nr_dirty_background_threshold",
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+	"pgpgin",
+	"pgpgout",
+	"pswpin",
+	"pswpout",
+
+	TEXTS_FOR_ZONES("pgalloc")
+
+	"pgfree",
+	"pgactivate",
+	"pgdeactivate",
+
+	"pgfault",
+	"pgmajfault",
+
+	TEXTS_FOR_ZONES("pgrefill")
+	TEXTS_FOR_ZONES("pgsteal")
+	TEXTS_FOR_ZONES("pgscan_kswapd")
+	TEXTS_FOR_ZONES("pgscan_direct")
+
+#ifdef CONFIG_NUMA
+	"zone_reclaim_failed",
+#endif
+	"pginodesteal",
+	"slabs_scanned",
+	"kswapd_steal",
+	"kswapd_inodesteal",
+	"kswapd_low_wmark_hit_quickly",
+	"kswapd_high_wmark_hit_quickly",
+	"kswapd_skip_congestion_wait",
+	"pageoutrun",
+	"allocstall",
+
+	"pgrotated",
+
+#ifdef CONFIG_COMPACTION
+	"compact_blocks_moved",
+	"compact_pages_moved",
+	"compact_pagemigrate_failed",
+	"compact_stall",
+	"compact_fail",
+	"compact_success",
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+	"htlb_buddy_alloc_success",
+	"htlb_buddy_alloc_fail",
+#endif
+	"unevictable_pgs_culled",
+	"unevictable_pgs_scanned",
+	"unevictable_pgs_rescued",
+	"unevictable_pgs_mlocked",
+	"unevictable_pgs_munlocked",
+	"unevictable_pgs_cleared",
+	"unevictable_pgs_stranded",
+	"unevictable_pgs_mlockfreed",
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	"thp_fault_alloc",
+	"thp_fault_fallback",
+	"thp_collapse_alloc",
+	"thp_collapse_alloc_failed",
+	"thp_split",
+#endif
+
+#endif /* CONFIG_VM_EVENTS_COUNTERS */
+};
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */
+
+
 #ifdef CONFIG_PROC_FS
 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 						struct zone *zone)
@@ -831,135 +963,6 @@
 	.release	= seq_release,
 };
 
-#ifdef CONFIG_ZONE_DMA
-#define TEXT_FOR_DMA(xx) xx "_dma",
-#else
-#define TEXT_FOR_DMA(xx)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define TEXT_FOR_DMA32(xx) xx "_dma32",
-#else
-#define TEXT_FOR_DMA32(xx)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define TEXT_FOR_HIGHMEM(xx) xx "_high",
-#else
-#define TEXT_FOR_HIGHMEM(xx)
-#endif
-
-#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
-					TEXT_FOR_HIGHMEM(xx) xx "_movable",
-
-static const char * const vmstat_text[] = {
-	/* Zoned VM counters */
-	"nr_free_pages",
-	"nr_inactive_anon",
-	"nr_active_anon",
-	"nr_inactive_file",
-	"nr_active_file",
-	"nr_unevictable",
-	"nr_mlock",
-	"nr_anon_pages",
-	"nr_mapped",
-	"nr_file_pages",
-	"nr_dirty",
-	"nr_writeback",
-	"nr_slab_reclaimable",
-	"nr_slab_unreclaimable",
-	"nr_page_table_pages",
-	"nr_kernel_stack",
-	"nr_unstable",
-	"nr_bounce",
-	"nr_vmscan_write",
-	"nr_writeback_temp",
-	"nr_isolated_anon",
-	"nr_isolated_file",
-	"nr_shmem",
-	"nr_dirtied",
-	"nr_written",
-
-#ifdef CONFIG_NUMA
-	"numa_hit",
-	"numa_miss",
-	"numa_foreign",
-	"numa_interleave",
-	"numa_local",
-	"numa_other",
-#endif
-	"nr_anon_transparent_hugepages",
-	"nr_dirty_threshold",
-	"nr_dirty_background_threshold",
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
-	"pgpgin",
-	"pgpgout",
-	"pswpin",
-	"pswpout",
-
-	TEXTS_FOR_ZONES("pgalloc")
-
-	"pgfree",
-	"pgactivate",
-	"pgdeactivate",
-
-	"pgfault",
-	"pgmajfault",
-
-	TEXTS_FOR_ZONES("pgrefill")
-	TEXTS_FOR_ZONES("pgsteal")
-	TEXTS_FOR_ZONES("pgscan_kswapd")
-	TEXTS_FOR_ZONES("pgscan_direct")
-
-#ifdef CONFIG_NUMA
-	"zone_reclaim_failed",
-#endif
-	"pginodesteal",
-	"slabs_scanned",
-	"kswapd_steal",
-	"kswapd_inodesteal",
-	"kswapd_low_wmark_hit_quickly",
-	"kswapd_high_wmark_hit_quickly",
-	"kswapd_skip_congestion_wait",
-	"pageoutrun",
-	"allocstall",
-
-	"pgrotated",
-
-#ifdef CONFIG_COMPACTION
-	"compact_blocks_moved",
-	"compact_pages_moved",
-	"compact_pagemigrate_failed",
-	"compact_stall",
-	"compact_fail",
-	"compact_success",
-#endif
-
-#ifdef CONFIG_HUGETLB_PAGE
-	"htlb_buddy_alloc_success",
-	"htlb_buddy_alloc_fail",
-#endif
-	"unevictable_pgs_culled",
-	"unevictable_pgs_scanned",
-	"unevictable_pgs_rescued",
-	"unevictable_pgs_mlocked",
-	"unevictable_pgs_munlocked",
-	"unevictable_pgs_cleared",
-	"unevictable_pgs_stranded",
-	"unevictable_pgs_mlockfreed",
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	"thp_fault_alloc",
-	"thp_fault_fallback",
-	"thp_collapse_alloc",
-	"thp_collapse_alloc_failed",
-	"thp_split",
-#endif
-
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
-};
-
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 							struct zone *zone)
 {
@@ -1198,7 +1201,6 @@
 #ifdef CONFIG_SMP
 	int cpu;
 
-	refresh_zone_stat_thresholds();
 	register_cpu_notifier(&vmstat_notifier);
 
 	for_each_online_cpu(cpu)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 67e3127..cd6e4aa 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -326,10 +326,12 @@
  * Run memory cache shrinker.
  */
 static int
-rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 {
 	LIST_HEAD(free);
 	int res;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
 		return (nr_to_scan == 0) ? 0 : -1;
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index d867081..8657f99 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -210,10 +210,10 @@
 
 our $logFunctions = qr{(?x:
 	printk|
-	pr_(debug|dbg|vdbg|devel|info|warning|err|notice|alert|crit|emerg|cont)|
-	(dev|netdev|netif)_(printk|dbg|vdbg|info|warn|err|notice|alert|crit|emerg|WARN)|
+	[a-z]+_(emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)|
 	WARN|
-	panic
+	panic|
+	MODULE_[A-Z_]+
 )};
 
 our @typeList = (
@@ -1462,7 +1462,7 @@
 #80 column limit
 		if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
 		    $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
-		    !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:,|\)\s*;)\s*$/ ||
+		    !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:|,|\)\s*;)\s*$/ ||
 		    $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
 		    $length > 80)
 		{
@@ -2748,6 +2748,11 @@
 			WARN("sizeof(& should be avoided\n" . $herecurr);
 		}
 
+# check for line continuations in quoted strings with odd counts of "
+		if ($rawline =~ /\\$/ && $rawline =~ tr/"/"/ % 2) {
+			WARN("Avoid line continuations in quoted strings\n" . $herecurr);
+		}
+
 # check for new externs in .c files.
 		if ($realfile =~ /\.c$/ && defined $stat &&
 		    $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)