Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6: (44 commits)
  nommu: Provide mmap_min_addr definition.
  TOMOYO: Add description of lists and structures.
  TOMOYO: Remove unused field.
  integrity: ima audit dentry_open failure
  TOMOYO: Remove unused parameter.
  security: use mmap_min_addr indepedently of security models
  TOMOYO: Simplify policy reader.
  TOMOYO: Remove redundant markers.
  SELinux: define audit permissions for audit tree netlink messages
  TOMOYO: Remove unused mutex.
  tomoyo: avoid get+put of task_struct
  smack: Remove redundant initialization.
  integrity: nfsd imbalance bug fix
  rootplug: Remove redundant initialization.
  smack: do not beyond ARRAY_SIZE of data
  integrity: move ima_counts_get
  integrity: path_check update
  IMA: Add __init notation to ima functions
  IMA: Minimal IMA policy and boot param for TCB IMA policy
  selinux: remove obsolete read buffer limit from sel_read_bool
  ...
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index b1eb661..9632444 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
 	    mac80211.xml debugobjects.xml sh.xml regulator.xml \
-	    alsa-driver-api.xml writing-an-alsa-driver.xml
+	    alsa-driver-api.xml writing-an-alsa-driver.xml \
+	    tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 0000000..b0756d0
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile, debug,
+     and understand kernel behavior. There are a number of tools that provide a
+     framework for using 'probes'. These tools include Systemtap, ftrace, and
+     LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros. Thus,
+     the purpose of this document is to provide a clear accounting of the available
+     tracepoints. The intention is to understand not only what tracepoints are
+     available but also to understand where future tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are the
+     tracepoints callbacks that are found throughout the code. Registering and
+     unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
+</book>
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 0688482..02cced1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@
 o	"qp" indicates that RCU still expects a quiescent state from
 	this CPU.
 
-o	"rpfq" is the number of rcu_pending() calls on this CPU required
-	to induce this CPU to invoke force_quiescent_state().
-
-o	"rp" is low-order four hex digits of the count of how many times
-	rcu_pending() has been invoked on this CPU.
-
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
 	scheduler or by irq.  The number after the "/" is the interrupt
@@ -305,6 +300,9 @@
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-variable spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@
 		For example, the first entry at the lowest level shows
 		"^0", indicating that it corresponds to bit zero in
 		the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o	"np" is the number of times that __rcu_pending() has been invoked
+	for the corresponding flavor of RCU.
+
+o	"qsp" is the number of times that the RCU was waiting for a
+	quiescent state from this CPU.
+
+o	"cbr" is the number of times that this CPU had RCU callbacks
+	that had passed through a grace period, and were thus ready
+	to be invoked.
+
+o	"cng" is the number of times that this CPU needed another
+	grace period while RCU was idle.
+
+o	"gpc" is the number of times that an old grace period had
+	completed, but this CPU was not yet aware of it.
+
+o	"gps" is the number of times that a new grace period had started,
+	but this CPU was not yet aware of it.
+
+o	"nf" is the number of times that this CPU suspected that the
+	current grace period had run for too long, and thus needed to
+	be forced.
+
+	Please note that "forcing" consists of sending resched IPIs
+	to holdout CPUs.  If that CPU really still is in an old RCU
+	read-side critical section, then we really do have to wait for it.
+	The assumption behing "forcing" is that the CPU is not still in
+	an old RCU read-side critical section, but has not yet responded
+	for some other reason.
+
+o	"nn" is the number of times that this CPU needed nothing.  Alert
+	readers will note that the rcu "nn" number for a given CPU very
+	closely matches the rcu_bh "np" number for that same CPU.  This
+	is due to short-circuit evaluation in rcu_pending().
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a5253f6..72d3bf0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,7 +56,6 @@
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
-	KMEMTRACE kmemtrace is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -754,12 +753,25 @@
 			ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
 	ftrace=[tracer]
-			[ftrace] will set and start the specified tracer
+			[FTRACE] will set and start the specified tracer
 			as early as possible in order to facilitate early
 			boot debugging.
 
 	ftrace_dump_on_oops
-			[ftrace] will dump the trace buffers on oops.
+			[FTRACE] will dump the trace buffers on oops.
+
+	ftrace_filter=[function-list]
+			[FTRACE] Limit the functions traced by the function
+			tracer at boot up. function-list is a comma separated
+			list of functions. This list can be changed at run
+			time by the set_ftrace_filter file in the debugfs
+			tracing directory. 
+
+	ftrace_notrace=[function-list]
+			[FTRACE] Do not trace the functions specified in
+			function-list. This list can be changed at run time
+			by the set_ftrace_notrace file in the debugfs
+			tracing directory.
 
 	gamecon.map[2|3]=
 			[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -1062,15 +1074,6 @@
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
-	kmemtrace.enable=	[KNL,KMEMTRACE] Format: { yes | no }
-				Controls whether kmemtrace is enabled
-				at boot-time.
-
-	kmemtrace.subbufs=n	[KNL,KMEMTRACE] Overrides the number of
-			subbufs kmemtrace's relay channel has. Set this
-			higher than default (KMEMTRACE_N_SUBBUFS in code) if
-			you experience buffer overruns.
-
 	kgdboc=		[HW] kgdb over consoles.
 			Requires a tty driver that supports console polling.
 			(only serial suported for now)
@@ -1671,6 +1674,14 @@
 	oprofile.timer=	[HW]
 			Use timer interrupt instead of performance counters
 
+	oprofile.cpu_type=	Force an oprofile cpu type
+			This might be useful if you have an older oprofile
+			userland or if you want common events.
+			Format: { archperfmon }
+			archperfmon: [X86] Force use of architectural
+				perfmon on Intel CPUs instead of the
+				CPU specific event set.
+
 	osst=		[HW,SCSI] SCSI Tape Driver
 			Format: <buffer_size>,<write_threshold>
 			See also Documentation/scsi/st.txt.
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 0000000..f157d75
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,90 @@
+			     Event Tracing
+
+		Documentation written by Theodore Ts'o
+			Updated by Li Zefan
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+tracing information should be printed.
+
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
+
+The events which are available for tracing can be found in the file
+/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /debug/tracing/set_event. For example:
+
+	# echo sched_wakeup >> /debug/tracing/set_event
+
+[ Note: '>>' is necessary, otherwise it will firstly disable
+  all the events. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+	# echo '!sched_wakeup' >> /debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+	# echo > /debug/tracing/set_event
+
+To enable all events, echo '*:*' or '*:' to the set_event file:
+
+	# echo *:* > /debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>.  The
+subsystem name is optional, but it is displayed in the available_events
+file.  All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+	# echo 'irq:*' > /debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in /debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+	# echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+	# echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in sched subsystem:
+
+	# echo 1 > /debug/tracing/events/sched/enable
+
+To eanble all events:
+
+	# echo 1 > /debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See The example provided in samples/trace_events
+
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index e362f50..2a82d86 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -179,7 +179,7 @@
 
 	Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
 	Similar to the function tracer except that the
 	function tracer probes the functions on their entry
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 0000000..cd805e1
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
+The power tracer collects detailed information about C-state and P-state
+transitions, instead of just looking at the high-level "average"
+information.
+
+There is a helper script found in scrips/tracing/power.pl in the kernel
+sources which can be used to parse this information and create a
+Scalable Vector Graphics (SVG) picture from the trace data.
+
+To use this tracer:
+
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	echo power > /sys/kernel/debug/tracing/current_tracer
+	echo 1 > /sys/kernel/debug/tracing/tracing_enabled
+	sleep 1
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	cat /sys/kernel/debug/tracing/trace | \
+		perl scripts/tracing/power.pl > out.sv
diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..84285b5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -71,7 +71,7 @@
 M: Mail patches to
 L: Mailing list that is relevant to this area
 W: Web-page with status/info
-T: SCM tree type and location.  Type is one of: git, hg, quilt.
+T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 S: Status, one of the following:
 
 	Supported:	Someone is actually paid to look after this.
@@ -159,7 +159,8 @@
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 L:	linux-serial@vger.kernel.org
 W:	http://serial.sourceforge.net
-S:	Orphan
+M:	alan@lxorguk.ukuu.org.uk
+S:	Odd Fixes
 F:	drivers/serial/8250*
 F:	include/linux/serial_8250.h
 
@@ -5629,6 +5630,7 @@
 M:	alan@lxorguk.ukuu.org.uk
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
+T:	stgit http://zeniv.linux.org.uk/~alan/ttydev/
 
 TULIP NETWORK DRIVERS
 P:	Grant Grundler
diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h
index 599217b..f9bd17d 100644
--- a/arch/arm/plat-mxc/include/mach/imx-uart.h
+++ b/arch/arm/plat-mxc/include/mach/imx-uart.h
@@ -20,11 +20,16 @@
 #define ASMARM_ARCH_UART_H
 
 #define IMXUART_HAVE_RTSCTS (1<<0)
+#define IMXUART_IRDA        (1<<1)
 
 struct imxuart_platform_data {
 	int (*init)(struct platform_device *pdev);
 	int (*exit)(struct platform_device *pdev);
 	unsigned int flags;
+	void (*irda_enable)(int enable);
+	unsigned int irda_inv_rx:1;
+	unsigned int irda_inv_tx:1;
+	unsigned short transceiver_delay;
 };
 
 #endif
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9d1552a..8a5bd7a 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -6,6 +6,7 @@
 	bool
 	default y
 	select HAVE_IDE
+	select HAVE_ARCH_TRACEHOOK
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 287f6f6..50ae91b 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -112,7 +112,7 @@
 #define atomic_clear_mask(mask, v)	atomic_test_and_ANDNOT_mask((mask), (v))
 #define atomic_set_mask(mask, v)	atomic_test_and_OR_mask((mask), (v))
 
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@
 	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@
 	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@
 	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_clear_bit(nr, addr);
 }
 
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_set_bit(nr, addr);
 }
 
-static inline void change_bit(int nr, volatile void * addr)
+static inline void change_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_change_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void * addr)
+static inline void __clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -161,7 +161,7 @@
 	*a &= ~mask;
 }
 
-static inline void __set_bit(int nr, volatile void * addr)
+static inline void __set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -171,7 +171,7 @@
 	*a |= mask;
 }
 
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -181,7 +181,7 @@
 	*a ^= mask;
 }
 
-static inline int __test_and_clear_bit(int nr, volatile void * addr)
+static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -193,7 +193,7 @@
 	return retval;
 }
 
-static inline int __test_and_set_bit(int nr, volatile void * addr)
+static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -205,7 +205,7 @@
 	return retval;
 }
 
-static inline int __test_and_change_bit(int nr, volatile void * addr)
+static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -220,12 +220,13 @@
 /*
  * This routine doesn't need to be atomic.
  */
-static inline int __constant_test_bit(int nr, const volatile void * addr)
+static inline int
+__constant_test_bit(unsigned long nr, const volatile void *addr)
 {
 	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
 }
 
-static inline int __test_bit(int nr, const volatile void * addr)
+static inline int __test_bit(unsigned long nr, const volatile void *addr)
 {
 	int 	* a = (int *) addr;
 	int	mask;
diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h
index 7279ec07d..7bbf6e4 100644
--- a/arch/frv/include/asm/elf.h
+++ b/arch/frv/include/asm/elf.h
@@ -116,6 +116,7 @@
 } while(0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_FDPIC_CORE_EFLAGS	EF_FRV_FDPIC
 #define ELF_EXEC_PAGESIZE	16384
 
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b4..cc685e6 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -87,8 +87,7 @@
 				       dma_addr_t dma_handle,
 				       size_t size, int direction)
 {
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
 			    (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@
 				   int nelems, int direction)
 {
 	int i;
-
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	for (i = 0; i < nelems; i++)
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h
index cf69340..a54b535 100644
--- a/arch/frv/include/asm/ptrace.h
+++ b/arch/frv/include/asm/ptrace.h
@@ -65,6 +65,8 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+struct task_struct;
+
 /*
  * we dedicate GR28 to keeping a pointer to the current exception frame
  * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@
 
 #define user_mode(regs)			(!((regs)->psr & PSR_S))
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 
 extern unsigned long user_stack(const struct pt_regs *);
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) ((regs)->pc)
-#endif
+
+#define task_pt_regs(task) ((task)->thread.frame0)
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644
index 0000000..70689eb
--- /dev/null
+++ b/arch/frv/include/asm/syscall.h
@@ -0,0 +1,123 @@
+/* syscall parameter access functions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H
+
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * Get the system call number or -1
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return regs->syscallno;
+}
+
+/*
+ * Restore the clobbered GR8 register
+ * (1st syscall arg was overwritten with syscall return or error)
+ */
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gr8 = regs->orig_gr8;
+}
+
+/*
+ * See if the syscall return value is an error, returning it if it is and 0 if
+ * not
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
+}
+
+/*
+ * Get the syscall return value
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gr8;
+}
+
+/*
+ * Set the syscall return value
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error)
+		regs->gr8 = -error;
+	else
+		regs->gr8 = val;
+}
+
+/*
+ * Retrieve the system call arguments
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	/*
+	 * Do this simply for now. If we need to start supporting
+	 * fetching arguments from arbitrary indices, this will need some
+	 * extra logic. Presently there are no in-tree users that depend
+	 * on this behaviour.
+	 */
+	BUG_ON(i);
+
+	/* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
+	switch (n) {
+	case 6: args[5] = regs->gr13;
+	case 5: args[4] = regs->gr12;
+	case 4: args[3] = regs->gr11;
+	case 3: args[2] = regs->gr10;
+	case 2: args[1] = regs->gr9;
+	case 1:	args[0] = regs->gr8;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Alter the system call arguments
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	/* Same note as above applies */
+	BUG_ON(i);
+
+	switch (n) {
+	case 6: regs->gr13 = args[5];
+	case 5: regs->gr12 = args[4];
+	case 4: regs->gr11 = args[3];
+	case 3: regs->gr10 = args[2];
+	case 2: regs->gr9  = args[1];
+	case 1: regs->gr8  = args[0];
+		break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* _ASM_SYSCALL_H */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bb53ab7..e8a5ed7 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -109,20 +109,20 @@
  * - other flags in MSW
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SINGLESTEP		3	/* restore singlestep on return to user mode */
-#define TIF_IRET		4	/* return with iret */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* OOM killer killed process */
 #define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 1da523b..356e0e3 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -886,7 +886,6 @@
 	bnc		icc0,#0,__syscall_badsys
 
 	ldi		@(gr15,#TI_FLAGS),gr4
-	ori		gr4,#_TIF_SYSCALL_TRACE,gr4
 	andicc		gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
 	bne		icc0,#0,__syscall_trace_entry
 
@@ -1150,11 +1149,10 @@
 	# perform syscall entry tracing
 __syscall_trace_entry:
 	LEDS		0x6320
-	setlos.p	#0,gr8
-	call		do_syscall_trace
+	call		syscall_trace_entry
 
-	ldi		@(gr28,#REG_SYSCALLNO),gr7
-	lddi		@(gr28,#REG_GR(8)) ,gr8
+	lddi.p		@(gr28,#REG_GR(8)) ,gr8
+	ori		gr8,#0,gr7		; syscall_trace_entry() returned new syscallno
 	lddi		@(gr28,#REG_GR(10)),gr10
 	lddi.p		@(gr28,#REG_GR(12)),gr12
 
@@ -1169,11 +1167,10 @@
 	beq		icc0,#1,__entry_work_pending
 
 	movsg		psr,gr23
-	andi		gr23,#~PSR_PIL,gr23	; could let do_syscall_trace() call schedule()
+	andi		gr23,#~PSR_PIL,gr23	; could let syscall_trace_exit() call schedule()
 	movgs		gr23,psr
 
-	setlos.p	#1,gr8
-	call		do_syscall_trace
+	call		syscall_trace_exit
 	bra		__entry_resume_userspace
 
 __syscall_badsys:
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 5e7d401..60eeed3 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -19,6 +19,9 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -33,6 +36,169 @@
  */
 
 /*
+ * retrieve the contents of FRV userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_int_regs *iregs = &target->thread.user->i;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  iregs, 0, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_int_regs *iregs = &target->thread.user->i;
+	unsigned int offs_gr0, offs_gr1;
+	int ret;
+
+	/* not allowed to set PSR or __status */
+	if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, psr))
+		return -EIO;
+
+	if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, __status))
+		return -EIO;
+
+	/* set the control regs */
+	offs_gr0 = offsetof(struct user_int_regs, gr[0]);
+	offs_gr1 = offsetof(struct user_int_regs, gr[1]);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 iregs, 0, offs_gr0);
+	if (ret < 0)
+		return ret;
+
+	/* skip GR0/TBR */
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					offs_gr0, offs_gr1);
+	if (ret < 0)
+		return ret;
+
+	/* set the general regs */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &iregs->gr[1], offs_gr1, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * retrieve the contents of FRV userspace FP/Media registers
+ */
+static int fpmregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace FP/Media registers
+ */
+static int fpmregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * determine if the FP/Media registers have actually been used
+ */
+static int fpmregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return tsk_used_math(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the FRV under Linux
+ */
+enum frv_regset {
+	REGSET_GENERAL,
+	REGSET_FPMEDIA,
+};
+
+static const struct user_regset frv_regsets[] = {
+	/*
+	 * General register format is:
+	 *	PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8
+	 *	GNER0-1, IACC0, TBR, GR1-63
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU/Media register format is:
+	 *	FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
+	 */
+	[REGSET_FPMEDIA] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct user_fpmedia_regs) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpmregs_get,
+		.set		= fpmregs_set,
+		.active		= fpmregs_active,
+	},
+};
+
+static const struct user_regset_view user_frv_native_view = {
+	.name		= "frv",
+	.e_machine	= EM_FRV,
+	.regsets	= frv_regsets,
+	.n		= ARRAY_SIZE(frv_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_frv_native_view;
+}
+
+/*
  * Get contents of register REGNO in task TASK.
  */
 static inline long get_reg(struct task_struct *task, int regno)
@@ -69,40 +235,23 @@
 }
 
 /*
- * check that an address falls within the bounds of the target process's memory
- * mappings
- */
-static inline int is_user_addr_valid(struct task_struct *child,
-				     unsigned long start, unsigned long len)
-{
-#ifdef CONFIG_MMU
-	if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
-		return -EIO;
-	return 0;
-#else
-	struct vm_area_struct *vma;
-
-	vma = find_vma(child->mm, start);
-	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
-		return 0;
-
-	return -EIO;
-#endif
-}
-
-/*
  * Called by kernel/ptrace.c when detaching..
  *
  * Control h/w single stepping
  */
-void ptrace_disable(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
+{
+	child->thread.frame0->__status |= REG__STATUS_STEP;
+}
+
+void user_disable_single_step(struct task_struct *child)
 {
 	child->thread.frame0->__status &= ~REG__STATUS_STEP;
 }
 
-void ptrace_enable(struct task_struct *child)
+void ptrace_disable(struct task_struct *child)
 {
-	child->thread.frame0->__status |= REG__STATUS_STEP;
+	user_disable_single_step(child);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@
 	int ret;
 
 	switch (request) {
-		/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_peekdata(child, addr, data);
-		break;
-
 		/* read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		tmp = 0;
@@ -163,15 +303,6 @@
 		break;
 	}
 
-		/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_pokedata(child, addr, data);
-		break;
-
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
 		if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@
 
 		ret = 0;
 		switch (addr >> 2) {
-		case 0 ... PT__END-1:
+		case 0 ... PT__END - 1:
 			ret = put_reg(child, addr >> 2, data);
 			break;
 
@@ -189,95 +320,29 @@
 		}
 		break;
 
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT: /* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_GENERAL,
+					   0, sizeof(child->thread.user->i),
+					   (void __user *)data);
 
-		/* make the child exit.  Best I can do is send it a sigkill.
-		 * perhaps it should be put in the status that it wants to
-		 * exit.
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_GENERAL,
+					     0, sizeof(child->thread.user->i),
+					     (const void __user *)data);
 
-	case PTRACE_SINGLESTEP:  /* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETFPREGS:	/* Get the child FP/Media state. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_FPMEDIA,
+					   0, sizeof(child->thread.user->f),
+					   (void __user *)data);
 
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-	case PTRACE_GETREGS: { /* Get all integer regs from the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			tmp = get_reg(child, i);
-			if (put_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all integer regs in the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			if (get_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			put_reg(child, i, tmp);
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
-		ret = 0;
-		if (copy_to_user((void *) data,
-				 &child->thread.user->f,
-				 sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
-		ret = 0;
-		if (copy_from_user(&child->thread.user->f,
-				   (void *) data,
-				   sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FP/Media state. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_FPMEDIA,
+					     0, sizeof(child->thread.user->f),
+					     (const void __user *)data);
 
 	case PTRACE_GETFDPIC:
 		tmp = 0;
@@ -300,414 +365,36 @@
 		break;
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 	return ret;
 }
 
-int __nongprelbss kstrace;
-
-static const struct {
-	const char	*name;
-	unsigned	argmask;
-} __syscall_name_table[NR_syscalls] = {
-	[0]	= { "restart_syscall"			},
-	[1]	= { "exit",		0x000001	},
-	[2]	= { "fork",		0xffffff	},
-	[3]	= { "read",		0x000141	},
-	[4]	= { "write",		0x000141	},
-	[5]	= { "open",		0x000235	},
-	[6]	= { "close",		0x000001	},
-	[7]	= { "waitpid",		0x000141	},
-	[8]	= { "creat",		0x000025	},
-	[9]	= { "link",		0x000055	},
-	[10]	= { "unlink",		0x000005	},
-	[11]	= { "execve",		0x000445	},
-	[12]	= { "chdir",		0x000005	},
-	[13]	= { "time",		0x000004	},
-	[14]	= { "mknod",		0x000325	},
-	[15]	= { "chmod",		0x000025	},
-	[16]	= { "lchown",		0x000025	},
-	[17]	= { "break" },
-	[18]	= { "oldstat",		0x000045	},
-	[19]	= { "lseek",		0x000131	},
-	[20]	= { "getpid",		0xffffff	},
-	[21]	= { "mount",		0x043555	},
-	[22]	= { "umount",		0x000005	},
-	[23]	= { "setuid",		0x000001	},
-	[24]	= { "getuid",		0xffffff	},
-	[25]	= { "stime",		0x000004	},
-	[26]	= { "ptrace",		0x004413	},
-	[27]	= { "alarm",		0x000001	},
-	[28]	= { "oldfstat",		0x000041	},
-	[29]	= { "pause",		0xffffff	},
-	[30]	= { "utime",		0x000045	},
-	[31]	= { "stty" },
-	[32]	= { "gtty" },
-	[33]	= { "access",		0x000025	},
-	[34]	= { "nice",		0x000001	},
-	[35]	= { "ftime" },
-	[36]	= { "sync",		0xffffff	},
-	[37]	= { "kill",		0x000011	},
-	[38]	= { "rename",		0x000055	},
-	[39]	= { "mkdir",		0x000025	},
-	[40]	= { "rmdir",		0x000005	},
-	[41]	= { "dup",		0x000001	},
-	[42]	= { "pipe",		0x000004	},
-	[43]	= { "times",		0x000004	},
-	[44]	= { "prof" },
-	[45]	= { "brk",		0x000004	},
-	[46]	= { "setgid",		0x000001	},
-	[47]	= { "getgid",		0xffffff	},
-	[48]	= { "signal",		0x000041	},
-	[49]	= { "geteuid",		0xffffff	},
-	[50]	= { "getegid",		0xffffff	},
-	[51]	= { "acct",		0x000005	},
-	[52]	= { "umount2",		0x000035	},
-	[53]	= { "lock" },
-	[54]	= { "ioctl",		0x000331	},
-	[55]	= { "fcntl",		0x000331	},
-	[56]	= { "mpx" },
-	[57]	= { "setpgid",		0x000011	},
-	[58]	= { "ulimit" },
-	[60]	= { "umask",		0x000002	},
-	[61]	= { "chroot",		0x000005	},
-	[62]	= { "ustat",		0x000043	},
-	[63]	= { "dup2",		0x000011	},
-	[64]	= { "getppid",		0xffffff	},
-	[65]	= { "getpgrp",		0xffffff	},
-	[66]	= { "setsid",		0xffffff	},
-	[67]	= { "sigaction" },
-	[68]	= { "sgetmask" },
-	[69]	= { "ssetmask" },
-	[70]	= { "setreuid" },
-	[71]	= { "setregid" },
-	[72]	= { "sigsuspend" },
-	[73]	= { "sigpending" },
-	[74]	= { "sethostname" },
-	[75]	= { "setrlimit" },
-	[76]	= { "getrlimit" },
-	[77]	= { "getrusage" },
-	[78]	= { "gettimeofday" },
-	[79]	= { "settimeofday" },
-	[80]	= { "getgroups" },
-	[81]	= { "setgroups" },
-	[82]	= { "select" },
-	[83]	= { "symlink" },
-	[84]	= { "oldlstat" },
-	[85]	= { "readlink" },
-	[86]	= { "uselib" },
-	[87]	= { "swapon" },
-	[88]	= { "reboot" },
-	[89]	= { "readdir" },
-	[91]	= { "munmap",		0x000034	},
-	[92]	= { "truncate" },
-	[93]	= { "ftruncate" },
-	[94]	= { "fchmod" },
-	[95]	= { "fchown" },
-	[96]	= { "getpriority" },
-	[97]	= { "setpriority" },
-	[99]	= { "statfs" },
-	[100]	= { "fstatfs" },
-	[102]	= { "socketcall" },
-	[103]	= { "syslog" },
-	[104]	= { "setitimer" },
-	[105]	= { "getitimer" },
-	[106]	= { "stat" },
-	[107]	= { "lstat" },
-	[108]	= { "fstat" },
-	[111]	= { "vhangup" },
-	[114]	= { "wait4" },
-	[115]	= { "swapoff" },
-	[116]	= { "sysinfo" },
-	[117]	= { "ipc" },
-	[118]	= { "fsync" },
-	[119]	= { "sigreturn" },
-	[120]	= { "clone" },
-	[121]	= { "setdomainname" },
-	[122]	= { "uname" },
-	[123]	= { "modify_ldt" },
-	[123]	= { "cacheflush" },
-	[124]	= { "adjtimex" },
-	[125]	= { "mprotect" },
-	[126]	= { "sigprocmask" },
-	[127]	= { "create_module" },
-	[128]	= { "init_module" },
-	[129]	= { "delete_module" },
-	[130]	= { "get_kernel_syms" },
-	[131]	= { "quotactl" },
-	[132]	= { "getpgid" },
-	[133]	= { "fchdir" },
-	[134]	= { "bdflush" },
-	[135]	= { "sysfs" },
-	[136]	= { "personality" },
-	[137]	= { "afs_syscall" },
-	[138]	= { "setfsuid" },
-	[139]	= { "setfsgid" },
-	[140]	= { "_llseek",			0x014331	},
-	[141]	= { "getdents" },
-	[142]	= { "_newselect",		0x000141	},
-	[143]	= { "flock" },
-	[144]	= { "msync" },
-	[145]	= { "readv" },
-	[146]	= { "writev" },
-	[147]	= { "getsid",			0x000001	},
-	[148]	= { "fdatasync",		0x000001	},
-	[149]	= { "_sysctl",			0x000004	},
-	[150]	= { "mlock" },
-	[151]	= { "munlock" },
-	[152]	= { "mlockall" },
-	[153]	= { "munlockall" },
-	[154]	= { "sched_setparam" },
-	[155]	= { "sched_getparam" },
-	[156]	= { "sched_setscheduler" },
-	[157]	= { "sched_getscheduler" },
-	[158]	= { "sched_yield" },
-	[159]	= { "sched_get_priority_max" },
-	[160]	= { "sched_get_priority_min" },
-	[161]	= { "sched_rr_get_interval" },
-	[162]	= { "nanosleep",		0x000044	},
-	[163]	= { "mremap" },
-	[164]	= { "setresuid" },
-	[165]	= { "getresuid" },
-	[166]	= { "vm86" },
-	[167]	= { "query_module" },
-	[168]	= { "poll" },
-	[169]	= { "nfsservctl" },
-	[170]	= { "setresgid" },
-	[171]	= { "getresgid" },
-	[172]	= { "prctl",			0x333331	},
-	[173]	= { "rt_sigreturn",		0xffffff	},
-	[174]	= { "rt_sigaction",		0x001441	},
-	[175]	= { "rt_sigprocmask",		0x001441	},
-	[176]	= { "rt_sigpending",		0x000014	},
-	[177]	= { "rt_sigtimedwait",		0x001444	},
-	[178]	= { "rt_sigqueueinfo",		0x000411	},
-	[179]	= { "rt_sigsuspend",		0x000014	},
-	[180]	= { "pread",			0x003341	},
-	[181]	= { "pwrite",			0x003341	},
-	[182]	= { "chown",			0x000115	},
-	[183]	= { "getcwd" },
-	[184]	= { "capget" },
-	[185]	= { "capset" },
-	[186]	= { "sigaltstack" },
-	[187]	= { "sendfile" },
-	[188]	= { "getpmsg" },
-	[189]	= { "putpmsg" },
-	[190]	= { "vfork",			0xffffff	},
-	[191]	= { "ugetrlimit" },
-	[192]	= { "mmap2",			0x313314	},
-	[193]	= { "truncate64" },
-	[194]	= { "ftruncate64" },
-	[195]	= { "stat64",			0x000045	},
-	[196]	= { "lstat64",			0x000045	},
-	[197]	= { "fstat64",			0x000041	},
-	[198]	= { "lchown32" },
-	[199]	= { "getuid32",			0xffffff	},
-	[200]	= { "getgid32",			0xffffff	},
-	[201]	= { "geteuid32",		0xffffff	},
-	[202]	= { "getegid32",		0xffffff	},
-	[203]	= { "setreuid32" },
-	[204]	= { "setregid32" },
-	[205]	= { "getgroups32" },
-	[206]	= { "setgroups32" },
-	[207]	= { "fchown32" },
-	[208]	= { "setresuid32" },
-	[209]	= { "getresuid32" },
-	[210]	= { "setresgid32" },
-	[211]	= { "getresgid32" },
-	[212]	= { "chown32" },
-	[213]	= { "setuid32" },
-	[214]	= { "setgid32" },
-	[215]	= { "setfsuid32" },
-	[216]	= { "setfsgid32" },
-	[217]	= { "pivot_root" },
-	[218]	= { "mincore" },
-	[219]	= { "madvise" },
-	[220]	= { "getdents64" },
-	[221]	= { "fcntl64" },
-	[223]	= { "security" },
-	[224]	= { "gettid" },
-	[225]	= { "readahead" },
-	[226]	= { "setxattr" },
-	[227]	= { "lsetxattr" },
-	[228]	= { "fsetxattr" },
-	[229]	= { "getxattr" },
-	[230]	= { "lgetxattr" },
-	[231]	= { "fgetxattr" },
-	[232]	= { "listxattr" },
-	[233]	= { "llistxattr" },
-	[234]	= { "flistxattr" },
-	[235]	= { "removexattr" },
-	[236]	= { "lremovexattr" },
-	[237]	= { "fremovexattr" },
-	[238]	= { "tkill" },
-	[239]	= { "sendfile64" },
-	[240]	= { "futex" },
-	[241]	= { "sched_setaffinity" },
-	[242]	= { "sched_getaffinity" },
-	[243]	= { "set_thread_area" },
-	[244]	= { "get_thread_area" },
-	[245]	= { "io_setup" },
-	[246]	= { "io_destroy" },
-	[247]	= { "io_getevents" },
-	[248]	= { "io_submit" },
-	[249]	= { "io_cancel" },
-	[250]	= { "fadvise64" },
-	[252]	= { "exit_group",		0x000001	},
-	[253]	= { "lookup_dcookie" },
-	[254]	= { "epoll_create" },
-	[255]	= { "epoll_ctl" },
-	[256]	= { "epoll_wait" },
-	[257]	= { "remap_file_pages" },
-	[258]	= { "set_tid_address" },
-	[259]	= { "timer_create" },
-	[260]	= { "timer_settime" },
-	[261]	= { "timer_gettime" },
-	[262]	= { "timer_getoverrun" },
-	[263]	= { "timer_delete" },
-	[264]	= { "clock_settime" },
-	[265]	= { "clock_gettime" },
-	[266]	= { "clock_getres" },
-	[267]	= { "clock_nanosleep" },
-	[268]	= { "statfs64" },
-	[269]	= { "fstatfs64" },
-	[270]	= { "tgkill" },
-	[271]	= { "utimes" },
-	[272]	= { "fadvise64_64" },
-	[273]	= { "vserver" },
-	[274]	= { "mbind" },
-	[275]	= { "get_mempolicy" },
-	[276]	= { "set_mempolicy" },
-	[277]	= { "mq_open" },
-	[278]	= { "mq_unlink" },
-	[279]	= { "mq_timedsend" },
-	[280]	= { "mq_timedreceive" },
-	[281]	= { "mq_notify" },
-	[282]	= { "mq_getsetattr" },
-	[283]	= { "sys_kexec_load" },
-};
-
-asmlinkage void do_syscall_trace(int leaving)
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(void)
 {
-#if 0
-	unsigned long *argp;
-	const char *name;
-	unsigned argmask;
-	char buffer[16];
-
-	if (!kstrace)
-		return;
-
-	if (!current->mm)
-		return;
-
-	if (__frame->gr7 == __NR_close)
-		return;
-
-#if 0
-	if (__frame->gr7 != __NR_mmap2 &&
-	    __frame->gr7 != __NR_vfork &&
-	    __frame->gr7 != __NR_execve &&
-	    __frame->gr7 != __NR_exit)
-		return;
-#endif
-
-	argmask = 0;
-	name = NULL;
-	if (__frame->gr7 < NR_syscalls) {
-		name = __syscall_name_table[__frame->gr7].name;
-		argmask = __syscall_name_table[__frame->gr7].argmask;
-	}
-	if (!name) {
-		sprintf(buffer, "sys_%lx", __frame->gr7);
-		name = buffer;
+	__frame->__status |= REG__STATUS_SYSC_ENTRY;
+	if (tracehook_report_syscall_entry(__frame)) {
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * __frame->syscallno
+		 */
+		return ULONG_MAX;
 	}
 
-	if (!leaving) {
-		if (!argmask) {
-			printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
-			       current->pid,
-			       name,
-			       __frame->gr8,
-			       __frame->gr9,
-			       __frame->gr10,
-			       __frame->gr11,
-			       __frame->gr12,
-			       __frame->gr13);
-		}
-		else if (argmask == 0xffffff) {
-			printk(KERN_CRIT "[%d] %s()\n",
-			       current->pid,
-			       name);
-		}
-		else {
-			printk(KERN_CRIT "[%d] %s(",
-			       current->pid,
-			       name);
+	return __frame->syscallno;
+}
 
-			argp = &__frame->gr8;
-
-			do {
-				switch (argmask & 0xf) {
-				case 1:
-					printk("%ld", (long) *argp);
-					break;
-				case 2:
-					printk("%lo", *argp);
-					break;
-				case 3:
-					printk("%lx", *argp);
-					break;
-				case 4:
-					printk("%p", (void *) *argp);
-					break;
-				case 5:
-					printk("\"%s\"", (char *) *argp);
-					break;
-				}
-
-				argp++;
-				argmask >>= 4;
-				if (argmask)
-					printk(",");
-
-			} while (argmask);
-
-			printk(")\n");
-		}
-	}
-	else {
-		if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
-			printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
-		else
-			printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
-	}
-	return;
-#endif
-
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-	/* we need to indicate entry or exit to strace */
-	if (leaving)
-		__frame->__status |= REG__STATUS_SYSC_EXIT;
-	else
-		__frame->__status |= REG__STATUS_SYSC_ENTRY;
-
-	ptrace_notify(SIGTRAP);
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(void)
+{
+	__frame->__status |= REG__STATUS_SYSC_EXIT;
+	tracehook_report_syscall_exit(__frame, 0);
 }
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 3bdb368..4a7a62c 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -516,6 +517,9 @@
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, __frame,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -564,4 +568,10 @@
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal();
 
+	/* deal with notification on about to resume userspace execution */
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
+	}
+
 } /* end do_notify_resume() */
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index 9fb771a..374f88d 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -23,8 +23,7 @@
 	char *p, ch;
 	long err = -EFAULT;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 	p = dst;
 
@@ -76,8 +75,7 @@
 	long err = 0;
 	char ch;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 #ifndef CONFIG_MMU
 	if ((unsigned long) src < memory_start)
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 52ff9ae..4e1ba0b 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -116,8 +116,7 @@
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -151,8 +150,7 @@
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
 				    sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	return nents;
 }
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 3ddedeb..45954f0 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -48,8 +48,7 @@
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -81,8 +80,7 @@
 	void *vaddr;
 	int i;
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	dampr2 = __get_DAMPR(2);
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 3559267..89faaca 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@
 config MN10300
 	def_bool y
 	select HAVE_OPROFILE
+	select HAVE_ARCH_TRACEHOOK
 
 config AM33
 	def_bool y
diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h
index bf09f8b..4910546 100644
--- a/arch/mn10300/include/asm/elf.h
+++ b/arch/mn10300/include/asm/elf.h
@@ -34,7 +34,7 @@
  */
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 #define ELF_NFPREG 32
@@ -76,6 +76,7 @@
 } while (0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
 /*
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 7323927..f7d4b0d 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -143,13 +143,7 @@
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define task_pt_regs(task)						\
-({									\
-       struct pt_regs *__regs__;					\
-       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
-       __regs__ - 1;							\
-})
-
+#define task_pt_regs(task) ((task)->thread.uregs)
 #define KSTK_EIP(task) (task_pt_regs(task)->pc)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 7b06cc6..921942e 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -91,9 +91,17 @@
 #if defined(__KERNEL__)
 
 #if !defined(__ASSEMBLY__)
+struct task_struct;
+
 #define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 3dc3e46..7408a27 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -76,7 +76,7 @@
 	cmp	nr_syscalls,d0
 	bcc	syscall_badsys
 	btst	_TIF_SYSCALL_TRACE,(TI_flags,a2)
-	bne	syscall_trace_entry
+	bne	syscall_entry_trace
 syscall_call:
 	add	d0,d0,a1
 	add	a1,a1
@@ -104,11 +104,10 @@
 syscall_exit_work:
 	btst	_TIF_SYSCALL_TRACE,d2
 	beq	work_pending
-	__sti				# could let do_syscall_trace() call
+	__sti				# could let syscall_trace_exit() call
 					# schedule() instead
 	mov	fp,d0
-	mov	1,d1
-	call	do_syscall_trace[],0	# do_syscall_trace(regs,entryexit)
+	call	syscall_trace_exit[],0	# do_syscall_trace(regs)
 	jmp	resume_userspace
 
 	ALIGN
@@ -138,13 +137,11 @@
 	jmp	resume_userspace
 
 	# perform syscall entry tracing
-syscall_trace_entry:
+syscall_entry_trace:
 	mov	-ENOSYS,d0
 	mov	d0,(REG_D0,fp)
 	mov	fp,d0
-	clr	d1
-	call	do_syscall_trace[],0
-	mov	(REG_ORIG_D0,fp),d0
+	call	syscall_trace_entry[],0	# returns the syscall number to actually use
 	mov	(REG_D1,fp),d1
 	cmp	nr_syscalls,d0
 	bcs	syscall_call
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
index d6d6cdc..e143339 100644
--- a/arch/mn10300/kernel/ptrace.c
+++ b/arch/mn10300/kernel/ptrace.c
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -64,12 +67,6 @@
 		((unsigned long) task->thread.uregs + offset);
 }
 
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
 static inline
 int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 {
@@ -80,44 +77,191 @@
 	return 0;
 }
 
-static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
-				       struct task_struct *tsk)
+/*
+ * retrieve the contents of MN10300 userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
 {
-	return __copy_to_user(buf, &tsk->thread.fpu_state,
-			      sizeof(struct fpu_state_struct));
-}
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
 
-static inline unsigned long set_fpregs(struct task_struct *tsk,
-				       struct fpu_state_struct *buf)
-{
-	return __copy_from_user(&tsk->thread.fpu_state, buf,
-				sizeof(struct fpu_state_struct));
-}
+	/* we need to skip regs->next */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
 
-static inline void fpsave_init(struct task_struct *task)
-{
-	memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				  NR_PTREGS * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					NR_PTREGS * sizeof(long), -1);
 }
 
 /*
- * make sure the single step bit is not set
+ * update the contents of the MN10300 userspace general registers
  */
-void ptrace_disable(struct task_struct *child)
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
 {
-#ifndef CONFIG_MN10300_USING_JTAG
-	struct user *dummy = NULL;
-	long tmp;
+	struct pt_regs *regs = task_pt_regs(target);
+	unsigned long tmp;
+	int ret;
 
-	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-	tmp &= ~EPSW_T;
-	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
-#endif
+	/* we need to skip regs->next */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				 PT_EPSW * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	/* we need to mask off changes to EPSW */
+	tmp = regs->epsw;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &tmp, PT_EPSW * sizeof(long),
+				 PT_PC * sizeof(long));
+	tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
+	tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
+			      EPSW_FLAG_Z);
+	regs->epsw = tmp;
+
+	if (ret < 0)
+		return ret;
+
+	/* and finally load the PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->pc, PT_PC * sizeof(long),
+				 NR_PTREGS * sizeof(long));
+
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 NR_PTREGS * sizeof(long), -1);
 }
 
 /*
- * set the single step bit
+ * retrieve the contents of MN10300 userspace FPU registers
  */
-void ptrace_enable(struct task_struct *child)
+static int fpuregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
+	int ret;
+
+	unlazy_fpu(target);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the MN10300 userspace FPU registers
+ */
+static int fpuregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct fpu_state_struct fpu_state = target->thread.fpu_state;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &fpu_state, 0, sizeof(fpu_state));
+	if (ret < 0)
+		return ret;
+
+	fpu_kill_state(target);
+	target->thread.fpu_state = fpu_state;
+	set_using_fpu(target);
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 sizeof(fpu_state), -1);
+}
+
+/*
+ * determine if the FPU registers have actually been used
+ */
+static int fpuregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return is_using_fpu(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the MN10300 under Linux
+ */
+enum mn10300_regset {
+	REGSET_GENERAL,
+	REGSET_FPU,
+};
+
+static const struct user_regset mn10300_regsets[] = {
+	/*
+	 * General register format is:
+	 *	A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
+	 *	E1, E0, E7...E2, SP, LAR, LIR, MDR
+	 *	A1, A0, D1, D0, ORIG_D0, EPSW, PC
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU register format is:
+	 *	FS0-31, FPCR
+	 */
+	[REGSET_FPU] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct fpu_state_struct) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpuregs_get,
+		.set		= fpuregs_set,
+		.active		= fpuregs_active,
+	},
+};
+
+static const struct user_regset_view user_mn10300_native_view = {
+	.name		= "mn10300",
+	.e_machine	= EM_MN10300,
+	.regsets	= mn10300_regsets,
+	.n		= ARRAY_SIZE(mn10300_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_mn10300_native_view;
+}
+
+/*
+ * set the single-step bit
+ */
+void user_enable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
 	struct user *dummy = NULL;
@@ -130,44 +274,36 @@
 }
 
 /*
+ * make sure the single-step bit is not set
+ */
+void user_disable_single_step(struct task_struct *child)
+{
+#ifndef CONFIG_MN10300_USING_JTAG
+	struct user *dummy = NULL;
+	long tmp;
+
+	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
+	tmp &= ~EPSW_T;
+	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
+#endif
+}
+
+void ptrace_disable(struct task_struct *child)
+{
+	user_disable_single_step(child);
+}
+
+/*
  * handle the arch-specific side of process tracing
  */
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct fpu_state_struct fpu_state;
-	int i, ret;
+	unsigned long tmp;
+	int ret;
 
 	switch (request) {
-	/* read the word at location addr. */
-	case PTRACE_PEEKTEXT: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
-	/* read the word at location addr. */
-	case PTRACE_PEEKDATA: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
 	/* read the word at location addr in the USER area. */
-	case PTRACE_PEEKUSR: {
-		unsigned long tmp;
-
+	case PTRACE_PEEKUSR:
 		ret = -EIO;
 		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@
 					     ptrace_regid_to_frame[addr]);
 		ret = put_user(tmp, (unsigned long *) data);
 		break;
-	}
-
-	/* write the word at location addr. */
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEDATA:
-		if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
-		    sizeof(data))
-			ret = 0;
-		else
-			ret = -EIO;
-		break;
 
 		/* write the word at location addr in the USER area */
 	case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@
 					     data);
 		break;
 
-		/* continue and stop at next (return from) syscall */
-	case PTRACE_SYSCALL:
-		/* restart after signal. */
-	case PTRACE_CONT:
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_GENERAL,
+					   0, NR_PTREGS * sizeof(long),
+					   (void __user *)data);
 
-		/*
-		 * make the child exit
-		 * - the best I can do is send it a sigkill
-		 * - perhaps it should be put in the status that it wants to
-		 *   exit
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_GENERAL,
+					     0, NR_PTREGS * sizeof(long),
+					     (const void __user *)data);
 
-	case PTRACE_SINGLESTEP: /* set the trap flag. */
-#ifndef CONFIG_MN10300_USING_JTAG
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-#else
-		ret = -EINVAL;
-#endif
-		break;
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_FPU,
+					   0, sizeof(struct fpu_state_struct),
+					   (void __user *)data);
 
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-		/* Get all gp regs from the child. */
-	case PTRACE_GETREGS: {
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
-			__put_user(tmp, (unsigned long *) data);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_READ, (unsigned long *)data,
-			       sizeof(struct pt_regs))) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			__get_user(tmp, (unsigned long *) data);
-			put_stack_long(child, ptrace_regid_to_frame[i], tmp);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-		if (is_using_fpu(child)) {
-			unlazy_fpu(child);
-			fpu_state = child->thread.fpu_state;
-		} else {
-			memset(&fpu_state, 0, sizeof(fpu_state));
-		}
-
-		ret = -EIO;
-		if (copy_to_user((void *) data, &fpu_state,
-				 sizeof(fpu_state)) == 0)
-			ret = 0;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-		ret = -EFAULT;
-		if (copy_from_user(&fpu_state, (const void *) data,
-				   sizeof(fpu_state)) == 0) {
-			fpu_kill_state(child);
-			child->thread.fpu_state = fpu_state;
-			set_using_fpu(child);
-			ret = 0;
-		}
-		break;
-	}
-
-	case PTRACE_SETOPTIONS: {
-		if (data & PTRACE_O_TRACESYSGOOD)
-			child->ptrace |= PT_TRACESYSGOOD;
-		else
-			child->ptrace &= ~PT_TRACESYSGOOD;
-		ret = 0;
-		break;
-	}
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_FPU,
+					     0, sizeof(struct fpu_state_struct),
+					     (const void __user *)data);
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 
@@ -337,43 +362,26 @@
 }
 
 /*
- * notification of system call entry/exit
- * - triggered by current->work.syscall_trace
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
 {
-#if 0
-	/* just in case... */
-	printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
-	       current->pid,
-	       regs->orig_d0,
-	       regs->a0,
-	       regs->d1,
-	       regs->a3,
-	       regs->a2,
-	       regs->d0);
-	return;
-#endif
+	if (tracehook_report_syscall_entry(regs))
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * regs->orig_d0
+		 */
+		return ULONG_MAX;
 
-	if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    !test_thread_flag(TIF_SINGLESTEP))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
+	return regs->orig_d0;
+}
 
-	/* the 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	ptrace_notify(SIGTRAP |
-		      ((current->ptrace & PT_TRACESYSGOOD) &&
-		       !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	tracehook_report_syscall_exit(regs, 0);
 }
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 841ca99..9f7572a 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -511,6 +512,9 @@
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, regs,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -561,4 +565,9 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
+	}
 }
diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S
index 7892080..7095147 100644
--- a/arch/mn10300/mm/tlb-mn10300.S
+++ b/arch/mn10300/mm/tlb-mn10300.S
@@ -165,24 +165,6 @@
 ENTRY(dtlb_aerror)
 	and	~EPSW_NMID,epsw
 	add	-4,sp
-	mov	d1,(sp)
-
-	movhu	(MMUFCR_DFC),d1			# is it the initial valid write
-						# to this page?
-	and	MMUFCR_xFC_INITWR,d1
- 	beq	dtlb_pagefault			# jump if not
-
-	mov	(DPTEL),d1			# set the dirty bit
-						# (don't replace with BSET!)
-	or	_PAGE_DIRTY,d1
-	mov	d1,(DPTEL)
-	mov	(sp),d1
-	add	4,sp
- 	rti
-
-	ALIGN
-dtlb_pagefault:
-	mov	(sp),d1
 	SAVE_ALL
 	add	-4,sp				# need to pass three params
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 33fac6b..d105f29 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,6 +174,15 @@
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config X86_DS_SELFTEST
+    bool "DS selftest"
+    default y
+    depends on DEBUG_KERNEL
+    depends on X86_DS
+	---help---
+	  Perform Debug Store selftests at boot time.
+	  If in doubt, say "N".
+
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..dcef387 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,5 @@
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a8f672b..70dac19 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@
  *     -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
-					 void *base, size_t size,
-					 bts_ovfl_callback_t ovfl,
-					 size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-					   void *base, size_t size,
-					   pebs_ovfl_callback_t ovfl,
-					   size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+					      void *base, size_t size,
+					      bts_ovfl_callback_t ovfl,
+					      size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+					     bts_ovfl_callback_t ovfl,
+					     size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+						void *base, size_t size,
+						pebs_ovfl_callback_t ovfl,
+						size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+					       void *base, size_t size,
+					       pebs_ovfl_callback_t ovfl,
+					       size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@
 		} lbr;
 		/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
 		struct {
-			__u64 jiffies;
+			__u64 clock;
 			pid_t pid;
-		} timestamp;
+		} event;
 	} variant;
 };
 
@@ -201,8 +234,12 @@
 struct pebs_trace {
 	struct ds_trace ds;
 
-	/* the PEBS reset value */
-	unsigned long long reset_value;
+	/* the number of valid counters in the below array */
+	unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+	/* the counter reset value */
+	unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -237,9 +274,11 @@
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+			     unsigned int counter, u64 value);
 
 /*
  * Initialization
@@ -252,21 +291,12 @@
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
 				struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-				  struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 87ede2f..c776826 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -462,14 +462,8 @@
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -797,6 +791,21 @@
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -806,6 +815,18 @@
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 624f133..0f0d908 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -236,12 +236,11 @@
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-			    unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)	ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index f729563..c4ee805 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -67,6 +67,7 @@
 	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
+	get_user(termios->c_line, &termio->c_line);
 	return copy_from_user(termios->c_cc, termio->c_cc, NCC);
 }
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 16a5c84..a5ecc9c 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-	write_cr3(read_cr3());
+	native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = read_cr4();
+	cr4 = native_read_cr4();
 	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..708dae61 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..4e2b054 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,8 @@
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev				296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_rt_tgsigqueueinfo			297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 
 
 #ifndef __NO_STUBS
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 235f592..4f78bd6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,6 +44,7 @@
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-$(CONFIG_X86_DS)		+= ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)		+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 87b67e3..48bfe13 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -19,45 +19,61 @@
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/trace_clock.h>
 
 #include <asm/ds.h>
 
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-
+#include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-	/* the name of the configuration */
-	const char *name;
-	/* the size of one pointer-typed field in the DS structure and
-	   in the BTS and PEBS buffers in bytes;
-	   this covers the first 8 DS fields related to buffer management. */
-	unsigned char  sizeof_field;
-	/* the size of a BTS/PEBS record in bytes */
-	unsigned char  sizeof_rec[2];
-	/* a series of bit-masks to control various features indexed
-	 * by enum ds_feature */
-	unsigned long ctl[dsf_ctl_max];
+	/* The name of the configuration: */
+	const char		*name;
+
+	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
+	unsigned char		sizeof_ptr_field;
+
+	/* The size of a BTS/PEBS record in bytes: */
+	unsigned char		sizeof_rec[2];
+
+	/* The number of pebs counter reset values in the DS structure. */
+	unsigned char		nr_counter_reset;
+
+	/* Control bit-masks indexed by enum ds_feature: */
+	unsigned long		ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
 
-#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS		0x80
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS		(3 * 8)
 
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT		(1 << 3)
+
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS	8
+
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE	8
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL				  \
+	( ds_cfg.ctl[dsf_bts]			| \
+	  ds_cfg.ctl[dsf_bts_kernel]		| \
+	  ds_cfg.ctl[dsf_bts_user]		| \
+	  ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@
  * to identify tracers.
  */
 struct ds_tracer {
-	/* the DS context (partially) owned by this tracer */
-	struct ds_context *context;
-	/* the buffer provided on ds_request() and its size in bytes */
-	void *buffer;
-	size_t size;
+	/* The DS context (partially) owned by this tracer. */
+	struct ds_context	*context;
+	/* The buffer provided on ds_request() and its size in bytes. */
+	void			*buffer;
+	size_t			size;
 };
 
 struct bts_tracer {
-	/* the common DS part */
-	struct ds_tracer ds;
-	/* the trace including the DS configuration */
-	struct bts_trace trace;
-	/* buffer overflow notification function */
-	bts_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct bts_trace	trace;
+
+	/* Buffer overflow notification function: */
+	bts_ovfl_callback_t	ovfl;
+
+	/* Active flags affecting trace collection. */
+	unsigned int		flags;
 };
 
 struct pebs_tracer {
-	/* the common DS part */
-	struct ds_tracer ds;
-	/* the trace including the DS configuration */
-	struct pebs_trace trace;
-	/* buffer overflow notification function */
-	pebs_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct pebs_trace	trace;
+
+	/* Buffer overflow notification function: */
+	pebs_ovfl_callback_t	ovfl;
 };
 
 /*
@@ -97,6 +120,7 @@
  *
  * The DS configuration consists of the following fields; different
  * architetures vary in the size of those fields.
+ *
  * - double-word aligned base linear address of the BTS buffer
  * - write pointer into the BTS buffer
  * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@
 };
 
 enum ds_qualifier {
-	ds_bts  = 0,
+	ds_bts = 0,
 	ds_pebs
 };
 
-static inline unsigned long ds_get(const unsigned char *base,
-				   enum ds_qualifier qual, enum ds_field field)
+static inline unsigned long
+ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	return *(unsigned long *)base;
 }
 
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
-			  enum ds_field field, unsigned long value)
+static inline void
+ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
+       unsigned long value)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	(*(unsigned long *)base) = value;
 }
 
@@ -159,7 +184,6 @@
  */
 static DEFINE_SPINLOCK(ds_lock);
 
-
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
  * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@
  */
 static atomic_t tracers = ATOMIC_INIT(0);
 
-static inline void get_tracer(struct task_struct *task)
+static inline int get_tracer(struct task_struct *task)
 {
-	if (task)
+	int error;
+
+	spin_lock_irq(&ds_lock);
+
+	if (task) {
+		error = -EPERM;
+		if (atomic_read(&tracers) < 0)
+			goto out;
 		atomic_inc(&tracers);
-	else
+	} else {
+		error = -EPERM;
+		if (atomic_read(&tracers) > 0)
+			goto out;
 		atomic_dec(&tracers);
+	}
+
+	error = 0;
+out:
+	spin_unlock_irq(&ds_lock);
+	return error;
 }
 
 static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@
 		atomic_inc(&tracers);
 }
 
-static inline int check_tracer(struct task_struct *task)
-{
-	return task ?
-		(atomic_read(&tracers) >= 0) :
-		(atomic_read(&tracers) <= 0);
-}
-
-
 /*
  * The DS context is either attached to a thread or to a cpu:
  * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-	unsigned char ds[MAX_SIZEOF_DS];
-	/* the owner of the BTS and PEBS configuration, respectively */
-	struct bts_tracer *bts_master;
-	struct pebs_tracer *pebs_master;
-	/* use count */
-	unsigned long count;
-	/* a pointer to the context location inside the thread_struct
-	 * or the per_cpu context array */
-	struct ds_context **this;
-	/* a pointer to the task owning this context, or NULL, if the
-	 * context is owned by a cpu */
-	struct task_struct *task;
+	/* The DS configuration; goes into MSR_IA32_DS_AREA: */
+	unsigned char		ds[MAX_SIZEOF_DS];
+
+	/* The owner of the BTS and PEBS configuration, respectively: */
+	struct bts_tracer	*bts_master;
+	struct pebs_tracer	*pebs_master;
+
+	/* Use count: */
+	unsigned long		count;
+
+	/* Pointer to the context pointer field: */
+	struct ds_context	**this;
+
+	/* The traced task; NULL for cpu tracing: */
+	struct task_struct	*task;
+
+	/* The traced cpu; only valid if task is NULL: */
+	int			cpu;
 };
 
-static DEFINE_PER_CPU(struct ds_context *, system_context_array);
-
-#define system_context per_cpu(system_context_array, smp_processor_id())
+static DEFINE_PER_CPU(struct ds_context *, cpu_context);
 
 
-static inline struct ds_context *ds_get_context(struct task_struct *task)
+static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &system_context);
+		(task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
 	struct ds_context *context = NULL;
 	struct ds_context *new_context = NULL;
-	unsigned long irq;
 
 	/* Chances are small that we already have a context. */
 	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
 	if (!new_context)
 		return NULL;
 
-	spin_lock_irqsave(&ds_lock, irq);
+	spin_lock_irq(&ds_lock);
 
 	context = *p_context;
-	if (!context) {
+	if (likely(!context)) {
 		context = new_context;
 
 		context->this = p_context;
 		context->task = task;
+		context->cpu = cpu;
 		context->count = 0;
 
-		if (task)
-			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-		if (!task || (task == current))
-			wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
-
 		*p_context = context;
 	}
 
 	context->count++;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
 	if (context != new_context)
 		kfree(new_context);
@@ -275,8 +304,9 @@
 	return context;
 }
 
-static inline void ds_put_context(struct ds_context *context)
+static void ds_put_context(struct ds_context *context)
 {
+	struct task_struct *task;
 	unsigned long irq;
 
 	if (!context)
@@ -291,17 +321,55 @@
 
 	*(context->this) = NULL;
 
-	if (context->task)
-		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+	task = context->task;
 
-	if (!context->task || (context->task == current))
-		wrmsrl(MSR_IA32_DS_AREA, 0);
+	if (task)
+		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+
+	/*
+	 * We leave the (now dangling) pointer to the DS configuration in
+	 * the DS_AREA msr. This is as good or as bad as replacing it with
+	 * NULL - the hardware would crash if we enabled tracing.
+	 *
+	 * This saves us some problems with having to write an msr on a
+	 * different cpu while preventing others from doing the same for the
+	 * next context for that same cpu.
+	 */
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
+	/* The context might still be in use for context switching. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
 	kfree(context);
 }
 
+static void ds_install_ds_area(struct ds_context *context)
+{
+	unsigned long ds;
+
+	ds = (unsigned long)context->ds;
+
+	/*
+	 * There is a race between the bts master and the pebs master.
+	 *
+	 * The thread/cpu access is synchronized via get/put_cpu() for
+	 * task tracing and via wrmsr_on_cpu for cpu tracing.
+	 *
+	 * If bts and pebs are collected for the same task or same cpu,
+	 * the same confiuration is written twice.
+	 */
+	if (context->task) {
+		get_cpu();
+		if (context->task == current)
+			wrmsrl(MSR_IA32_DS_AREA, ds);
+		set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+		put_cpu();
+	} else
+		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
+			     (u32)((u64)ds), (u32)((u64)ds >> 32));
+}
 
 /*
  * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@
  * The remainder of any partially written record is zeroed out.
  *
  * context: the DS context
- * qual: the buffer type
- * record: the data to write
- * size: the size of the data
+ * qual:    the buffer type
+ * record:  the data to write
+ * size:    the size of the data
  */
 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 		    const void *record, size_t size)
@@ -349,14 +417,14 @@
 		unsigned long write_size, adj_write_size;
 
 		/*
-		 * write as much as possible without producing an
+		 * Write as much as possible without producing an
 		 * overflow interrupt.
 		 *
-		 * interrupt_threshold must either be
+		 * Interrupt_threshold must either be
 		 * - bigger than absolute_maximum or
 		 * - point to a record between buffer_base and absolute_maximum
 		 *
-		 * index points to a valid record.
+		 * Index points to a valid record.
 		 */
 		base   = ds_get(context->ds, qual, ds_buffer_base);
 		index  = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@
 
 		write_end = min(end, int_th);
 
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
+		/*
+		 * If we are already beyond the interrupt threshold,
+		 * we fill the entire buffer.
+		 */
 		if (write_end <= index)
 			write_end = end;
 
@@ -383,7 +453,7 @@
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-		/* zero out trailing bytes */
+		/* Zero out trailing bytes. */
 		memset((char *)index + write_size, 0,
 		       adj_write_size - write_size);
 		index += adj_write_size;
@@ -410,7 +480,7 @@
  * Later architectures use 64bit pointers throughout, whereas earlier
  * architectures use 32bit pointers in 32bit mode.
  *
- * We compute the base address for the first 8 fields based on:
+ * We compute the base address for the fields based on:
  * - the field size stored in the DS configuration
  * - the relative field position
  *
@@ -431,23 +501,23 @@
 	bts_to,
 	bts_flags,
 
-	bts_qual = bts_from,
-	bts_jiffies = bts_to,
-	bts_pid = bts_flags,
+	bts_qual		= bts_from,
+	bts_clock		= bts_to,
+	bts_pid			= bts_flags,
 
-	bts_qual_mask = (bts_qual_max - 1),
-	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+	bts_qual_mask		= (bts_qual_max - 1),
+	bts_escape		= ((unsigned long)-1 & ~bts_qual_mask)
 };
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	base += (ds_cfg.sizeof_field * field);
+	base += (ds_cfg.sizeof_ptr_field * field);
 	return *(unsigned long *)base;
 }
 
 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-	base += (ds_cfg.sizeof_field * field);;
+	base += (ds_cfg.sizeof_ptr_field * field);;
 	(*(unsigned long *)base) = val;
 }
 
@@ -463,8 +533,8 @@
  *
  * return: bytes read/written on success; -Eerrno, otherwise
  */
-static int bts_read(struct bts_tracer *tracer, const void *at,
-		    struct bts_struct *out)
+static int
+bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 {
 	if (!tracer)
 		return -EINVAL;
@@ -478,8 +548,8 @@
 	memset(out, 0, sizeof(*out));
 	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
 		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
-		out->variant.timestamp.pid = bts_get(at, bts_pid);
+		out->variant.event.clock = bts_get(at, bts_clock);
+		out->variant.event.pid = bts_get(at, bts_pid);
 	} else {
 		out->qualifier = bts_branch;
 		out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@
 	case bts_task_arrives:
 	case bts_task_departs:
 		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
-		bts_set(raw, bts_pid, in->variant.timestamp.pid);
+		bts_set(raw, bts_clock, in->variant.event.clock);
+		bts_set(raw, bts_pid, in->variant.event.pid);
 		break;
 	default:
 		return -EINVAL;
@@ -555,7 +625,8 @@
 			     unsigned int flags) {
 	unsigned long buffer, adj;
 
-	/* adjust the buffer address and size to meet alignment
+	/*
+	 * Adjust the buffer address and size to meet alignment
 	 * constraints:
 	 * - buffer is double-word aligned
 	 * - size is multiple of record size
@@ -577,9 +648,11 @@
 	trace->begin = (void *)buffer;
 	trace->top = trace->begin;
 	trace->end = (void *)(buffer + size);
-	/* The value for 'no threshold' is -1, which will set the
+	/*
+	 * The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
+	ith *= ds_cfg.sizeof_rec[qual];
 	trace->ith = (void *)(buffer + size - ith);
 
 	trace->flags = flags;
@@ -588,18 +661,27 @@
 
 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 		      enum ds_qualifier qual, struct task_struct *task,
-		      void *base, size_t size, size_t th, unsigned int flags)
+		      int cpu, void *base, size_t size, size_t th)
 {
 	struct ds_context *context;
 	int error;
+	size_t req_size;
+
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.sizeof_rec[qual])
+		goto out;
 
 	error = -EINVAL;
 	if (!base)
 		goto out;
 
-	/* we require some space to do alignment adjustments below */
+	req_size = ds_cfg.sizeof_rec[qual];
+	/* We might need space for alignment adjustments. */
+	if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
+		req_size += DS_ALIGNMENT;
+
 	error = -EINVAL;
-	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+	if (size < req_size)
 		goto out;
 
 	if (th != (size_t)-1) {
@@ -614,182 +696,318 @@
 	tracer->size = size;
 
 	error = -ENOMEM;
-	context = ds_get_context(task);
+	context = ds_get_context(task, cpu);
 	if (!context)
 		goto out;
 	tracer->context = context;
 
-	ds_init_ds_trace(trace, qual, base, size, th, flags);
+	/*
+	 * Defer any tracer-specific initialization work for the context until
+	 * context ownership has been clarified.
+	 */
 
 	error = 0;
  out:
 	return error;
 }
 
-struct bts_tracer *ds_request_bts(struct task_struct *task,
-				  void *base, size_t size,
-				  bts_ovfl_callback_t ovfl, size_t th,
-				  unsigned int flags)
+static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
+					 void *base, size_t size,
+					 bts_ovfl_callback_t ovfl, size_t th,
+					 unsigned int flags)
 {
 	struct bts_tracer *tracer;
-	unsigned long irq;
 	int error;
 
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.ctl[dsf_bts])
-		goto out;
-
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_bts, task, base, size, th, flags);
+			   ds_bts, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
+	/* Claim the bts part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->bts_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->bts_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the bts part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_install_ds_area(tracer->ds.context);
 
 	tracer->trace.read  = bts_read;
 	tracer->trace.write = bts_write;
 
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	/* Start tracing. */
 	ds_resume_bts(tracer);
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
 
-struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-				    void *base, size_t size,
-				    pebs_ovfl_callback_t ovfl, size_t th,
-				    unsigned int flags)
+struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+				       void *base, size_t size,
+				       bts_ovfl_callback_t ovfl,
+				       size_t th, unsigned int flags)
+{
+	return ds_request_bts(task, 0, base, size, ovfl, th, flags);
+}
+
+struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+				      bts_ovfl_callback_t ovfl,
+				      size_t th, unsigned int flags)
+{
+	return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
+					   void *base, size_t size,
+					   pebs_ovfl_callback_t ovfl, size_t th,
+					   unsigned int flags)
 {
 	struct pebs_tracer *tracer;
-	unsigned long irq;
 	int error;
 
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_pebs, task, base, size, th, flags);
+			   ds_pebs, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
+	/* Claim the pebs part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->pebs_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->pebs_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the pebs part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
 	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	ds_install_ds_area(tracer->ds.context);
+
+	/* Start tracing. */
 	ds_resume_pebs(tracer);
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
 
-void ds_release_bts(struct bts_tracer *tracer)
+struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+					 void *base, size_t size,
+					 pebs_ovfl_callback_t ovfl,
+					 size_t th, unsigned int flags)
 {
-	if (!tracer)
-		return;
+	return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
+}
 
-	ds_suspend_bts(tracer);
+struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
+					pebs_ovfl_callback_t ovfl,
+					size_t th, unsigned int flags)
+{
+	return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static void ds_free_bts(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
 	tracer->ds.context->bts_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
+	/* Make sure tracing stopped and the tracer is not in use. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
 
+void ds_release_bts(struct bts_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_bts(tracer);
+	ds_free_bts(tracer);
+}
+
+int ds_release_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_bts_noirq(tracer);
+	ds_free_bts(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void update_task_debugctlmsr(struct task_struct *task,
+				    unsigned long debugctlmsr)
+{
+	task->thread.debugctlmsr = debugctlmsr;
+
+	get_cpu();
+	if (task == current)
+		update_debugctlmsr(debugctlmsr);
+	put_cpu();
+}
+
 void ds_suspend_bts(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
 
 	if (!tracer)
 		return;
 
+	tracer->flags = 0;
+
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
 
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+	WARN_ON(!task && irqs_disabled());
 
-	if (task) {
-		task->thread.debugctlmsr &= ~BTS_CONTROL;
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr &= ~BTS_CONTROL;
 
-		if (!task->thread.debugctlmsr)
-			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
 }
 
-void ds_resume_bts(struct bts_tracer *tracer)
+int ds_suspend_bts_noirq(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
-	unsigned long control;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
 
 	if (!tracer)
-		return;
+		return 0;
+
+	tracer->flags = 0;
 
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr &= ~BTS_CONTROL;
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static unsigned long ds_bts_control(struct bts_tracer *tracer)
+{
+	unsigned long control;
 
 	control = ds_cfg.ctl[dsf_bts];
 	if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@
 	if (!(tracer->trace.ds.flags & BTS_USER))
 		control |= ds_cfg.ctl[dsf_bts_user];
 
-	if (task) {
-		task->thread.debugctlmsr |= control;
-		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
-
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() | control);
+	return control;
 }
 
-void ds_release_pebs(struct pebs_tracer *tracer)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
+	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
+
 	if (!tracer)
 		return;
 
-	ds_suspend_pebs(tracer);
+	tracer->flags = tracer->trace.ds.flags;
+
+	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	WARN_ON(!task && irqs_disabled());
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
+}
+
+int ds_resume_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
+
+	if (!tracer)
+		return 0;
+
+	tracer->flags = tracer->trace.ds.flags;
+
+	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void ds_free_pebs(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
 	tracer->ds.context->pebs_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
 
+void ds_release_pebs(struct pebs_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_pebs(tracer);
+	ds_free_pebs(tracer);
+}
+
+int ds_release_pebs_noirq(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_pebs_noirq(tracer);
+	ds_free_pebs(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
 void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 void ds_resume_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
@@ -847,8 +1173,12 @@
 		return NULL;
 
 	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-	tracer->trace.reset_value =
-		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+
+	tracer->trace.counters = ds_cfg.nr_counter_reset;
+	memcpy(tracer->trace.counter_reset,
+	       tracer->ds.context->ds +
+	       (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
+	       ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
 
 	return &tracer->trace;
 }
@@ -873,18 +1203,24 @@
 
 	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
 	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
 
-int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer,
+		      unsigned int counter, u64 value)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	if (ds_cfg.nr_counter_reset < counter)
+		return -EINVAL;
+
+	*(u64 *)(tracer->ds.context->ds +
+		 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
+		 (counter * PEBS_RESET_FIELD_SIZE)) = value;
 
 	return 0;
 }
@@ -894,73 +1230,117 @@
 	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
 	.ctl[dsf_bts_kernel]	= (1 << 5),
 	.ctl[dsf_bts_user]	= (1 << 6),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
 	.name = "Pentium M",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
 	.name = "Core 2/Atom",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 	.ctl[dsf_bts_kernel]	= (1 << 9),
 	.ctl[dsf_bts_user]	= (1 << 10),
-
-	.sizeof_field		= 8,
-	.sizeof_rec[ds_bts]	= 8 * 3,
-	.sizeof_rec[ds_pebs]	= 8 * 18,
+	.nr_counter_reset	= 1,
+};
+static const struct ds_configuration ds_cfg_core_i7 = {
+	.name = "Core i7",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel]	= (1 << 9),
+	.ctl[dsf_bts_user]	= (1 << 10),
+	.nr_counter_reset	= 4,
 };
 
 static void
-ds_configure(const struct ds_configuration *cfg)
+ds_configure(const struct ds_configuration *cfg,
+	     struct cpuinfo_x86 *cpu)
 {
+	unsigned long nr_pebs_fields = 0;
+
+	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
+
+#ifdef __i386__
+	nr_pebs_fields = 10;
+#else
+	nr_pebs_fields = 18;
+#endif
+
+	/*
+	 * Starting with version 2, architectural performance
+	 * monitoring supports a format specifier.
+	 */
+	if ((cpuid_eax(0xa) & 0xff) > 1) {
+		unsigned long perf_capabilities, format;
+
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+		format = (perf_capabilities >> 8) & 0xf;
+
+		switch (format) {
+		case 0:
+			nr_pebs_fields = 18;
+			break;
+		case 1:
+			nr_pebs_fields = 22;
+			break;
+		default:
+			printk(KERN_INFO
+			       "[ds] unknown PEBS format: %lu\n", format);
+			nr_pebs_fields = 0;
+			break;
+		}
+	}
+
 	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
-	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+	ds_cfg.sizeof_ptr_field =
+		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
 
-	if (!cpu_has_bts) {
-		ds_cfg.ctl[dsf_bts] = 0;
+	ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
+	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
+
+	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
+		ds_cfg.sizeof_rec[ds_bts] = 0;
 		printk(KERN_INFO "[ds] bts not available\n");
 	}
-	if (!cpu_has_pebs)
+	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
+		ds_cfg.sizeof_rec[ds_pebs] = 0;
 		printk(KERN_INFO "[ds] pebs not available\n");
+	}
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
+	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
+	       8 * ds_cfg.sizeof_ptr_field);
+	printk("bts/pebs record: %u/%u bytes\n",
+	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
+
+	WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 {
+	/* Only configure the first cpu. Others are identical. */
+	if (ds_cfg.name)
+		return;
+
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
 		case 0x9:
 		case 0xd: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m);
+			ds_configure(&ds_cfg_pentium_m, c);
 			break;
 		case 0xf:
 		case 0x17: /* Core2 */
 		case 0x1c: /* Atom */
-			ds_configure(&ds_cfg_core2_atom);
+			ds_configure(&ds_cfg_core2_atom, c);
 			break;
-		case 0x1a: /* i7 */
+		case 0x1a: /* Core i7 */
+			ds_configure(&ds_cfg_core_i7, c);
+			break;
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
@@ -969,64 +1349,89 @@
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst);
+			ds_configure(&ds_cfg_netburst, c);
 			break;
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
 	default:
-		/* sorry, don't know about them */
+		/* Sorry, don't know about them. */
 		break;
 	}
 }
 
+static inline void ds_take_timestamp(struct ds_context *context,
+				     enum bts_qualifier qualifier,
+				     struct task_struct *task)
+{
+	struct bts_tracer *tracer = context->bts_master;
+	struct bts_struct ts;
+
+	/* Prevent compilers from reading the tracer pointer twice. */
+	barrier();
+
+	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
+		return;
+
+	memset(&ts, 0, sizeof(ts));
+	ts.qualifier		= qualifier;
+	ts.variant.event.clock	= trace_clock_global();
+	ts.variant.event.pid	= task->pid;
+
+	bts_write(tracer, &ts);
+}
+
 /*
  * Change the DS configuration from tracing prev to tracing next.
  */
 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-	struct ds_context *prev_ctx = prev->thread.ds_ctx;
-	struct ds_context *next_ctx = next->thread.ds_ctx;
+	struct ds_context *prev_ctx	= prev->thread.ds_ctx;
+	struct ds_context *next_ctx	= next->thread.ds_ctx;
+	unsigned long debugctlmsr	= next->thread.debugctlmsr;
+
+	/* Make sure all data is read before we start. */
+	barrier();
 
 	if (prev_ctx) {
 		update_debugctlmsr(0);
 
-		if (prev_ctx->bts_master &&
-		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_departs,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = prev->pid
-			};
-			bts_write(prev_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
 	}
 
 	if (next_ctx) {
-		if (next_ctx->bts_master &&
-		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_arrives,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = next->pid
-			};
-			bts_write(next_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(next_ctx, bts_task_arrives, next);
 
 		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
 	}
 
-	update_debugctlmsr(next->thread.debugctlmsr);
+	update_debugctlmsr(debugctlmsr);
 }
 
-void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+static __init int ds_selftest(void)
 {
-	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
-	tsk->thread.ds_ctx = NULL;
-}
+	if (ds_cfg.sizeof_rec[ds_bts]) {
+		int error;
 
-void ds_exit_thread(struct task_struct *tsk)
-{
+		error = ds_selftest_bts();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling bts.\n");
+			ds_cfg.sizeof_rec[ds_bts] = 0;
+		}
+	}
+
+	if (ds_cfg.sizeof_rec[ds_pebs]) {
+		int error;
+
+		error = ds_selftest_pebs();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling pebs.\n");
+			ds_cfg.sizeof_rec[ds_pebs] = 0;
+		}
+	}
+
+	return 0;
 }
+device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644
index 0000000..6bc7c19
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.c
@@ -0,0 +1,408 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#include "ds_selftest.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/ds.h>
+
+
+#define BUFFER_SIZE		521	/* Intentionally chose an odd size. */
+#define SMALL_BUFFER_SIZE	24	/* A single bts entry. */
+
+struct ds_selftest_bts_conf {
+	struct bts_tracer *tracer;
+	int error;
+	int (*suspend)(struct bts_tracer *);
+	int (*resume)(struct bts_tracer *);
+};
+
+static int ds_selftest_bts_consistency(const struct bts_trace *trace)
+{
+	int error = 0;
+
+	if (!trace) {
+		printk(KERN_CONT "failed to access trace...");
+		/* Bail out. Other tests are pointless. */
+		return -1;
+	}
+
+	if (!trace->read) {
+		printk(KERN_CONT "bts read not available...");
+		error = -1;
+	}
+
+	/* Do some sanity checks on the trace configuration. */
+	if (!trace->ds.n) {
+		printk(KERN_CONT "empty bts buffer...");
+		error = -1;
+	}
+	if (!trace->ds.size) {
+		printk(KERN_CONT "bad bts trace setup...");
+		error = -1;
+	}
+	if (trace->ds.end !=
+	    (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
+		printk(KERN_CONT "bad bts buffer setup...");
+		error = -1;
+	}
+	/*
+	 * We allow top in [begin; end], since its not clear when the
+	 * overflow adjustment happens: after the increment or before the
+	 * write.
+	 */
+	if ((trace->ds.top < trace->ds.begin) ||
+	    (trace->ds.end < trace->ds.top)) {
+		printk(KERN_CONT "bts top out of bounds...");
+		error = -1;
+	}
+
+	return error;
+}
+
+static int ds_selftest_bts_read(struct bts_tracer *tracer,
+				const struct bts_trace *trace,
+				const void *from, const void *to)
+{
+	const unsigned char *at;
+
+	/*
+	 * Check a few things which do not belong to this test.
+	 * They should be covered by other tests.
+	 */
+	if (!trace)
+		return -1;
+
+	if (!trace->read)
+		return -1;
+
+	if (to < from)
+		return -1;
+
+	if (from < trace->ds.begin)
+		return -1;
+
+	if (trace->ds.end < to)
+		return -1;
+
+	if (!trace->ds.size)
+		return -1;
+
+	/* Now to the test itself. */
+	for (at = from; (void *)at < to; at += trace->ds.size) {
+		struct bts_struct bts;
+		unsigned long index;
+		int error;
+
+		if (((void *)at - trace->ds.begin) % trace->ds.size) {
+			printk(KERN_CONT
+			       "read from non-integer index...");
+			return -1;
+		}
+		index = ((void *)at - trace->ds.begin) / trace->ds.size;
+
+		memset(&bts, 0, sizeof(bts));
+		error = trace->read(tracer, at, &bts);
+		if (error < 0) {
+			printk(KERN_CONT
+			       "error reading bts trace at [%lu] (0x%p)...",
+			       index, at);
+			return error;
+		}
+
+		switch (bts.qualifier) {
+		case BTS_BRANCH:
+			break;
+		default:
+			printk(KERN_CONT
+			       "unexpected bts entry %llu at [%lu] (0x%p)...",
+			       bts.qualifier, index, at);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static void ds_selftest_bts_cpu(void *arg)
+{
+	struct ds_selftest_bts_conf *conf = arg;
+	const struct bts_trace *trace;
+	void *top;
+
+	if (IS_ERR(conf->tracer)) {
+		conf->error = PTR_ERR(conf->tracer);
+		conf->tracer = NULL;
+
+		printk(KERN_CONT
+		       "initialization failed (err: %d)...", conf->error);
+		return;
+	}
+
+	/* We should meanwhile have enough trace. */
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	/* Let's see if we can access the trace. */
+	trace = ds_read_bts(conf->tracer);
+
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	/* If everything went well, we should have a few trace entries. */
+	if (trace->ds.top == trace->ds.begin) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning, but continue.
+		 */
+		printk(KERN_CONT "no trace/overflow...");
+	}
+
+	/* Let's try to read the trace we collected. */
+	conf->error =
+		ds_selftest_bts_read(conf->tracer, trace,
+				     trace->ds.begin, trace->ds.top);
+	if (conf->error < 0)
+		return;
+
+	/*
+	 * Let's read the trace again.
+	 * Since we suspended tracing, we should get the same result.
+	 */
+	top = trace->ds.top;
+
+	trace = ds_read_bts(conf->tracer);
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	if (top != trace->ds.top) {
+		printk(KERN_CONT "suspend not working...");
+		conf->error = -1;
+		return;
+	}
+
+	/* Let's collect some more trace - see if resume is working. */
+	conf->error = conf->resume(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	trace = ds_read_bts(conf->tracer);
+
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
+
+	if (trace->ds.top == top) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning and check the full trace.
+		 */
+		printk(KERN_CONT
+		       "no resume progress/overflow...");
+
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else if (trace->ds.top < top) {
+		/*
+		 * We had a buffer overflow - the entire buffer should
+		 * contain trace records.
+		 */
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else {
+		/*
+		 * It is quite likely that the buffer did not overflow.
+		 * Let's just check the delta trace.
+		 */
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace, top,
+					     trace->ds.top);
+	}
+	if (conf->error < 0)
+		return;
+
+	conf->error = 0;
+}
+
+static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_suspend_bts(tracer);
+	return 0;
+}
+
+static int ds_resume_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_resume_bts(tracer);
+	return 0;
+}
+
+static void ds_release_bts_noirq_wrap(void *tracer)
+{
+	(void)ds_release_bts_noirq(tracer);
+}
+
+static int ds_selftest_bts_bad_release_noirq(int cpu,
+					     struct bts_tracer *tracer)
+{
+	int error = -EPERM;
+
+	/* Try to release the tracer on the wrong cpu. */
+	get_cpu();
+	if (cpu != smp_processor_id()) {
+		error = ds_release_bts_noirq(tracer);
+		if (error != -EPERM)
+			printk(KERN_CONT "release on wrong cpu...");
+	}
+	put_cpu();
+
+	return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		ds_release_bts(tracer);
+		error = 0;
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "cpu/task tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_task(void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		error = 0;
+		ds_release_bts(tracer);
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "task/cpu tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+	struct ds_selftest_bts_conf conf;
+	unsigned char buffer[BUFFER_SIZE], *small_buffer;
+	unsigned long irq;
+	int cpu;
+
+	printk(KERN_INFO "[ds] bts selftest...");
+	conf.error = 0;
+
+	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		conf.suspend = ds_suspend_bts_wrap;
+		conf.resume = ds_resume_bts_wrap;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		ds_selftest_bts_cpu(&conf);
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		ds_release_bts(conf.tracer);
+		if (conf.error < 0)
+			goto out;
+
+		conf.suspend = ds_suspend_bts_noirq;
+		conf.resume = ds_resume_bts_noirq;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+		if (conf.error >= 0) {
+			conf.error =
+				ds_selftest_bts_bad_release_noirq(cpu,
+								  conf.tracer);
+			/* We must not release the tracer twice. */
+			if (conf.error < 0)
+				conf.tracer = NULL;
+		}
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
+		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+					 conf.tracer, 1);
+		if (conf.error < 0)
+			goto out;
+	}
+
+	conf.suspend = ds_suspend_bts_wrap;
+	conf.resume = ds_resume_bts_wrap;
+	conf.tracer =
+		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts(conf.tracer);
+	if (conf.error < 0)
+		goto out;
+
+	conf.suspend = ds_suspend_bts_noirq;
+	conf.resume = ds_resume_bts_noirq;
+	conf.tracer =
+		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
+	local_irq_save(irq);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts_noirq(conf.tracer);
+	local_irq_restore(irq);
+	if (conf.error < 0)
+		goto out;
+
+	conf.error = 0;
+ out:
+	put_online_cpus();
+	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+	return conf.error;
+}
+
+int ds_selftest_pebs(void)
+{
+	return 0;
+}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644
index 0000000..2ba8745
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.h
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bb01ce0..1c17d7c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -147,27 +147,14 @@
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e22d63b..3bb2be1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -16,6 +16,7 @@
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -47,6 +48,8 @@
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+
+	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -85,8 +88,6 @@
 		put_cpu();
 		kfree(bp);
 	}
-
-	ds_exit_thread(current);
 }
 
 void flush_thread(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c60924b..59f4524 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -287,7 +287,8 @@
 		p->thread.io_bitmap_max = 0;
 	}
 
-	ds_copy_thread(p, current);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 45f010f..ebefb54 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -332,7 +332,8 @@
 			goto out;
 	}
 
-	ds_copy_thread(p, me);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 23b7c8f..09ecbde 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -578,17 +579,130 @@
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+	/* The branch trace handle. */
+	struct bts_tracer	*tracer;
+
+	/* The buffer used to store the branch trace and its size. */
+	void			*buffer;
+	unsigned int		size;
+
+	/* The mm that paid for the above buffer. */
+	struct mm_struct	*mm;
+
+	/* The task this context belongs to. */
+	struct task_struct	*task;
+
+	/* The signal to send on a bts buffer overflow. */
+	unsigned int		bts_ovfl_signal;
+
+	/* The work struct to destroy a context. */
+	struct work_struct	work;
+};
+
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
+{
+	void *buffer = NULL;
+	int err = -ENOMEM;
+
+	err = account_locked_memory(current->mm, current->signal->rlim, size);
+	if (err < 0)
+		return err;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		goto out_refund;
+
+	context->buffer = buffer;
+	context->size = size;
+	context->mm = get_task_mm(current);
+
+	return 0;
+
+ out_refund:
+	refund_locked_memory(current->mm, size);
+	return err;
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+	if (!context->buffer)
+		return;
+
+	kfree(context->buffer);
+	context->buffer = NULL;
+
+	refund_locked_memory(context->mm, context->size);
+	context->size = 0;
+
+	mmput(context->mm);
+	context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+	struct bts_context *context;
+
+	context = container_of(w, struct bts_context, work);
+
+	ds_release_bts(context->tracer);
+	put_task_struct(context->task);
+	free_bts_buffer(context);
+	kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+	INIT_WORK(&context->work, free_bts_context_work);
+	schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (context) {
+		context->task = task;
+		task->bts = context;
+
+		get_task_struct(task);
+	}
+
+	return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct bts_struct bts;
 	const unsigned char *at;
 	int error;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	at = trace->ds.top - ((index + 1) * trace->ds.size);
 	if ((void *)at < trace->ds.begin)
@@ -597,7 +711,7 @@
 	if (!trace->read)
 		return -EOPNOTSUPP;
 
-	error = trace->read(child->bts, at, &bts);
+	error = trace->read(context->tracer, at, &bts);
 	if (error < 0)
 		return error;
 
@@ -611,13 +725,18 @@
 			    long size,
 			    struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	const unsigned char *at;
 	int error, drained = 0;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	if (!trace->read)
 		return -EOPNOTSUPP;
@@ -628,9 +747,8 @@
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     out++, drained++, at += trace->ds.size) {
 		struct bts_struct bts;
-		int error;
 
-		error = trace->read(child->bts, at, &bts);
+		error = trace->read(context->tracer, at, &bts);
 		if (error < 0)
 			return error;
 
@@ -640,35 +758,18 @@
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	error = ds_reset_bts(child->bts);
+	error = ds_reset_bts(context->tracer);
 	if (error < 0)
 		return error;
 
 	return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-	child->bts_buffer = alloc_locked_buffer(size);
-	if (!child->bts_buffer)
-		return -ENOMEM;
-
-	child->bts_size = size;
-
-	return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-	free_locked_buffer(child->bts_buffer, child->bts_size);
-	child->bts_buffer = NULL;
-	child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	struct ptrace_bts_config cfg;
 	unsigned int flags = 0;
 
@@ -678,28 +779,33 @@
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 		return -EFAULT;
 
-	if (child->bts) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-	}
+	context = child->bts;
+	if (!context)
+		context = alloc_bts_context(child);
+	if (!context)
+		return -ENOMEM;
 
 	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 		if (!cfg.signal)
 			return -EINVAL;
 
-		child->thread.bts_ovfl_signal = cfg.signal;
 		return -EOPNOTSUPP;
+		context->bts_ovfl_signal = cfg.signal;
 	}
 
-	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-	    (cfg.size != child->bts_size)) {
-		int error;
+	ds_release_bts(context->tracer);
+	context->tracer = NULL;
 
-		ptrace_bts_free_buffer(child);
+	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		int err;
 
-		error = ptrace_bts_allocate_buffer(child, cfg.size);
-		if (error < 0)
-			return error;
+		free_bts_buffer(context);
+		if (!cfg.size)
+			return 0;
+
+		err = alloc_bts_buffer(context, cfg.size);
+		if (err < 0)
+			return err;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -708,15 +814,14 @@
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		flags |= BTS_TIMESTAMPS;
 
-	child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-				    /* ovfl = */ NULL, /* th = */ (size_t)-1,
-				    flags);
-	if (IS_ERR(child->bts)) {
-		int error = PTR_ERR(child->bts);
+	context->tracer =
+		ds_request_bts_task(child, context->buffer, context->size,
+				    NULL, (size_t)-1, flags);
+	if (unlikely(IS_ERR(context->tracer))) {
+		int error = PTR_ERR(context->tracer);
 
-		ptrace_bts_free_buffer(child);
-		child->bts = NULL;
-
+		free_bts_buffer(context);
+		context->tracer = NULL;
 		return error;
 	}
 
@@ -727,20 +832,25 @@
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct ptrace_bts_config cfg;
 
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	trace = ds_read_bts(child->bts);
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(&cfg, 0, sizeof(cfg));
-	cfg.size = trace->ds.end - trace->ds.begin;
-	cfg.signal = child->thread.bts_ovfl_signal;
-	cfg.bts_size = sizeof(struct bts_struct);
+	cfg.size	= trace->ds.end - trace->ds.begin;
+	cfg.signal	= context->bts_ovfl_signal;
+	cfg.bts_size	= sizeof(struct bts_struct);
 
 	if (cfg.signal)
 		cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -759,80 +869,51 @@
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	return ds_reset_bts(child->bts);
+	return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
-{
-	tsk->bts = NULL;
-	tsk->bts_buffer = NULL;
-	tsk->bts_size = 0;
-	tsk->thread.bts_ovfl_signal = 0;
-}
-
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
+		free_bts_context(child->bts);
 		child->bts = NULL;
-
-		/* We cannot update total_vm and locked_vm since
-		   child's mm is already gone. But we can reclaim the
-		   memory. */
-		kfree(child->bts_buffer);
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
 	}
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-	/*
-	 * Ptrace_detach() races with ptrace_untrace() in case
-	 * the child dies and is reaped by another thread.
-	 *
-	 * We only do the memory accounting at this point and
-	 * leave the buffer deallocation and the bts tracer
-	 * release to ptrace_bts_untrace() which will be called
-	 * later on with tasklist_lock held.
-	 */
-	release_locked_buffer(child->bts_buffer, child->bts_size);
-}
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-	ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -844,7 +925,6 @@
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index f7bddc2..4aaf7e4 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -20,7 +20,7 @@
 
 static int save_stack_stack(void *data, char *name)
 {
-	return -1;
+	return 0;
 }
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..734f92c 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,4 @@
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 50dc802..16ccbd7 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
 	unsigned long page; /* location of the fault page */
-	bool old_presence; /* page presence prior to arming */
+	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
 	/*
@@ -97,60 +97,62 @@
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
 	struct list_head *head;
-	struct kmmio_fault_page *p;
+	struct kmmio_fault_page *f;
 
 	page &= PAGE_MASK;
 	head = kmmio_page_list(page);
-	list_for_each_entry_rcu(p, head, list) {
-		if (p->page == page)
-			return p;
+	list_for_each_entry_rcu(f, head, list) {
+		if (f->page == page)
+			return f;
 	}
 	return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
 	pmdval_t v = pmd_val(*pmd);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(addr, &level);
+	pte_t *pte = lookup_address(f->page, &level);
 
 	if (!pte) {
-		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+		pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
 		return -1;
 	}
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		set_pmd_presence((pmd_t *)pte, present, old);
+		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
 		break;
 	case PG_LEVEL_4K:
-		set_pte_presence(pte, present, old);
+		clear_pte_presence(pte, clear, &f->old_presence);
 		break;
 	default:
 		pr_err("kmmio: unexpected page level 0x%x.\n", level);
 		return -1;
 	}
 
-	__flush_tlb_one(addr);
+	__flush_tlb_one(f->page);
 	return 0;
 }
 
@@ -171,9 +173,9 @@
 	WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
 	if (f->armed) {
 		pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-					f->page, f->count, f->old_presence);
+					f->page, f->count, !!f->old_presence);
 	}
-	ret = set_page_presence(f->page, false, &f->old_presence);
+	ret = clear_page_presence(f, true);
 	WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
 	f->armed = true;
 	return ret;
@@ -182,8 +184,7 @@
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	bool tmp;
-	int ret = set_page_presence(f->page, f->old_presence, &tmp);
+	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
 			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
 	f->armed = false;
@@ -310,7 +311,12 @@
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
 							smp_processor_id());
 		goto out;
 	}
@@ -439,12 +445,12 @@
 						head,
 						struct kmmio_delayed_release,
 						rcu);
-	struct kmmio_fault_page *p = dr->release_list;
-	while (p) {
-		struct kmmio_fault_page *next = p->release_next;
-		BUG_ON(p->count);
-		kfree(p);
-		p = next;
+	struct kmmio_fault_page *f = dr->release_list;
+	while (f) {
+		struct kmmio_fault_page *next = f->release_next;
+		BUG_ON(f->count);
+		kfree(f);
+		f = next;
 	}
 	kfree(dr);
 }
@@ -453,19 +459,19 @@
 {
 	struct kmmio_delayed_release *dr =
 		container_of(head, struct kmmio_delayed_release, rcu);
-	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page *f = dr->release_list;
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	while (p) {
-		if (!p->count) {
-			list_del_rcu(&p->list);
-			prevp = &p->release_next;
+	while (f) {
+		if (!f->count) {
+			list_del_rcu(&f->list);
+			prevp = &f->release_next;
 		} else {
-			*prevp = p->release_next;
+			*prevp = f->release_next;
 		}
-		p = p->release_next;
+		f = f->release_next;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -528,8 +534,8 @@
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-								void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
 	struct die_args *arg = args;
 
@@ -544,11 +550,23 @@
 	.notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
 	int i;
+
 	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
 	return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+	}
+}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed..132772a 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@
 
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
+	kmmio_init();
 	enter_uniprocessor();
 	spin_lock_irq(&trace_lock);
 	atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
+	kmmio_cleanup();
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864a..3b285e6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -356,14 +356,11 @@
 #define exit_sysfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-static int p4force;
-module_param(p4force, int, 0);
-
 static int __init p4_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
-	if (!p4force && (cpu_model > 6 || cpu_model == 5))
+	if (cpu_model > 6 || cpu_model == 5)
 		return 0;
 
 #ifndef CONFIG_SMP
@@ -389,10 +386,25 @@
 	return 0;
 }
 
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
+{
+	if (!strcmp(str, "archperfmon")) {
+		force_arch_perfmon = 1;
+		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+	}
+
+	return 0;
+}
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+
 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
+	if (force_arch_perfmon && cpu_has_arch_perfmon)
+		return 0;
+
 	switch (cpu_model) {
 	case 0 ... 2:
 		*cpu_type = "i386/ppro";
@@ -414,6 +426,13 @@
 	case 15: case 23:
 		*cpu_type = "i386/core_2";
 		break;
+	case 26:
+		arch_perfmon_setup_counters();
+		*cpu_type = "i386/core_i7";
+		break;
+	case 28:
+		*cpu_type = "i386/atom";
+		break;
 	default:
 		/* Unknown */
 		return 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index c89883b..648f15c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,22 +28,14 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
@@ -1277,7 +1269,7 @@
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev, bio->bi_sector,
+				    bdev->bd_dev,
 				    bio->bi_sector - p->start_sect);
 	}
 }
@@ -1446,8 +1438,7 @@
 			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, bio->bi_sector,
-					    old_sector);
+			trace_block_remap(q, bio, old_dev, old_sector);
 
 		trace_block_bio_queue(q, bio);
 
@@ -1741,10 +1732,14 @@
 	trace_block_rq_complete(req->q, req);
 
 	/*
-	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-	 * sense key with us all the way through
+	 * For fs requests, rq is just carrier of independent bio's
+	 * and each partial completion should be handled separately.
+	 * Reset per-request error on each partial completion.
+	 *
+	 * TODO: tj: This is too subtle.  It would be better to let
+	 * low level drivers do what they see fit.
 	 */
-	if (!blk_pc_request(req))
+	if (blk_fs_request(req))
 		req->errors = 0;
 
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba..26f9ec2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -383,16 +383,21 @@
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
+	struct device *dev = disk_to_dev(disk);
 
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
 		return -ENXIO;
 
+	ret = blk_trace_init_sysfs(dev);
+	if (ret)
+		return ret;
+
 	if (!q->request_fn)
 		return 0;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
 			  "%s", "queue");
 	if (ret < 0)
 		return ret;
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615d..f8c218c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@
 	memcpy(&buts.name, &cbuts.name, 32);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+	ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
 	mutex_unlock(&bdev->bd_mutex);
 	if (ret)
 		return ret;
diff --git a/block/elevator.c b/block/elevator.c
index 7073a90..e220f0c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,17 +33,16 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index af761dc..4895f0e 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -277,8 +277,8 @@
 	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
 	   open path is before the ldisc is referencable */
 
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty_driver_flush_buffer(tty);
 
 	return 0;
@@ -463,7 +463,6 @@
 				clear_bit(HCI_UART_PROTO_SET, &hu->flags);
 				return err;
 			}
-			tty->low_latency = 1;
 		} else
 			return -EBUSY;
 		break;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 735bbe2..02ecfd5 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -97,6 +97,19 @@
 	  kind of kernel debugging operations.
 	  When in doubt, say "N".
 
+config BFIN_JTAG_COMM
+	tristate "Blackfin JTAG Communication"
+	depends on BLACKFIN
+	help
+	  Add support for emulating a TTY device over the Blackfin JTAG.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bfin_jtag_comm.
+
+config BFIN_JTAG_COMM_CONSOLE
+	bool "Console on Blackfin JTAG"
+	depends on BFIN_JTAG_COMM=y
+
 config SERIAL_NONSTANDARD
 	bool "Non-standard serial port support"
 	depends on HAS_IOMEM
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5..189efcf 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -13,6 +13,7 @@
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-y				+= misc.o
 obj-$(CONFIG_VT)		+= vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_BFIN_JTAG_COMM)	+= bfin_jtag_comm.o
 obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)	+= vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644
index 0000000..44c113d
--- /dev/null
+++ b/drivers/char/bfin_jtag_comm.c
@@ -0,0 +1,365 @@
+/*
+ * TTY over Blackfin JTAG Communication
+ *
+ * Copyright 2008-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <asm/atomic.h>
+
+/* See the Debug/Emulation chapter in the HRM */
+#define EMUDOF   0x00000001	/* EMUDAT_OUT full & valid */
+#define EMUDIF   0x00000002	/* EMUDAT_IN full & valid */
+#define EMUDOOVF 0x00000004	/* EMUDAT_OUT overflow */
+#define EMUDIOVF 0x00000008	/* EMUDAT_IN overflow */
+
+#define DRV_NAME "bfin-jtag-comm"
+#define DEV_NAME "ttyBFJC"
+
+#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
+#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
+
+static inline uint32_t bfin_write_emudat(uint32_t emudat)
+{
+	__asm__ __volatile__("emudat = %0;" : : "d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_read_emudat(void)
+{
+	uint32_t emudat;
+	__asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
+{
+	return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24));
+}
+
+#define CIRC_SIZE 2048	/* see comment in tty_io.c:do_tty_write() */
+#define CIRC_MASK (CIRC_SIZE - 1)
+#define circ_empty(circ)     ((circ)->head == (circ)->tail)
+#define circ_free(circ)      CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_cnt(circ)       CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
+
+static struct tty_driver *bfin_jc_driver;
+static struct task_struct *bfin_jc_kthread;
+static struct tty_struct * volatile bfin_jc_tty;
+static unsigned long bfin_jc_count;
+static DEFINE_MUTEX(bfin_jc_tty_mutex);
+static volatile struct circ_buf bfin_jc_write_buf;
+
+static int
+bfin_jc_emudat_manager(void *arg)
+{
+	uint32_t inbound_len = 0, outbound_len = 0;
+
+	while (!kthread_should_stop()) {
+		/* no one left to give data to, so sleep */
+		if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for readers\n");
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+
+		/* no data available, so just chill */
+		if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for data (in_len = %i) (circ: %i %i)\n",
+				inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
+			if (inbound_len)
+				schedule();
+			else
+				schedule_timeout_interruptible(HZ);
+			continue;
+		}
+
+		/* if incoming data is ready, eat it */
+		if (bfin_read_DBGSTAT() & EMUDIF) {
+			struct tty_struct *tty;
+			mutex_lock(&bfin_jc_tty_mutex);
+			tty = (struct tty_struct *)bfin_jc_tty;
+			if (tty != NULL) {
+				uint32_t emudat = bfin_read_emudat();
+				if (inbound_len == 0) {
+					debug("incoming length: 0x%08x\n", emudat);
+					inbound_len = emudat;
+				} else {
+					size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+					debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+					inbound_len -= num_chars;
+					tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+					tty_flip_buffer_push(tty);
+				}
+			}
+			mutex_unlock(&bfin_jc_tty_mutex);
+		}
+
+		/* if outgoing data is ready, post it */
+		if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
+			if (outbound_len == 0) {
+				outbound_len = circ_cnt(&bfin_jc_write_buf);
+				bfin_write_emudat(outbound_len);
+				debug("outgoing length: 0x%08x\n", outbound_len);
+			} else {
+				struct tty_struct *tty;
+				int tail = bfin_jc_write_buf.tail;
+				size_t ate = (4 <= outbound_len ? 4 : outbound_len);
+				uint32_t emudat =
+				bfin_write_emudat_chars(
+					circ_byte(&bfin_jc_write_buf, tail + 0),
+					circ_byte(&bfin_jc_write_buf, tail + 1),
+					circ_byte(&bfin_jc_write_buf, tail + 2),
+					circ_byte(&bfin_jc_write_buf, tail + 3)
+				);
+				bfin_jc_write_buf.tail += ate;
+				outbound_len -= ate;
+				mutex_lock(&bfin_jc_tty_mutex);
+				tty = (struct tty_struct *)bfin_jc_tty;
+				if (tty)
+					tty_wakeup(tty);
+				mutex_unlock(&bfin_jc_tty_mutex);
+				debug("  outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
+			}
+		}
+	}
+
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static int
+bfin_jc_open(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("open %lu\n", bfin_jc_count);
+	++bfin_jc_count;
+	bfin_jc_tty = tty;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+	return 0;
+}
+
+static void
+bfin_jc_close(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("close %lu\n", bfin_jc_count);
+	if (--bfin_jc_count == 0)
+		bfin_jc_tty = NULL;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+}
+
+/* XXX: we dont handle the put_char() case where we must handle count = 1 */
+static int
+bfin_jc_circ_write(const unsigned char *buf, int count)
+{
+	int i;
+	count = min(count, circ_free(&bfin_jc_write_buf));
+	debug("going to write chunk of %i bytes\n", count);
+	for (i = 0; i < count; ++i)
+		circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
+	bfin_jc_write_buf.head += i;
+	return i;
+}
+
+#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
+# define acquire_console_sem()
+# define release_console_sem()
+#endif
+static int
+bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+	int i;
+	acquire_console_sem();
+	i = bfin_jc_circ_write(buf, count);
+	release_console_sem();
+	wake_up_process(bfin_jc_kthread);
+	return i;
+}
+
+static void
+bfin_jc_flush_chars(struct tty_struct *tty)
+{
+	wake_up_process(bfin_jc_kthread);
+}
+
+static int
+bfin_jc_write_room(struct tty_struct *tty)
+{
+	return circ_free(&bfin_jc_write_buf);
+}
+
+static int
+bfin_jc_chars_in_buffer(struct tty_struct *tty)
+{
+	return circ_cnt(&bfin_jc_write_buf);
+}
+
+static void
+bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	unsigned long expire = jiffies + timeout;
+	while (!circ_empty(&bfin_jc_write_buf)) {
+		if (signal_pending(current))
+			break;
+		if (time_after(jiffies, expire))
+			break;
+	}
+}
+
+static struct tty_operations bfin_jc_ops = {
+	.open            = bfin_jc_open,
+	.close           = bfin_jc_close,
+	.write           = bfin_jc_write,
+	/*.put_char        = bfin_jc_put_char,*/
+	.flush_chars     = bfin_jc_flush_chars,
+	.write_room      = bfin_jc_write_room,
+	.chars_in_buffer = bfin_jc_chars_in_buffer,
+	.wait_until_sent = bfin_jc_wait_until_sent,
+};
+
+static int __init bfin_jc_init(void)
+{
+	int ret;
+
+	bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
+	if (IS_ERR(bfin_jc_kthread))
+		return PTR_ERR(bfin_jc_kthread);
+
+	ret = -ENOMEM;
+
+	bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
+	bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
+	if (!bfin_jc_write_buf.buf)
+		goto err;
+
+	bfin_jc_driver = alloc_tty_driver(1);
+	if (!bfin_jc_driver)
+		goto err;
+
+	bfin_jc_driver->owner        = THIS_MODULE;
+	bfin_jc_driver->driver_name  = DRV_NAME;
+	bfin_jc_driver->name         = DEV_NAME;
+	bfin_jc_driver->type         = TTY_DRIVER_TYPE_SERIAL;
+	bfin_jc_driver->subtype      = SERIAL_TYPE_NORMAL;
+	bfin_jc_driver->init_termios = tty_std_termios;
+	tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
+
+	ret = tty_register_driver(bfin_jc_driver);
+	if (ret)
+		goto err;
+
+	pr_init(KERN_INFO DRV_NAME ": initialized\n");
+
+	return 0;
+
+ err:
+	put_tty_driver(bfin_jc_driver);
+	kfree(bfin_jc_write_buf.buf);
+	kthread_stop(bfin_jc_kthread);
+	return ret;
+}
+module_init(bfin_jc_init);
+
+static void __exit bfin_jc_exit(void)
+{
+	kthread_stop(bfin_jc_kthread);
+	kfree(bfin_jc_write_buf.buf);
+	tty_unregister_driver(bfin_jc_driver);
+	put_tty_driver(bfin_jc_driver);
+}
+module_exit(bfin_jc_exit);
+
+#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
+static void
+bfin_jc_straight_buffer_write(const char *buf, unsigned count)
+{
+	unsigned ate = 0;
+	while (bfin_read_DBGSTAT() & EMUDOF)
+		continue;
+	bfin_write_emudat(count);
+	while (ate < count) {
+		while (bfin_read_DBGSTAT() & EMUDOF)
+			continue;
+		bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]);
+		ate += 4;
+	}
+}
+#endif
+
+#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
+static void
+bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
+{
+	if (bfin_jc_kthread == NULL)
+		bfin_jc_straight_buffer_write(buf, count);
+	else
+		bfin_jc_circ_write(buf, count);
+}
+
+static struct tty_driver *
+bfin_jc_console_device(struct console *co, int *index)
+{
+	*index = co->index;
+	return bfin_jc_driver;
+}
+
+static struct console bfin_jc_console = {
+	.name    = DEV_NAME,
+	.write   = bfin_jc_console_write,
+	.device  = bfin_jc_console_device,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+static int __init bfin_jc_console_init(void)
+{
+	register_console(&bfin_jc_console);
+	return 0;
+}
+console_initcall(bfin_jc_console_init);
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+static void __init
+bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
+{
+	bfin_jc_straight_buffer_write(buf, count);
+}
+
+static struct __initdata console bfin_jc_early_console = {
+	.name   = "early_BFJC",
+	.write   = bfin_jc_early_write,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+struct console * __init
+bfin_jc_early_init(unsigned int port, unsigned int cflag)
+{
+	return &bfin_jc_early_console;
+}
+#endif
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 1fdb9f6..f3366d3 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -604,7 +604,6 @@
 
 #define NR_PORTS	256
 
-#define ZE_V1_NPORTS	64
 #define ZO_V1	0
 #define ZO_V2	1
 #define ZE_V1	2
@@ -663,18 +662,6 @@
 static void cy_throttle(struct tty_struct *tty);
 static void cy_send_xchar(struct tty_struct *tty, char ch);
 
-#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
-
-#define Z_FPGA_CHECK(card) \
-	((readl(&((struct RUNTIME_9060 __iomem *) \
-		((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
-
-#define ISZLOADED(card)	(((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-			((card).ctl_addr))->mail_box_0)) || \
-			Z_FPGA_CHECK(card)) && \
-			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
-			((card).base_addr+ID_ADDRESS))->signature)))
-
 #ifndef SERIAL_XMIT_SIZE
 #define	SERIAL_XMIT_SIZE	(min(PAGE_SIZE, 4096))
 #endif
@@ -687,8 +674,6 @@
 #define DRIVER_VERSION	0x02010203
 #define RAM_SIZE 0x80000
 
-#define Z_FPGA_LOADED(X)	((readl(&(X)->init_ctrl) & (1<<17)) != 0)
-
 enum zblock_type {
 	ZBLOCK_PRG = 0,
 	ZBLOCK_FPGA = 1
@@ -883,6 +868,29 @@
 static struct timer_list cyz_rx_full_timer[NR_PORTS];
 #endif				/* CONFIG_CYZ_INTR */
 
+static inline bool cy_is_Z(struct cyclades_card *card)
+{
+	return card->num_chips == (unsigned int)-1;
+}
+
+static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
+{
+	return readl(&ctl_addr->init_ctrl) & (1 << 17);
+}
+
+static inline bool cyz_fpga_loaded(struct cyclades_card *card)
+{
+	return __cyz_fpga_loaded(card->ctl_addr.p9060);
+}
+
+static inline bool cyz_is_loaded(struct cyclades_card *card)
+{
+	struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
+
+	return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
+			readl(&fw_id->signature) == ZFIRM_ID;
+}
+
 static inline int serial_paranoia_check(struct cyclades_port *info,
 		char *name, const char *routine)
 {
@@ -1395,19 +1403,15 @@
 	unsigned long loc_doorbell;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
-		return -1;
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
-	loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-				  (cinfo->ctl_addr))->loc_doorbell);
+	loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
 	if (loc_doorbell) {
 		*cmd = (char)(0xff & loc_doorbell);
 		*channel = readl(&board_ctrl->fwcmd_channel);
 		*param = (__u32) readl(&board_ctrl->fwcmd_param);
-		cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			  loc_doorbell, 0xffffffff);
+		cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
 		return 1;
 	}
 	return 0;
@@ -1424,15 +1428,14 @@
 	unsigned int index;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
+	if (!cyz_is_loaded(cinfo))
 		return -1;
 
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
 	index = 0;
-	pci_doorbell =
-	    &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
+	pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
 	while ((readl(pci_doorbell) & 0xff) != 0) {
 		if (index++ == 1000)
 			return (int)(readl(pci_doorbell) & 0xff);
@@ -1624,10 +1627,8 @@
 	static struct BOARD_CTRL __iomem *board_ctrl;
 	static struct CH_CTRL __iomem *ch_ctrl;
 	static struct BUF_CTRL __iomem *buf_ctrl;
-	__u32 channel;
+	__u32 channel, param, fw_ver;
 	__u8 cmd;
-	__u32 param;
-	__u32 hw_ver, fw_ver;
 	int special_count;
 	int delta_count;
 
@@ -1635,8 +1636,6 @@
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -1737,15 +1736,7 @@
 {
 	struct cyclades_card *cinfo = dev_id;
 
-	if (unlikely(cinfo == NULL)) {
-#ifdef CY_DEBUG_INTERRUPTS
-		printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n",
-									irq);
-#endif
-		return IRQ_NONE;	/* spurious interrupt */
-	}
-
-	if (unlikely(!ISZLOADED(*cinfo))) {
+	if (unlikely(!cyz_is_loaded(cinfo))) {
 #ifdef CY_DEBUG_INTERRUPTS
 		printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
 				"(IRQ%d).\n", irq);
@@ -1785,7 +1776,6 @@
 	struct tty_struct *tty;
 	struct FIRM_ID __iomem *firm_id;
 	struct ZFW_CTRL __iomem *zfw_ctrl;
-	struct BOARD_CTRL __iomem *board_ctrl;
 	struct BUF_CTRL __iomem *buf_ctrl;
 	unsigned long expires = jiffies + HZ;
 	unsigned int port, card;
@@ -1793,19 +1783,17 @@
 	for (card = 0; card < NR_CARDS; card++) {
 		cinfo = &cy_card[card];
 
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 			continue;
-		if (!ISZLOADED(*cinfo))
+		if (!cyz_is_loaded(cinfo))
 			continue;
 
 		firm_id = cinfo->base_addr + ID_ADDRESS;
 		zfw_ctrl = cinfo->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &(zfw_ctrl->board_ctrl);
 
 	/* Skip first polling cycle to avoid racing conditions with the FW */
 		if (!cinfo->intr_enabled) {
-			cinfo->nports = (int)readl(&board_ctrl->n_channel);
 			cinfo->intr_enabled = 1;
 			continue;
 		}
@@ -1874,7 +1862,7 @@
 
 	set_line_char(info);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -1931,7 +1919,7 @@
 		base_addr = card->base_addr;
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return -ENODEV;
 
 		zfw_ctrl = card->base_addr +
@@ -2026,7 +2014,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2070,7 +2058,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2126,7 +2114,7 @@
 #endif
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
@@ -2233,7 +2221,7 @@
 #endif
 	info->port.blocked_open++;
 
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = cinfo->bus_index;
@@ -2296,7 +2284,7 @@
 
 		base_addr = cinfo->base_addr;
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*cinfo)) {
+		if (!cyz_is_loaded(cinfo)) {
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&info->port.open_wait, &wait);
 			return -EINVAL;
@@ -2397,16 +2385,14 @@
 	   treat it as absent from the system.  This
 	   will make the user pay attention.
 	 */
-	if (IS_CYC_Z(*info->card)) {
+	if (cy_is_Z(info->card)) {
 		struct cyclades_card *cinfo = info->card;
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
-		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *)
-					 (cinfo->ctl_addr))->mail_box_0)) &&
-					Z_FPGA_CHECK(*cinfo)) &&
-					(ZFIRM_HLT == readl(
-						&firm_id->signature))) {
+		if (!cyz_is_loaded(cinfo)) {
+			if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) &&
+					readl(&firm_id->signature) ==
+					ZFIRM_HLT) {
 				printk(KERN_ERR "cyc:Cyclades-Z Error: you "
 					"need an external power supply for "
 					"this number of ports.\nFirmware "
@@ -2423,18 +2409,13 @@
 		   interrupts should be enabled as soon as the first open
 		   happens to one of its ports. */
 			if (!cinfo->intr_enabled) {
-				struct ZFW_CTRL __iomem *zfw_ctrl;
-				struct BOARD_CTRL __iomem *board_ctrl;
-
-				zfw_ctrl = cinfo->base_addr +
-					(readl(&firm_id->zfwctrl_addr) &
-					 0xfffff);
-
-				board_ctrl = &zfw_ctrl->board_ctrl;
+				u16 intr;
 
 				/* Enable interrupts on the PLX chip */
-				cy_writew(cinfo->ctl_addr + 0x68,
-					readw(cinfo->ctl_addr + 0x68) | 0x0900);
+				intr = readw(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat) | 0x0900;
+				cy_writew(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat, intr);
 				/* Enable interrupts on the FW */
 				retval = cyz_issue_cmd(cinfo, 0,
 						C_CM_IRQ_ENBL, 0L);
@@ -2442,8 +2423,6 @@
 					printk(KERN_ERR "cyc:IRQ enable retval "
 						"was %x\n", retval);
 				}
-				cinfo->nports =
-					(int)readl(&board_ctrl->n_channel);
 				cinfo->intr_enabled = 1;
 			}
 		}
@@ -2556,7 +2535,7 @@
 #endif
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2601,7 +2580,7 @@
 	info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
-	if (IS_CYC_Z(*card)) {	/* If it is a Z card, flush the on-board
+	if (cy_is_Z(card)) {	/* If it is a Z card, flush the on-board
 					   buffers as well */
 		spin_lock_irqsave(&card->card_lock, flags);
 		retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L);
@@ -2682,7 +2661,7 @@
 
 	spin_lock_irqsave(&card->card_lock, flags);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		int channel = info->line - card->first_line;
 		int index = card->bus_index;
 		void __iomem *base_addr = card->base_addr +
@@ -2902,7 +2881,7 @@
 	channel = (info->line) - (card->first_line);
 
 #ifdef Z_EXT_CHARS_IN_BUFFER
-	if (!IS_CYC_Z(cy_card[card])) {
+	if (!cy_is_Z(card)) {
 #endif				/* Z_EXT_CHARS_IN_BUFFER */
 #ifdef CY_DEBUG_IO
 		printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n",
@@ -2984,7 +2963,6 @@
 	void __iomem *base_addr;
 	int chip, channel, index;
 	unsigned cflag, iflag;
-	unsigned short chip_number;
 	int baud, baud_rate = 0;
 	int i;
 
@@ -3013,9 +2991,8 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	chip_number = channel / 4;
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 
 		index = card->bus_index;
 
@@ -3233,21 +3210,17 @@
 	} else {
 		struct FIRM_ID __iomem *firm_id;
 		struct ZFW_CTRL __iomem *zfw_ctrl;
-		struct BOARD_CTRL __iomem *board_ctrl;
 		struct CH_CTRL __iomem *ch_ctrl;
-		struct BUF_CTRL __iomem *buf_ctrl;
 		__u32 sw_flow;
 		int retval;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
 			(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &zfw_ctrl->board_ctrl;
 		ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]);
-		buf_ctrl = &zfw_ctrl->buf_ctrl[channel];
 
 		/* baud rate */
 		baud = tty_get_baud_rate(info->port.tty);
@@ -3457,7 +3430,7 @@
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3497,7 +3470,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3523,7 +3496,7 @@
 	} else {
 		base_addr = card->base_addr;
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3566,7 +3539,7 @@
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3641,7 +3614,7 @@
 		base_addr = card->base_addr;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3713,7 +3686,7 @@
 	card = info->card;
 
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		/* Let the transmit ISR take care of this (since it
 		   requires stuffing characters into the output stream).
 		 */
@@ -3782,7 +3755,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3810,7 +3783,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3844,7 +3817,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3867,7 +3840,7 @@
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -4121,7 +4094,7 @@
 	card = info->card;
 	channel = info->line - card->first_line;
 
-	if (IS_CYC_Z(*card)) {
+	if (cy_is_Z(card)) {
 		if (ch == STOP_CHAR(tty))
 			cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L);
 		else if (ch == START_CHAR(tty))
@@ -4154,7 +4127,7 @@
 	card = info->card;
 
 	if (I_IXOFF(tty)) {
-		if (!IS_CYC_Z(*card))
+		if (!cy_is_Z(card))
 			cy_send_xchar(tty, STOP_CHAR(tty));
 		else
 			info->throttle = 1;
@@ -4162,7 +4135,7 @@
 
 	if (tty->termios->c_cflag & CRTSCTS) {
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4219,7 +4192,7 @@
 	if (tty->termios->c_cflag & CRTSCTS) {
 		card = info->card;
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4263,7 +4236,7 @@
 
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		index = cinfo->bus_index;
 		chip = channel >> 2;
 		channel &= 0x03;
@@ -4296,7 +4269,7 @@
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
 	index = cinfo->bus_index;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index);
@@ -4347,33 +4320,20 @@
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
 	struct cyclades_port *info;
-	u32 uninitialized_var(mailbox);
-	unsigned int nports, port;
+	unsigned int port;
 	unsigned short chip_number;
-	int uninitialized_var(index);
 
 	spin_lock_init(&cinfo->card_lock);
+	cinfo->intr_enabled = 0;
 
-	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&((struct RUNTIME_9060 __iomem *)
-				     cinfo->ctl_addr)->mail_box_0);
-		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
-		cinfo->intr_enabled = 0;
-		cinfo->nports = 0;	/* Will be correctly set later, after
-					   Z FW is loaded */
-	} else {
-		index = cinfo->bus_index;
-		nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips;
-	}
-
-	cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL);
+	cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports),
+			GFP_KERNEL);
 	if (cinfo->ports == NULL) {
 		printk(KERN_ERR "Cyclades: cannot allocate ports\n");
-		cinfo->nports = 0;
 		return -ENOMEM;
 	}
 
-	for (port = cinfo->first_line; port < cinfo->first_line + nports;
+	for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports;
 			port++) {
 		info = &cinfo->ports[port - cinfo->first_line];
 		tty_port_init(&info->port);
@@ -4387,9 +4347,9 @@
 		init_completion(&info->shutdown_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 
-		if (IS_CYC_Z(*cinfo)) {
+		if (cy_is_Z(cinfo)) {
 			info->type = PORT_STARTECH;
-			if (mailbox == ZO_V1)
+			if (cinfo->hw_ver == ZO_V1)
 				info->xmit_fifo_size = CYZ_FIFO_SIZE;
 			else
 				info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4398,6 +4358,7 @@
 				cyz_rx_restart, (unsigned long)info);
 #endif
 		} else {
+			int index = cinfo->bus_index;
 			info->type = PORT_CIRRUS;
 			info->xmit_fifo_size = CyMAX_CHAR_FIFO;
 			info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS;
@@ -4430,7 +4391,7 @@
 	}
 
 #ifndef CONFIG_CYZ_INTR
-	if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) {
+	if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) {
 		mod_timer(&cyz_timerlist, jiffies + 1);
 #ifdef CY_PCI_DEBUG
 		printk(KERN_DEBUG "Cyclades-Z polling initialized\n");
@@ -4621,11 +4582,12 @@
 
 		/* set cy_card */
 		cy_card[j].base_addr = cy_isa_address;
-		cy_card[j].ctl_addr = NULL;
+		cy_card[j].ctl_addr.p9050 = NULL;
 		cy_card[j].irq = (int)cy_isa_irq;
 		cy_card[j].bus_index = 0;
 		cy_card[j].first_line = cy_next_channel;
-		cy_card[j].num_chips = cy_isa_nchan / 4;
+		cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP;
+		cy_card[j].nports = cy_isa_nchan;
 		if (cy_init_card(&cy_card[j])) {
 			cy_card[j].base_addr = NULL;
 			free_irq(cy_isa_irq, &cy_card[j]);
@@ -4781,7 +4743,7 @@
 	struct CUSTOM_REG __iomem *cust = base_addr;
 	struct ZFW_CTRL __iomem *pt_zfwctrl;
 	void __iomem *tmp;
-	u32 mailbox, status;
+	u32 mailbox, status, nchan;
 	unsigned int i;
 	int retval;
 
@@ -4793,7 +4755,7 @@
 
 	/* Check whether the firmware is already loaded and running. If
 	   positive, skip this board */
-	if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
+	if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
 		u32 cntval = readl(base_addr + 0x190);
 
 		udelay(100);
@@ -4812,7 +4774,7 @@
 
 	mailbox = readl(&ctl_addr->mail_box_0);
 
-	if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) {
+	if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) {
 		/* stops CPU and set window to beginning of RAM */
 		cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
 		cy_writel(&cust->cpu_stop, 0);
@@ -4828,7 +4790,7 @@
 				base_addr);
 		if (retval)
 			goto err_rel;
-		if (!Z_FPGA_LOADED(ctl_addr)) {
+		if (!__cyz_fpga_loaded(ctl_addr)) {
 			dev_err(&pdev->dev, "fw upload successful, but fw is "
 					"not loaded\n");
 			goto err_rel;
@@ -4887,7 +4849,7 @@
 				"system before loading the new FW to the "
 				"Cyclades-Z.\n");
 
-			if (Z_FPGA_LOADED(ctl_addr))
+			if (__cyz_fpga_loaded(ctl_addr))
 				plx_init(pdev, irq, ctl_addr);
 
 			retval = -EIO;
@@ -4902,16 +4864,16 @@
 			base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr),
 			base_addr + readl(&fid->zfwctrl_addr));
 
+	nchan = readl(&pt_zfwctrl->board_ctrl.n_channel);
 	dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n",
-		readl(&pt_zfwctrl->board_ctrl.fw_version),
-		readl(&pt_zfwctrl->board_ctrl.n_channel));
+		readl(&pt_zfwctrl->board_ctrl.fw_version), nchan);
 
-	if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) {
+	if (nchan == 0) {
 		dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please "
 			"check the connection between the Z host card and the "
 			"serial expanders.\n");
 
-		if (Z_FPGA_LOADED(ctl_addr))
+		if (__cyz_fpga_loaded(ctl_addr))
 			plx_init(pdev, irq, ctl_addr);
 
 		dev_info(&pdev->dev, "Null number of ports detected. Board "
@@ -4932,9 +4894,7 @@
 	cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
 			0x00030800UL);
 
-	plx_init(pdev, irq, ctl_addr);
-
-	return 0;
+	return nchan;
 err_rel:
 	release_firmware(fw);
 err:
@@ -4946,7 +4906,7 @@
 {
 	void __iomem *addr0 = NULL, *addr2 = NULL;
 	char *card_name = NULL;
-	u32 mailbox;
+	u32 uninitialized_var(mailbox);
 	unsigned int device_id, nchan = 0, card_no, i;
 	unsigned char plx_ver;
 	int retval, irq;
@@ -5023,11 +4983,12 @@
 		}
 
 		/* Disable interrupts on the PLX before resetting it */
-		cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900);
+		cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
 
 		plx_init(pdev, irq, addr0);
 
-		mailbox = (u32)readl(&ctl_addr->mail_box_0);
+		mailbox = readl(&ctl_addr->mail_box_0);
 
 		addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
 				mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5038,12 +4999,8 @@
 
 		if (mailbox == ZE_V1) {
 			card_name = "Cyclades-Ze";
-
-			readl(&ctl_addr->mail_box_0);
-			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
-
 #ifdef CY_PCI_DEBUG
 			if (mailbox == ZO_V1) {
 				cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
@@ -5065,15 +5022,12 @@
 			 */
 			if ((mailbox == ZO_V1) || (mailbox == ZO_V2))
 				cy_writel(addr2 + ID_ADDRESS, 0L);
-
-			retval = cyz_load_fw(pdev, addr2, addr0, irq);
-			if (retval)
-				goto err_unmap;
-			/* This must be a Cyclades-8Zo/PCI.  The extendable
-			   version will have a different device_id and will
-			   be allocated its maximum number of ports. */
-			nchan = 8;
 		}
+
+		retval = cyz_load_fw(pdev, addr2, addr0, irq);
+		if (retval <= 0)
+			goto err_unmap;
+		nchan = retval;
 	}
 
 	if ((cy_next_channel + nchan) > NR_PORTS) {
@@ -5103,8 +5057,10 @@
 			dev_err(&pdev->dev, "could not allocate IRQ\n");
 			goto err_unmap;
 		}
-		cy_card[card_no].num_chips = nchan / 4;
+		cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP;
 	} else {
+		cy_card[card_no].hw_ver = mailbox;
+		cy_card[card_no].num_chips = (unsigned int)-1;
 #ifdef CONFIG_CYZ_INTR
 		/* allocate IRQ only if board has an IRQ */
 		if (irq != 0 && irq != 255) {
@@ -5117,15 +5073,15 @@
 			}
 		}
 #endif				/* CONFIG_CYZ_INTR */
-		cy_card[card_no].num_chips = (unsigned int)-1;
 	}
 
 	/* set cy_card */
 	cy_card[card_no].base_addr = addr2;
-	cy_card[card_no].ctl_addr = addr0;
+	cy_card[card_no].ctl_addr.p9050 = addr0;
 	cy_card[card_no].irq = irq;
 	cy_card[card_no].bus_index = 1;
 	cy_card[card_no].first_line = cy_next_channel;
+	cy_card[card_no].nports = nchan;
 	retval = cy_init_card(&cy_card[card_no]);
 	if (retval)
 		goto err_null;
@@ -5138,17 +5094,20 @@
 		plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
 		switch (plx_ver) {
 		case PLX_9050:
-
 			cy_writeb(addr0 + 0x4c, 0x43);
 			break;
 
 		case PLX_9060:
 		case PLX_9080:
 		default:	/* Old boards, use PLX_9060 */
-			plx_init(pdev, irq, addr0);
-			cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900);
+		{
+			struct RUNTIME_9060 __iomem *ctl_addr = addr0;
+			plx_init(pdev, irq, ctl_addr);
+			cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
 			break;
 		}
+		}
 	}
 
 	dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5179,22 +5138,23 @@
 	unsigned int i;
 
 	/* non-Z with old PLX */
-	if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
+	if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
 			PLX_9050)
-		cy_writeb(cinfo->ctl_addr + 0x4c, 0);
+		cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
 	else
 #ifndef CONFIG_CYZ_INTR
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 #endif
-		cy_writew(cinfo->ctl_addr + 0x68,
-				readw(cinfo->ctl_addr + 0x68) & ~0x0900);
+		cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
+			readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
+			~0x0900);
 
 	iounmap(cinfo->base_addr);
-	if (cinfo->ctl_addr)
-		iounmap(cinfo->ctl_addr);
+	if (cinfo->ctl_addr.p9050)
+		iounmap(cinfo->ctl_addr.p9050);
 	if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
-		&& !IS_CYC_Z(*cinfo)
+		&& !cy_is_Z(cinfo)
 #endif /* CONFIG_CYZ_INTR */
 		)
 		free_irq(cinfo->irq, cinfo);
@@ -5240,7 +5200,7 @@
 					(cur_jifs - info->idle_stats.recv_idle)/
 					HZ, info->idle_stats.overruns,
 					/* FIXME: double check locking */
-					(long)info->port.tty->ldisc.ops->num);
+					(long)info->port.tty->ldisc->ops->num);
 			else
 				seq_printf(m, "%3d %8lu %10lu %8lu "
 					"%10lu %8lu %9lu %6ld\n",
@@ -5386,11 +5346,11 @@
 			/* clear interrupt */
 			cy_writeb(card->base_addr + Cy_ClrIntr, 0);
 			iounmap(card->base_addr);
-			if (card->ctl_addr)
-				iounmap(card->ctl_addr);
+			if (card->ctl_addr.p9050)
+				iounmap(card->ctl_addr.p9050);
 			if (card->irq
 #ifndef CONFIG_CYZ_INTR
-				&& !IS_CYC_Z(*card)
+				&& !cy_is_Z(card)
 #endif /* CONFIG_CYZ_INTR */
 				)
 				free_irq(card->irq, card);
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index af7c13c..abef1f7 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -745,7 +745,7 @@
 	return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port)
+static void epca_dtr_rts(struct tty_port *port, int onoff)
 {
 }
 
@@ -925,7 +925,7 @@
 
 static const struct tty_port_operations epca_port_ops = {
 	.carrier_raised = epca_carrier_raised,
-	.raise_dtr_rts = epca_raise_dtr_rts,
+	.dtr_rts = epca_dtr_rts,
 };
 
 static int info_open(struct tty_struct *tty, struct file *filp)
@@ -1518,7 +1518,7 @@
 		if (event & MODEMCHG_IND) {
 			/* A modem signal change has been indicated */
 			ch->imodem = mstat;
-			if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
+			if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) {
 				/* We are now receiving dcd */
 				if (mstat & ch->dcd)
 					wake_up_interruptible(&ch->port.open_wait);
@@ -1765,9 +1765,9 @@
 		 * that the driver will wait on carrier detect.
 		 */
 		if (ts->c_cflag & CLOCAL)
-			clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			clear_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		else
-			set_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			set_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		mval = ch->m_dtr | ch->m_rts;
 	} /* End CBAUD not detected */
 	iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2114,8 +2114,8 @@
 			tty_wait_until_sent(tty, 0);
 		} else {
 			/* ldisc lock already held in ioctl */
-			if (tty->ldisc.ops->flush_buffer)
-				tty->ldisc.ops->flush_buffer(tty);
+			if (tty->ldisc->ops->flush_buffer)
+				tty->ldisc->ops->flush_buffer(tty);
 		}
 		unlock_kernel();
 		/* Fall Thru */
@@ -2244,7 +2244,8 @@
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
 				tty_hangup(tty);
 				wake_up_interruptible(&ch->port.open_wait);
-				clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
+				clear_bit(ASYNCB_NORMAL_ACTIVE,
+						&ch->port.flags);
 			}
 		}
 		tty_kref_put(tty);
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 0061e18..0d10b89 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@
 		amountToMove = count;
 	}
 	// Move the first block
-	pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+	pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
 	// If we needed to wrap, do the second data move
 	if (count > amountToMove) {
-		pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+		pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 pCh->Ibuf, NULL, count - amountToMove );
 	}
 	// Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index afd9247..517271c 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1315,8 +1315,8 @@
 	if (tty->pgrp)
 		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
-		if ( tty->ldisc.ops->flush_buffer )  
-			tty->ldisc.ops->flush_buffer(tty);
+		if ( tty->ldisc->ops->flush_buffer )  
+			tty->ldisc->ops->flush_buffer(tty);
 		i2InputFlush( tty->driver_data );
 	}
 }
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a59eac5..4d745a8 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -329,7 +329,7 @@
 
 /* card->lock MUST NOT be held */
 
-static void isicom_raise_dtr_rts(struct tty_port *port)
+static void isicom_dtr_rts(struct tty_port *port, int on)
 {
 	struct isi_port *ip = container_of(port, struct isi_port, port);
 	struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@
 	if (!lock_card(card))
 		return;
 
-	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
-	outw(0x0f04, base);
-	InterruptTheCard(base);
-	ip->status |= (ISI_DTR | ISI_RTS);
+	if (on) {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0f04, base);
+		InterruptTheCard(base);
+		ip->status |= (ISI_DTR | ISI_RTS);
+	} else {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0C04, base);
+		InterruptTheCard(base);
+		ip->status &= ~(ISI_DTR | ISI_RTS);
+	}
 	unlock_card(card);
 }
 
@@ -1339,7 +1346,7 @@
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
-	.raise_dtr_rts		= isicom_raise_dtr_rts,
+	.dtr_rts		= isicom_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fff19f7..e18800c 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -1140,14 +1140,14 @@
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stli_raise_dtr_rts(struct tty_port *port)
+static void stli_dtr_rts(struct tty_port *port, int on)
 {
 	struct stliport *portp = container_of(port, struct stliport, port);
 	struct stlibrd *brdp = stli_brds[portp->brdnr];
-	stli_mkasysigs(&portp->asig, 1, 1);
+	stli_mkasysigs(&portp->asig, on, on);
 	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
 		sizeof(asysigs_t), 0) < 0)
-			printk(KERN_WARNING "istallion: dtr raise failed.\n");
+			printk(KERN_WARNING "istallion: dtr set failed.\n");
 }
 
 
@@ -4417,7 +4417,7 @@
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
-	.raise_dtr_rts = stli_raise_dtr_rts,
+	.dtr_rts = stli_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4a4cab7..65b6ff2 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -1184,6 +1184,11 @@
 		return -ENODEV;
 	}
 
+	if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) {
+		mutex_unlock(&moxa_openlock);
+		return -ENODEV;
+	}
+
 	ch = &brd->ports[port % MAX_PORTS_PER_BOARD];
 	ch->port.count++;
 	tty->driver_data = ch;
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 13f8871..9533f43 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,14 +547,18 @@
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
-static void mxser_raise_dtr_rts(struct tty_port *port)
+static void mxser_dtr_rts(struct tty_port *port, int on)
 {
 	struct mxser_port *mp = container_of(port, struct mxser_port, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&mp->slock, flags);
-	outb(inb(mp->ioaddr + UART_MCR) |
-		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	if (on)
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	else
+		outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
+			mp->ioaddr + UART_MCR);
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
@@ -2356,7 +2360,7 @@
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
-	.raise_dtr_rts = mxser_raise_dtr_rts,
+	.dtr_rts = mxser_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index bacb3e2..461ece5 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -342,8 +342,8 @@
 #endif
 	
 	/* Flush any pending characters in the driver and discipline. */
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 
 	tty_driver_flush_buffer(tty);
 		
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index f6f0e4e..94a5d50 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -73,24 +73,6 @@
 #define ECHO_OP_SET_CANON_COL 0x81
 #define ECHO_OP_ERASE_TAB 0x82
 
-static inline unsigned char *alloc_buf(void)
-{
-	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
-
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		return kmalloc(N_TTY_BUF_SIZE, prio);
-	else
-		return (unsigned char *)__get_free_page(prio);
-}
-
-static inline void free_buf(unsigned char *buf)
-{
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf);
-	else
-		free_page((unsigned long) buf);
-}
-
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 			       unsigned char __user *ptr)
 {
@@ -1558,11 +1540,11 @@
 {
 	n_tty_flush_buffer(tty);
 	if (tty->read_buf) {
-		free_buf(tty->read_buf);
+		kfree(tty->read_buf);
 		tty->read_buf = NULL;
 	}
 	if (tty->echo_buf) {
-		free_buf(tty->echo_buf);
+		kfree(tty->echo_buf);
 		tty->echo_buf = NULL;
 	}
 }
@@ -1584,17 +1566,16 @@
 
 	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
-		tty->read_buf = alloc_buf();
+		tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
 	if (!tty->echo_buf) {
-		tty->echo_buf = alloc_buf();
+		tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+
 		if (!tty->echo_buf)
 			return -ENOMEM;
 	}
-	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
-	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 19d79fc..77b3648 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -383,7 +383,7 @@
 static void tx_timeout(unsigned long context);
 
 static int carrier_raised(struct tty_port *port);
-static void raise_dtr_rts(struct tty_port *port);
+static void dtr_rts(struct tty_port *port, int onoff);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@
 
 static const struct tty_port_operations mgslpc_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts
+	.dtr_rts = dtr_rts
 };
 
 static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,16 @@
 	return 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int onoff)
 {
 	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (onoff)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR;
 	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 31038a0..5acd29e 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -30,7 +30,6 @@
 
 #include <asm/system.h>
 
-/* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 static struct tty_driver *ptm_driver;
 static struct tty_driver *pts_driver;
@@ -111,7 +110,7 @@
 	c = to->receive_room;
 	if (c > count)
 		c = count;
-	to->ldisc.ops->receive_buf(to, buf, NULL, c);
+	to->ldisc->ops->receive_buf(to, buf, NULL, c);
 
 	return c;
 }
@@ -149,11 +148,11 @@
 	int count;
 
 	/* We should get the line discipline lock for "tty->link" */
-	if (!to || !to->ldisc.ops->chars_in_buffer)
+	if (!to || !to->ldisc->ops->chars_in_buffer)
 		return 0;
 
 	/* The ldisc must report 0 if no characters available to be read */
-	count = to->ldisc.ops->chars_in_buffer(to);
+	count = to->ldisc->ops->chars_in_buffer(to);
 
 	if (tty->driver->subtype == PTY_TYPE_SLAVE)
 		return count;
@@ -187,8 +186,8 @@
 	if (!to)
 		return;
 
-	if (to->ldisc.ops->flush_buffer)
-		to->ldisc.ops->flush_buffer(to);
+	if (to->ldisc->ops->flush_buffer)
+		to->ldisc->ops->flush_buffer(to);
 
 	if (to->packet) {
 		spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f59fc5c..63d5b62 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -872,11 +872,16 @@
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct r_port *info = container_of(port, struct r_port, port);
-	sSetDTR(&info->channel);
-	sSetRTS(&info->channel);
+	if (on) {
+		sSetDTR(&info->channel);
+		sSetRTS(&info->channel);
+	} else {
+		sClrDTR(&info->channel);
+		sClrRTS(&info->channel);
+	}
 }
 
 /*
@@ -934,7 +939,7 @@
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
+	if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -958,7 +963,7 @@
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		set_bit(ASYNC_INITIALIZED, &info->port.flags);
+		set_bit(ASYNCB_INITIALIZED, &info->port.flags);
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1641,7 +1646,7 @@
 	/*  Write remaining data into the port's xmit_buf */
 	while (1) {
 		/* Hung up ? */
-		if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
+		if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags))
 			goto end;
 		c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
 		c = min(c, XMIT_BUF_SIZE - info->xmit_head);
@@ -2250,7 +2255,7 @@
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index cb8ca56..f97b9e8 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,7 @@
 		}
 		count = sel_buffer_lth - pasted;
 		count = min(count, tty->receive_room);
-		tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+		tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
 								NULL, count);
 		pasted += count;
 	}
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 2ad813a..53e504f 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -772,11 +772,11 @@
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stl_raise_dtr_rts(struct tty_port *port)
+static void stl_dtr_rts(struct tty_port *port, int on)
 {
 	struct stlport *portp = container_of(port, struct stlport, port);
 	/* Takes brd_lock internally */
-	stl_setsignals(portp, 1, 1);
+	stl_setsignals(portp, on, on);
 }
 
 /*****************************************************************************/
@@ -2547,7 +2547,7 @@
 
 static const struct tty_port_operations stl_port_ops = {
 	.carrier_raised = stl_carrier_raised,
-	.raise_dtr_rts = stl_raise_dtr_rts,
+	.dtr_rts = stl_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index afd0b26..afded3a 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3247,13 +3247,16 @@
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->irq_spinlock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	usc_set_serial_signals(info);
 	spin_unlock_irqrestore(&info->irq_spinlock,flags);
 }
@@ -4258,7 +4261,7 @@
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 5e25649..1386625 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -214,6 +214,7 @@
 #define set_desc_next(a,b) (a).next   = cpu_to_le32((unsigned int)(b))
 #define set_desc_count(a,b)(a).count  = cpu_to_le16((unsigned short)(b))
 #define set_desc_eof(a,b)  (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
+#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b))
 #define desc_count(a)      (le16_to_cpu((a).count))
 #define desc_status(a)     (le16_to_cpu((a).status))
 #define desc_complete(a)   (le16_to_cpu((a).status) & BIT15)
@@ -297,6 +298,7 @@
 	u32 max_frame_size;       /* as set by device config */
 
 	unsigned int rbuf_fill_level;
+	unsigned int rx_pio;
 	unsigned int if_mode;
 	unsigned int base_clock;
 
@@ -331,6 +333,8 @@
 	struct slgt_desc *rbufs;
 	unsigned int rbuf_current;
 	unsigned int rbuf_index;
+	unsigned int rbuf_fill_index;
+	unsigned short rbuf_fill_count;
 
 	unsigned int tbuf_count;
 	struct slgt_desc *tbufs;
@@ -2110,6 +2114,40 @@
 	info->pending_bh |= BH_STATUS;
 }
 
+static void isr_rxdata(struct slgt_info *info)
+{
+	unsigned int count = info->rbuf_fill_count;
+	unsigned int i = info->rbuf_fill_index;
+	unsigned short reg;
+
+	while (rd_reg16(info, SSR) & IRQ_RXDATA) {
+		reg = rd_reg16(info, RDR);
+		DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg));
+		if (desc_complete(info->rbufs[i])) {
+			/* all buffers full */
+			rx_stop(info);
+			info->rx_restart = 1;
+			continue;
+		}
+		info->rbufs[i].buf[count++] = (unsigned char)reg;
+		/* async mode saves status byte to buffer for each data byte */
+		if (info->params.mode == MGSL_MODE_ASYNC)
+			info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8);
+		if (count == info->rbuf_fill_level || (reg & BIT10)) {
+			/* buffer full or end of frame */
+			set_desc_count(info->rbufs[i], count);
+			set_desc_status(info->rbufs[i], BIT15 | (reg >> 8));
+			info->rbuf_fill_count = count = 0;
+			if (++i == info->rbuf_count)
+				i = 0;
+			info->pending_bh |= BH_RECEIVE;
+		}
+	}
+
+	info->rbuf_fill_index = i;
+	info->rbuf_fill_count = count;
+}
+
 static void isr_serial(struct slgt_info *info)
 {
 	unsigned short status = rd_reg16(info, SSR);
@@ -2125,6 +2163,8 @@
 			if (info->tx_count)
 				isr_txeom(info, status);
 		}
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
 			info->icount.brk++;
 			/* process break detection if tty control allows */
@@ -2141,7 +2181,8 @@
 	} else {
 		if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
 			isr_txeom(info, status);
-
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if (status & IRQ_RXIDLE) {
 			if (status & RXIDLE)
 				info->icount.rxidle++;
@@ -2642,6 +2683,10 @@
 			return -EINVAL;
 		}
 		info->rbuf_fill_level = rbuf_fill_level;
+		if (rbuf_fill_level < 128)
+			info->rx_pio = 1; /* PIO mode */
+		else
+			info->rx_pio = 0; /* DMA mode */
 		rx_stop(info); /* restart receiver to use new fill level */
 	}
 
@@ -3099,13 +3144,16 @@
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	unsigned long flags;
 	struct slgt_info *info = container_of(port, struct slgt_info, port);
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3419,7 +3467,7 @@
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
@@ -3841,15 +3889,27 @@
 	rdma_reset(info);
 	reset_rbufs(info);
 
-	/* set 1st descriptor address */
-	wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
-
-	if (info->params.mode != MGSL_MODE_ASYNC) {
-		/* enable rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT2 + BIT0));
+	if (info->rx_pio) {
+		/* rx request when rx FIFO not empty */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14));
+		slgt_irq_on(info, IRQ_RXDATA);
+		if (info->params.mode == MGSL_MODE_ASYNC) {
+			/* enable saving of rx status */
+			wr_reg32(info, RDCSR, BIT6);
+		}
 	} else {
-		/* enable saving of rx status, rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		/* rx request when rx FIFO half full */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14));
+		/* set 1st descriptor address */
+		wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
+
+		if (info->params.mode != MGSL_MODE_ASYNC) {
+			/* enable rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT2 + BIT0));
+		} else {
+			/* enable saving of rx status, rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		}
 	}
 
 	slgt_irq_on(info, IRQ_RXOVER);
@@ -4467,6 +4527,8 @@
 static void reset_rbufs(struct slgt_info *info)
 {
 	free_rbufs(info, 0, info->rbuf_count - 1);
+	info->rbuf_fill_index = 0;
+	info->rbuf_fill_count = 0;
 }
 
 /*
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 26de60e..6f727e3 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3277,13 +3277,16 @@
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3746,7 +3749,7 @@
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 55ba6f1..ac16fbe 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -29,10 +29,7 @@
 	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
 	if (!buf)
 		goto err;
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-	else
-		buf->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+	buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 	if (!buf->data)
 		goto err_buf;
 	atomic_set(&buf->count, 1);
@@ -52,10 +49,7 @@
 static void tty_audit_buf_free(struct tty_audit_buf *buf)
 {
 	WARN_ON(buf->valid != 0);
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf->data);
-	else
-		free_page((unsigned long)buf->data);
+	kfree(buf->data);
 	kfree(buf);
 }
 
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 66b99a2..939e198 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -295,7 +295,7 @@
 	struct tty_driver *p, *res = NULL;
 	int tty_line = 0;
 	int len;
-	char *str;
+	char *str, *stp;
 
 	for (str = name; *str; str++)
 		if ((*str >= '0' && *str <= '9') || *str == ',')
@@ -311,13 +311,14 @@
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		if (strncmp(name, p->name, len) != 0)
 			continue;
-		if (*str == ',')
-			str++;
-		if (*str == '\0')
-			str = NULL;
+		stp = str;
+		if (*stp == ',')
+			stp++;
+		if (*stp == '\0')
+			stp = NULL;
 
 		if (tty_line >= 0 && tty_line <= p->num && p->ops &&
-		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) {
+		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
 			res = tty_driver_kref_get(p);
 			*line = tty_line;
 			break;
@@ -470,43 +471,6 @@
 EXPORT_SYMBOL_GPL(tty_wakeup);
 
 /**
- *	tty_ldisc_flush	-	flush line discipline queue
- *	@tty: tty
- *
- *	Flush the line discipline queue (if any) for this tty. If there
- *	is no line discipline active this is a no-op.
- */
-
-void tty_ldisc_flush(struct tty_struct *tty)
-{
-	struct tty_ldisc *ld = tty_ldisc_ref(tty);
-	if (ld) {
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_ldisc_deref(ld);
-	}
-	tty_buffer_flush(tty);
-}
-
-EXPORT_SYMBOL_GPL(tty_ldisc_flush);
-
-/**
- *	tty_reset_termios	-	reset terminal state
- *	@tty: tty to reset
- *
- *	Restore a terminal to the driver default state
- */
-
-static void tty_reset_termios(struct tty_struct *tty)
-{
-	mutex_lock(&tty->termios_mutex);
-	*tty->termios = tty->driver->init_termios;
-	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
-	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
-	mutex_unlock(&tty->termios_mutex);
-}
-
-/**
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
  *
@@ -535,7 +499,6 @@
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
-	struct tty_ldisc *ld;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -566,40 +529,8 @@
 		filp->f_op = &hung_up_tty_fops;
 	}
 	file_list_unlock();
-	/*
-	 * FIXME! What are the locking issues here? This may me overdoing
-	 * things... This question is especially important now that we've
-	 * removed the irqlock.
-	 */
-	ld = tty_ldisc_ref(tty);
-	if (ld != NULL) {
-		/* We may have no line discipline at this point */
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_driver_flush_buffer(tty);
-		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-		    ld->ops->write_wakeup)
-			ld->ops->write_wakeup(tty);
-		if (ld->ops->hangup)
-			ld->ops->hangup(tty);
-	}
-	/*
-	 * FIXME: Once we trust the LDISC code better we can wait here for
-	 * ldisc completion and fix the driver call race
-	 */
-	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
-	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
-	/*
-	 * Shutdown the current line discipline, and reset it to
-	 * N_TTY.
-	 */
-	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
-		tty_reset_termios(tty);
-	/* Defer ldisc switch */
-	/* tty_deferred_ldisc_switch(N_TTY);
 
-	  This should get done automatically when the port closes and
-	  tty_release is called */
+	tty_ldisc_hangup(tty);
 
 	read_lock(&tasklist_lock);
 	if (tty->session) {
@@ -628,12 +559,15 @@
 	read_unlock(&tasklist_lock);
 
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	tty->flags = 0;
+	clear_bit(TTY_THROTTLED, &tty->flags);
+	clear_bit(TTY_PUSH, &tty->flags);
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	put_pid(tty->session);
 	put_pid(tty->pgrp);
 	tty->session = NULL;
 	tty->pgrp = NULL;
 	tty->ctrl_status = 0;
+	set_bit(TTY_HUPPED, &tty->flags);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	/* Account for the p->signal references we killed */
@@ -659,10 +593,7 @@
 	 * can't yet guarantee all that.
 	 */
 	set_bit(TTY_HUPPED, &tty->flags);
-	if (ld) {
-		tty_ldisc_enable(tty);
-		tty_ldisc_deref(ld);
-	}
+	tty_ldisc_enable(tty);
 	unlock_kernel();
 	if (f)
 		fput(f);
@@ -2480,6 +2411,24 @@
 	return tty->ops->tiocmset(tty, file, set, clear);
 }
 
+struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+		tty = tty->link;
+	return tty;
+}
+EXPORT_SYMBOL(tty_pair_get_tty);
+
+struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+	    return tty;
+	return tty->link;
+}
+EXPORT_SYMBOL(tty_pair_get_pty);
+
 /*
  * Split this up, as gcc can choke on it otherwise..
  */
@@ -2495,11 +2444,7 @@
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
-	real_tty = tty;
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-
+	real_tty = tty_pair_get_tty(tty);
 
 	/*
 	 * Factor out some common prep work
@@ -2555,7 +2500,7 @@
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc.ops->num, (int __user *)p);
+		return put_user(tty->ldisc->ops->num, (int __user *)p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 	/*
@@ -2770,6 +2715,7 @@
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
 	mutex_init(&tty->termios_mutex);
+	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 6f4c7d0..8116bb1c 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -97,14 +97,19 @@
  *	@tty: terminal
  *
  *	Indicate that a tty should stop transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
  */
 
 void tty_throttle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	/* check TTY_THROTTLED first so it indicates our state */
 	if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->throttle)
 		tty->ops->throttle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_throttle);
 
@@ -113,13 +118,21 @@
  *	@tty: terminal
  *
  *	Indicate that a tty may continue transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
+ *
+ *	Drivers should however remember that the stack can issue a throttle,
+ *	then change flow control method, then unthrottle.
  */
 
 void tty_unthrottle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->unthrottle)
 		tty->ops->unthrottle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_unthrottle);
 
@@ -613,9 +626,25 @@
 	return 0;
 }
 
+static void copy_termios(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
+static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios_locked, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
 static int get_termio(struct tty_struct *tty, struct termio __user *termio)
 {
-	if (kernel_termios_to_user_termio(termio, tty->termios))
+	struct ktermios kterm;
+	copy_termios(tty, &kterm);
+	if (kernel_termios_to_user_termio(termio, &kterm))
 		return -EFAULT;
 	return 0;
 }
@@ -917,6 +946,8 @@
 	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int ret = 0;
+	struct ktermios kterm;
+	struct termiox ktermx;
 
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -952,23 +983,20 @@
 		return set_termios(real_tty, p, TERMIOS_OLD);
 #ifndef TCGETS2
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #else
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCGETS2:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCSETSF2:
 		return set_termios(real_tty, p,  TERMIOS_FLUSH | TERMIOS_WAIT);
@@ -987,34 +1015,36 @@
 		return set_termios(real_tty, p, TERMIOS_TERMIO);
 #ifndef TCGETS2
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios(real_tty->termios_locked,
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
-		return ret;
+		return 0;
 #else
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
-			ret = -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios_1(real_tty->termios_locked,
+			return -EPERM;
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios_1(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #endif
@@ -1023,9 +1053,10 @@
 		if (real_tty->termiox == NULL)
 			return -EINVAL;
 		mutex_lock(&real_tty->termios_mutex);
-		if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox)))
-			ret = -EFAULT;
+		memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox));
 		mutex_unlock(&real_tty->termios_mutex);
+		if (copy_to_user(p, &ktermx, sizeof(struct termiox)))
+			ret = -EFAULT;
 		return ret;
 	case TCSETX:
 		return set_termiox(real_tty, p, 0);
@@ -1035,10 +1066,9 @@
 		return set_termiox(real_tty, p, TERMIOS_FLUSH);
 #endif		
 	case TIOCGSOFTCAR:
-		mutex_lock(&real_tty->termios_mutex);
-		ret = put_user(C_CLOCAL(real_tty) ? 1 : 0,
+		copy_termios(real_tty, &kterm);
+		ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0,
 						(int __user *)arg);
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSSOFTCAR:
 		if (get_user(arg, (unsigned int __user *) arg))
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f78f5b0..39c8f86 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -115,19 +115,22 @@
 /**
  *	tty_ldisc_try_get	-	try and reference an ldisc
  *	@disc: ldisc number
- *	@ld: tty ldisc structure to complete
  *
  *	Attempt to open and lock a line discipline into place. Return
- *	the line discipline refcounted and assigned in ld. On an error
- *	report the error code back
+ *	the line discipline refcounted or an error.
  */
 
-static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_try_get(int disc)
 {
 	unsigned long flags;
+	struct tty_ldisc *ld;
 	struct tty_ldisc_ops *ldops;
 	int err = -EINVAL;
-	
+
+	ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
+	if (ld == NULL)
+		return ERR_PTR(-ENOMEM);
+
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	ld->ops = NULL;
 	ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@
 			/* lock it */
 			ldops->refcount++;
 			ld->ops = ldops;
+			ld->refcount = 0;
 			err = 0;
 		}
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	return err;
+	if (err)
+		return ERR_PTR(err);
+	return ld;
 }
 
 /**
  *	tty_ldisc_get		-	take a reference to an ldisc
  *	@disc: ldisc number
- *	@ld: tty line discipline structure to use
  *
  *	Takes a reference to a line discipline. Deals with refcounts and
  *	module locking counts. Returns NULL if the discipline is not available.
@@ -161,52 +166,54 @@
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_get(int disc)
 {
-	int err;
+	struct tty_ldisc *ld;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
-		return -EINVAL;
-	err = tty_ldisc_try_get(disc, ld);
-	if (err < 0) {
+		return ERR_PTR(-EINVAL);
+	ld = tty_ldisc_try_get(disc);
+	if (IS_ERR(ld)) {
 		request_module("tty-ldisc-%d", disc);
-		err = tty_ldisc_try_get(disc, ld);
+		ld = tty_ldisc_try_get(disc);
 	}
-	return err;
+	return ld;
 }
 
 /**
  *	tty_ldisc_put		-	drop ldisc reference
- *	@disc: ldisc number
+ *	@ld: ldisc
  *
  *	Drop a reference to a line discipline. Manage refcounts and
- *	module usage counts
+ *	module usage counts. Free the ldisc once the recount hits zero.
  *
  *	Locking:
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static void tty_ldisc_put(struct tty_ldisc_ops *ld)
+static void tty_ldisc_put(struct tty_ldisc *ld)
 {
 	unsigned long flags;
-	int disc = ld->num;
+	int disc = ld->ops->num;
+	struct tty_ldisc_ops *ldo;
 
 	BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = tty_ldiscs[disc];
-	BUG_ON(ld->refcount == 0);
-	ld->refcount--;
-	module_put(ld->owner);
+	ldo = tty_ldiscs[disc];
+	BUG_ON(ldo->refcount == 0);
+	ldo->refcount--;
+	module_put(ldo->owner);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	kfree(ld);
 }
 
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
+static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
 {
 	return (*pos < NR_LDISCS) ? pos : NULL;
 }
 
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	(*pos)++;
 	return (*pos < NR_LDISCS) ? pos : NULL;
@@ -219,12 +226,13 @@
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
-	struct tty_ldisc ld;
-	
-	if (tty_ldisc_get(i, &ld) < 0)
+	struct tty_ldisc *ld;
+
+	ld = tty_ldisc_try_get(i);
+	if (IS_ERR(ld))
 		return 0;
-	seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
-	tty_ldisc_put(ld.ops);
+	seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
+	tty_ldisc_put(ld);
 	return 0;
 }
 
@@ -263,8 +271,7 @@
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-	ld->refcount = 0;
-	tty->ldisc = *ld;
+	tty->ldisc = ld;
 }
 
 /**
@@ -286,7 +293,7 @@
 	int ret = 0;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = &tty->ldisc;
+	ld = tty->ldisc;
 	if (test_bit(TTY_LDISC, &tty->flags)) {
 		ld->refcount++;
 		ret = 1;
@@ -315,10 +322,9 @@
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	WARN_ON(tty->ldisc.refcount == 0);
-	return &tty->ldisc;
+	WARN_ON(tty->ldisc->refcount == 0);
+	return tty->ldisc;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 
 /**
@@ -335,10 +341,9 @@
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 {
 	if (tty_ldisc_try(tty))
-		return &tty->ldisc;
+		return tty->ldisc;
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref);
 
 /**
@@ -366,7 +371,6 @@
 		wake_up(&tty_ldisc_wait);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_deref);
 
 /**
@@ -389,6 +393,26 @@
 }
 
 /**
+ *	tty_ldisc_flush	-	flush line discipline queue
+ *	@tty: tty
+ *
+ *	Flush the line discipline queue (if any) for this tty. If there
+ *	is no line discipline active this is a no-op.
+ */
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld = tty_ldisc_ref(tty);
+	if (ld) {
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_ldisc_deref(ld);
+	}
+	tty_buffer_flush(tty);
+}
+EXPORT_SYMBOL_GPL(tty_ldisc_flush);
+
+/**
  *	tty_set_termios_ldisc		-	set ldisc field
  *	@tty: tty structure
  *	@num: line discipline number
@@ -407,6 +431,39 @@
 	mutex_unlock(&tty->termios_mutex);
 }
 
+/**
+ *	tty_ldisc_open		-	open a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to open
+ *
+ *	A helper opening method. Also a convenient debugging and check
+ *	point.
+ */
+
+static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
+	if (ld->ops->open)
+		return ld->ops->open(tty);
+	return 0;
+}
+
+/**
+ *	tty_ldisc_close		-	close a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to close
+ *
+ *	A helper close method. Also a convenient debugging and check
+ *	point.
+ */
+
+static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
+	clear_bit(TTY_LDISC_OPEN, &tty->flags);
+	if (ld->ops->close)
+		ld->ops->close(tty);
+}
 
 /**
  *	tty_ldisc_restore	-	helper for tty ldisc change
@@ -420,66 +477,136 @@
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
 	char buf[64];
-	struct tty_ldisc new_ldisc;
+	struct tty_ldisc *new_ldisc;
+	int r;
 
 	/* There is an outstanding reference here so this is safe */
-	tty_ldisc_get(old->ops->num, old);
+	old = tty_ldisc_get(old->ops->num);
+	WARN_ON(IS_ERR(old));
 	tty_ldisc_assign(tty, old);
 	tty_set_termios_ldisc(tty, old->ops->num);
-	if (old->ops->open && (old->ops->open(tty) < 0)) {
-		tty_ldisc_put(old->ops);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
 		/* This driver is always present */
-		if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+		new_ldisc = tty_ldisc_get(N_TTY);
+		if (IS_ERR(new_ldisc))
 			panic("n_tty: get");
-		tty_ldisc_assign(tty, &new_ldisc);
+		tty_ldisc_assign(tty, new_ldisc);
 		tty_set_termios_ldisc(tty, N_TTY);
-		if (new_ldisc.ops->open) {
-			int r = new_ldisc.ops->open(tty);
-				if (r < 0)
-				panic("Couldn't open N_TTY ldisc for "
-				      "%s --- error %d.",
-				      tty_name(tty, buf), r);
-		}
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty, buf), r);
 	}
 }
 
 /**
+ *	tty_ldisc_halt		-	shut down the line discipline
+ *	@tty: tty device
+ *
+ *	Shut down the line discipline and work queue for this tty device.
+ *	The TTY_LDISC flag being cleared ensures no further references can
+ *	be obtained while the delayed work queue halt ensures that no more
+ *	data is fed to the ldisc.
+ *
+ *	In order to wait for any existing references to complete see
+ *	tty_ldisc_wait_idle.
+ */
+
+static int tty_ldisc_halt(struct tty_struct *tty)
+{
+	clear_bit(TTY_LDISC, &tty->flags);
+	return cancel_delayed_work(&tty->buf.work);
+}
+
+/**
+ *	tty_ldisc_wait_idle	-	wait for the ldisc to become idle
+ *	@tty: tty to wait for
+ *
+ *	Wait for the line discipline to become idle. The discipline must
+ *	have been halted for this to guarantee it remains idle.
+ *
+ *	tty_ldisc_lock protects the ref counts currently.
+ */
+
+static int tty_ldisc_wait_idle(struct tty_struct *tty)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	while (tty->ldisc->refcount) {
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		if (wait_event_timeout(tty_ldisc_wait,
+				tty->ldisc->refcount == 0, 5 * HZ) == 0)
+			return -EBUSY;
+		spin_lock_irqsave(&tty_ldisc_lock, flags);
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return 0;
+}
+
+/**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
  *	@ldisc: the line discipline
  *
  *	Set the discipline of a tty line. Must be called from a process
- *	context.
+ *	context. The ldisc change logic has to protect itself against any
+ *	overlapping ldisc change (including on the other end of pty pairs),
+ *	the close of one side of a tty/pty pair, and eventually hangup.
  *
- *	Locking: takes tty_ldisc_lock.
- *		 called functions take termios_mutex
+ *	Locking: takes tty_ldisc_lock, termios_mutex
  */
 
 int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
 	int retval;
-	struct tty_ldisc o_ldisc, new_ldisc;
-	int work;
-	unsigned long flags;
+	struct tty_ldisc *o_ldisc, *new_ldisc;
+	int work, o_work = 0;
 	struct tty_struct *o_tty;
 
-restart:
-	/* This is a bit ugly for now but means we can break the 'ldisc
-	   is part of the tty struct' assumption later */
-	retval = tty_ldisc_get(ldisc, &new_ldisc);
-	if (retval)
-		return retval;
+	new_ldisc = tty_ldisc_get(ldisc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
+
+	/*
+	 *	We need to look at the tty locking here for pty/tty pairs
+	 *	when both sides try to change in parallel.
+	 */
+
+	o_tty = tty->link;	/* o_tty is the pty side or NULL */
+
+
+	/*
+	 *	Check the no-op case
+	 */
+
+	if (tty->ldisc->ops->num == ldisc) {
+		tty_ldisc_put(new_ldisc);
+		return 0;
+	}
 
 	/*
 	 *	Problem: What do we do if this blocks ?
+	 *	We could deadlock here
 	 */
 
 	tty_wait_until_sent(tty, 0);
 
-	if (tty->ldisc.ops->num == ldisc) {
-		tty_ldisc_put(new_ldisc.ops);
-		return 0;
+	mutex_lock(&tty->ldisc_mutex);
+
+	/*
+	 *	We could be midstream of another ldisc change which has
+	 *	dropped the lock during processing. If so we need to wait.
+	 */
+
+	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
+		mutex_unlock(&tty->ldisc_mutex);
+		wait_event(tty_ldisc_wait,
+			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
+		mutex_lock(&tty->ldisc_mutex);
 	}
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 
 	/*
 	 *	No more input please, we are switching. The new ldisc
@@ -489,8 +616,6 @@
 	tty->receive_room = 0;
 
 	o_ldisc = tty->ldisc;
-	o_tty = tty->link;
-
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -501,108 +626,181 @@
 	 *	with a userspace app continually trying to use the tty in
 	 *	parallel to the change and re-referencing the tty.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
 
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
-		if (tty->ldisc.refcount) {
-			/* Free the new ldisc we grabbed. Must drop the lock
-			   first. */
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_ldisc.ops);
-			/*
-			 * There are several reasons we may be busy, including
-			 * random momentary I/O traffic. We must therefore
-			 * retry. We could distinguish between blocking ops
-			 * and retries if we made tty_ldisc_wait() smarter.
-			 * That is up for discussion.
-			 */
-			if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-		if (o_tty && o_tty->ldisc.refcount) {
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_tty->ldisc.ops);
-			if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-	}
-	/*
-	 *	If the TTY_LDISC bit is set, then we are racing against
-	 *	another ldisc change
-	 */
-	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
-		struct tty_ldisc *ld;
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		tty_ldisc_put(new_ldisc.ops);
-		ld = tty_ldisc_ref_wait(tty);
-		tty_ldisc_deref(ld);
-		goto restart;
-	}
-	/*
-	 *	This flag is used to avoid two parallel ldisc changes. Once
-	 *	open and close are fine grained locked this may work better
-	 *	as a mutex shared with the open/close/hup paths
-	 */
-	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+	work = tty_ldisc_halt(tty);
 	if (o_tty)
-		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	
+		o_work = tty_ldisc_halt(o_tty);
+
 	/*
-	 *	From this point on we know nobody has an ldisc
-	 *	usage reference, nor can they obtain one until
-	 *	we say so later on.
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate.
+	 * We must drop the mutex here in case a hangup is also in process.
 	 */
 
-	work = cancel_delayed_work(&tty->buf.work);
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 * MUST NOT hold locks here.
-	 */
+	mutex_unlock(&tty->ldisc_mutex);
+
 	flush_scheduled_work();
+
+	/* Let any existing reference holders finish */
+	retval = tty_ldisc_wait_idle(tty);
+	if (retval < 0) {
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		tty_ldisc_put(new_ldisc);
+		return retval;
+	}
+
+	mutex_lock(&tty->ldisc_mutex);
+	if (test_bit(TTY_HUPPED, &tty->flags)) {
+		/* We were raced by the hangup method. It will have stomped
+		   the ldisc data and closed the ldisc down */
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_ldisc_put(new_ldisc);
+		return -EIO;
+	}
+
 	/* Shutdown the current discipline. */
-	if (o_ldisc.ops->close)
-		(o_ldisc.ops->close)(tty);
+	tty_ldisc_close(tty, o_ldisc);
 
 	/* Now set up the new line discipline. */
-	tty_ldisc_assign(tty, &new_ldisc);
+	tty_ldisc_assign(tty, new_ldisc);
 	tty_set_termios_ldisc(tty, ldisc);
-	if (new_ldisc.ops->open)
-		retval = (new_ldisc.ops->open)(tty);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
-		tty_ldisc_put(new_ldisc.ops);
-		tty_ldisc_restore(tty, &o_ldisc);
+		/* Back to the old one or N_TTY if we can't */
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, o_ldisc);
 	}
+
 	/* At this point we hold a reference to the new ldisc and a
 	   a reference to the old ldisc. If we ended up flipping back
 	   to the existing ldisc we have two references to it */
 
-	if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
+	if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
 		tty->ops->set_ldisc(tty);
 
-	tty_ldisc_put(o_ldisc.ops);
+	tty_ldisc_put(o_ldisc);
 
 	/*
-	 *	Allow ldisc referencing to occur as soon as the driver
-	 *	ldisc callback completes.
+	 *	Allow ldisc referencing to occur again
 	 */
 
 	tty_ldisc_enable(tty);
 	if (o_tty)
 		tty_ldisc_enable(o_tty);
 
-	/* Restart it in case no characters kick it off. Safe if
+	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
 	if (work)
 		schedule_delayed_work(&tty->buf.work, 1);
+	if (o_work)
+		schedule_delayed_work(&o_tty->buf.work, 1);
+	mutex_unlock(&tty->ldisc_mutex);
 	return retval;
 }
 
+/**
+ *	tty_reset_termios	-	reset terminal state
+ *	@tty: tty to reset
+ *
+ *	Restore a terminal to the driver default state.
+ */
+
+static void tty_reset_termios(struct tty_struct *tty)
+{
+	mutex_lock(&tty->termios_mutex);
+	*tty->termios = tty->driver->init_termios;
+	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+	mutex_unlock(&tty->termios_mutex);
+}
+
+
+/**
+ *	tty_ldisc_reinit	-	reinitialise the tty ldisc
+ *	@tty: tty to reinit
+ *
+ *	Switch the tty back to N_TTY line discipline and leave the
+ *	ldisc state closed
+ */
+
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	tty->ldisc = NULL;
+	/*
+	 *	Switch the line discipline back
+	 */
+	ld = tty_ldisc_get(N_TTY);
+	BUG_ON(IS_ERR(ld));
+	tty_ldisc_assign(tty, ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
+/**
+ *	tty_ldisc_hangup		-	hangup ldisc reset
+ *	@tty: tty being hung up
+ *
+ *	Some tty devices reset their termios when they receive a hangup
+ *	event. In that situation we must also switch back to N_TTY properly
+ *	before we reset the termios data.
+ *
+ *	Locking: We can take the ldisc mutex as the rest of the code is
+ *	careful to allow for this.
+ *
+ *	In the pty pair case this occurs in the close() path of the
+ *	tty itself so we must be careful about locking rules.
+ */
+
+void tty_ldisc_hangup(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	/*
+	 * FIXME! What are the locking issues here? This may me overdoing
+	 * things... This question is especially important now that we've
+	 * removed the irqlock.
+	 */
+	ld = tty_ldisc_ref(tty);
+	if (ld != NULL) {
+		/* We may have no line discipline at this point */
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_driver_flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+		    ld->ops->write_wakeup)
+			ld->ops->write_wakeup(tty);
+		if (ld->ops->hangup)
+			ld->ops->hangup(tty);
+		tty_ldisc_deref(ld);
+	}
+	/*
+	 * FIXME: Once we trust the LDISC code better we can wait here for
+	 * ldisc completion and fix the driver call race
+	 */
+	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+		/* Avoid racing set_ldisc */
+		mutex_lock(&tty->ldisc_mutex);
+		/* Switch back to N_TTY */
+		tty_ldisc_reinit(tty);
+		/* At this point we have a closed ldisc and we want to
+		   reopen it. We could defer this to the next open but
+		   it means auditing a lot of other paths so this is a FIXME */
+		WARN_ON(tty_ldisc_open(tty, tty->ldisc));
+		tty_ldisc_enable(tty);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_reset_termios(tty);
+	}
+}
 
 /**
  *	tty_ldisc_setup			-	open line discipline
@@ -610,24 +808,23 @@
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the initial open of a tty/pty pair in order to set up the
- *	line discplines and bind them to the tty.
+ *	line disciplines and bind them to the tty. This has no locking issues
+ *	as the device isn't yet active.
  */
 
 int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	struct tty_ldisc *ld = &tty->ldisc;
+	struct tty_ldisc *ld = tty->ldisc;
 	int retval;
 
-	if (ld->ops->open) {
-		retval = (ld->ops->open)(tty);
-		if (retval)
-			return retval;
-	}
-	if (o_tty && o_tty->ldisc.ops->open) {
-		retval = (o_tty->ldisc.ops->open)(o_tty);
+	retval = tty_ldisc_open(tty, ld);
+	if (retval)
+		return retval;
+
+	if (o_tty) {
+		retval = tty_ldisc_open(o_tty, o_tty->ldisc);
 		if (retval) {
-			if (ld->ops->close)
-				(ld->ops->close)(tty);
+			tty_ldisc_close(tty, ld);
 			return retval;
 		}
 		tty_ldisc_enable(o_tty);
@@ -635,32 +832,25 @@
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
  *	@o_tty: pair tty for pty/tty pairs
  *
- *	Called during the final close of a tty/pty pair in order to shut down the
- *	line discpline layer.
+ *	Called during the final close of a tty/pty pair in order to shut down
+ *	the line discpline layer. On exit the ldisc assigned is N_TTY and the
+ *	ldisc has not been opened.
  */
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	unsigned long flags;
-	struct tty_ldisc ld;
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
 	 * race with the set_ldisc code path.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
 
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-
+	tty_ldisc_halt(tty);
 	flush_scheduled_work();
 
 	/*
@@ -668,38 +858,19 @@
 	 * side waiters as the file is closing so user count on the file
 	 * side is zero.
 	 */
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
-		spin_lock_irqsave(&tty_ldisc_lock, flags);
-	}
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	tty_ldisc_wait_idle(tty);
+
 	/*
 	 * Shutdown the current line discipline, and reset it to N_TTY.
 	 *
 	 * FIXME: this MUST get fixed for the new reflocking
 	 */
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
 
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
-	if (o_tty) {
-		/* FIXME: could o_tty be in setldisc here ? */
-		clear_bit(TTY_LDISC, &o_tty->flags);
-		if (o_tty->ldisc.ops->close)
-			(o_tty->ldisc.ops->close)(o_tty);
-		tty_ldisc_put(o_tty->ldisc.ops);
-		WARN_ON(tty_ldisc_get(N_TTY, &ld));
-		tty_ldisc_assign(o_tty, &ld);
-		tty_set_termios_ldisc(o_tty, N_TTY);
-	}
+	tty_ldisc_reinit(tty);
+	/* This will need doing differently if we need to lock */
+	if (o_tty)
+		tty_ldisc_release(o_tty, NULL);
 }
 
 /**
@@ -712,10 +883,10 @@
 
 void tty_ldisc_init(struct tty_struct *tty)
 {
-	struct tty_ldisc ld;
-	if (tty_ldisc_get(N_TTY, &ld) < 0)
+	struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
+	if (IS_ERR(ld))
 		panic("n_tty: init_tty");
-	tty_ldisc_assign(tty, &ld);
+	tty_ldisc_assign(tty, ld);
 }
 
 void tty_ldisc_begin(void)
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9b8004c..62dadfc 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -137,7 +137,7 @@
 EXPORT_SYMBOL(tty_port_carrier_raised);
 
 /**
- *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	tty_port_raise_dtr_rts	-	Raise DTR/RTS
  *	@port: tty port
  *
  *	Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,12 +147,28 @@
 
 void tty_port_raise_dtr_rts(struct tty_port *port)
 {
-	if (port->ops->raise_dtr_rts)
-		port->ops->raise_dtr_rts(port);
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 1);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
 
 /**
+ *	tty_port_lower_dtr_rts	-	Lower DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_lower_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 0);
+}
+EXPORT_SYMBOL(tty_port_lower_dtr_rts);
+
+/**
  *	tty_port_block_til_ready	-	Waiting logic for tty open
  *	@port: the tty port being opened
  *	@tty: the tty device being bound
@@ -167,7 +183,7 @@
  *		- port flags and counts
  *
  *	The passed tty_port must implement the carrier_raised method if it can
- *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	do carrier detect and the dtr_rts method if it supports software
  *	management of these lines. Note that the dtr/rts raise is done each
  *	iteration as a hangup may have previously dropped them while we wait.
  */
@@ -182,7 +198,8 @@
 
 	/* block if port is in the process of being closed */
 	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
+		wait_event_interruptible(port->close_wait,
+				!(port->flags & ASYNC_CLOSING));
 		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
@@ -205,7 +222,6 @@
 	   before the next open may complete */
 
 	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
 
 	/* The port lock protects the port counts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -219,7 +235,7 @@
 		if (tty->termios->c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 
-		set_current_state(TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		/* Check for a hangup or uninitialised port. Return accordingly */
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
 			if (port->flags & ASYNC_HUP_NOTIFY)
@@ -240,8 +256,7 @@
 		}
 		schedule();
 	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	/* Update counts. A parallel hangup will have set count to zero and
 	   we must not mess that up further */
@@ -292,6 +307,17 @@
 	if (port->flags & ASYNC_INITIALIZED &&
 			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
+	if (port->drain_delay) {
+		unsigned int bps = tty_get_baud_rate(tty);
+		long timeout;
+
+		if (bps > 1200)
+			timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
+								HZ / 10);
+		else
+			timeout = 2 * HZ;
+		schedule_timeout_interruptible(timeout);
+	}
 	return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
@@ -302,6 +328,9 @@
 
 	tty_ldisc_flush(tty);
 
+	if (tty->termios->c_cflag & HUPCL)
+		tty_port_lower_dtr_rts(port);
+
 	spin_lock_irqsave(&port->lock, flags);
 	tty->closing = 0;
 
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 537da1c..e59b6de 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -402,27 +402,23 @@
 	return cbl;
 }
 
-#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC)
+#ifndef CONFIG_SPARC64
 /**
  *	init_hwif_ali15x3	-	Initialize the ALI IDE x86 stuff
  *	@hwif: interface to configure
  *
  *	Obtain the IRQ tables for an ALi based IDE solution on the PC
  *	class platforms. This part of the code isn't applicable to the
- *	Sparc and PowerPC systems.
+ *	Sparc systems.
  */
 
 static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 {
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 ideic, inmir;
 	s8 irq_routing_table[] = { -1,  9, 3, 10, 4,  5, 7,  6,
 				      1, 11, 0, 12, 0, 14, 0, 15 };
 	int irq = -1;
 
-	if (dev->device == PCI_DEVICE_ID_AL_M5229)
-		hwif->irq = hwif->channel ? 15 : 14;
-
 	if (isa_dev) {
 		/*
 		 * read IDE interface control
@@ -455,7 +451,7 @@
 }
 #else
 #define init_hwif_ali15x3 NULL
-#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */
+#endif /* CONFIG_SPARC64 */
 
 /**
  *	init_dma_ali15x3	-	set up DMA on ALi15x3
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7201b17..afe5a43 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -80,34 +80,6 @@
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
 /*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->buffer = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
-/*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
  */
@@ -119,19 +91,21 @@
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -191,20 +165,103 @@
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+	int err;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
+	pc->req_xfer = sense_rq->data_len;
+
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+
+	if (ide_queue_sense_rq(drive, pc))
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -276,7 +333,6 @@
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -303,11 +359,8 @@
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -343,7 +396,7 @@
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
@@ -353,6 +406,12 @@
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
+		/*
+		 * ->pc_callback() might change rq->data_len for
+		 * residual count, cache total length.
+		 */
+		done = blk_rq_bytes(rq);
+
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -361,7 +420,6 @@
 
 		if (blk_special_request(rq)) {
 			rq->errors = 0;
-			done = blk_rq_bytes(rq);
 			error = 0;
 		} else {
 
@@ -370,11 +428,6 @@
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
-				done = ide_rq_bytes(rq); /* FIXME */
-			else
-				done = blk_rq_bytes(rq);
-
 			error = uptodate ? 0 : -EIO;
 		}
 
@@ -407,21 +460,11 @@
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	if (drive->media == ide_floppy && pc->buf == NULL) {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
-	}
-
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -599,7 +642,6 @@
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 925eb9e..a75e4ee 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -206,54 +206,25 @@
 	ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
-				      struct request *failed_command)
-{
-	struct cdrom_info *info		= drive->driver_data;
-	struct request *rq		= &drive->request_sense_rq;
-
-	ide_debug_log(IDE_DBG_SENSE, "enter");
-
-	if (sense == NULL)
-		sense = &info->sense_data;
-
-	/* stuff the sense request in front of our current request */
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_ATA_PC;
-	rq->rq_disk = info->disk;
-
-	rq->data = sense;
-	rq->cmd[0] = GPCMD_REQUEST_SENSE;
-	rq->cmd[4] = 18;
-	rq->data_len = 18;
-
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-
-	/* NOTE! Save the failed command in "rq->buffer" */
-	rq->buffer = (void *) failed_command;
-
-	if (failed_command)
-		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
-					     failed_command->cmd[0]);
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
-	 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
-	 * failed request
+	 * For REQ_TYPE_SENSE, "rq->special" points to the original
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
-	struct request *failed = (struct request *)rq->buffer;
-	struct cdrom_info *info = drive->driver_data;
-	void *sense = &info->sense_data;
+	struct request *failed = (struct request *)rq->special;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
+			/*
+			 * Sense is always read into drive->sense_data.
+			 * Copy back if the failed request has its
+			 * sense pointer set.
+			 */
+			memcpy(failed->sense, sense, 18);
 			sense = failed->sense;
 			failed->sense_len = rq->sense_len;
 		}
@@ -428,7 +399,7 @@
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		cdrom_queue_request_sense(drive, NULL, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
@@ -442,8 +413,7 @@
 
 		hwif->rq = NULL;
 
-		cdrom_queue_request_sense(drive, rq->sense, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -503,14 +473,8 @@
 	 * and some drives don't send them.  Sigh.
 	 */
 	if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
-	    cmd->nleft > 0 && cmd->nleft <= 5) {
-		unsigned int ofs = cmd->nbytes - cmd->nleft;
-
-		while (cmd->nleft > 0) {
-			*((u8 *)rq->data + ofs++) = 0;
-			cmd->nleft--;
-		}
-	}
+	    cmd->nleft > 0 && cmd->nleft <= 5)
+		cmd->nleft = 0;
 }
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
@@ -543,8 +507,12 @@
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -838,15 +806,10 @@
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
@@ -896,6 +859,9 @@
 		goto out_end;
 	}
 
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1d97101..93a3cf1b 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -87,10 +87,6 @@
 
 	struct atapi_toc *toc;
 
-	/* The result of the last successful request sense command
-	   on this device. */
-	struct request_sense sense_data;
-
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c..c243880 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -411,7 +411,6 @@
 	cmd->protocol = ATA_PROT_NODATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->special = cmd;
 }
 
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a0b8cab1..d9123ec 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -510,23 +510,11 @@
 	/*
 	 * un-busy drive etc and make sure request is sane
 	 */
-
 	rq = hwif->rq;
-	if (!rq)
-		goto out;
-
-	hwif->rq = NULL;
-
-	rq->errors = 0;
-
-	if (!rq->bio)
-		goto out;
-
-	rq->sector = rq->bio->bi_sector;
-	rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->buffer = bio_data(rq->bio);
-out:
+	if (rq) {
+		hwif->rq = NULL;
+		rq->errors = 0;
+	}
 	return ret;
 }
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2b4868d..537b7c5 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -134,13 +134,17 @@
 	drive->pc = pc;
 
 	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
 			ide_floppy_report_error(floppy, pc);
+
 		/* Giving up */
 		pc->error = IDE_DRV_ERROR_GENERAL;
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 
@@ -216,15 +220,13 @@
 	ide_init_pc(pc);
 	memcpy(pc->c, rq->cmd, sizeof(pc->c));
 	pc->rq = rq;
-	if (rq->data_len && rq_data_dir(rq) == WRITE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->buf = rq->data;
-	if (rq->bio)
+	if (rq->data_len) {
 		pc->flags |= PC_FLAG_DMA_OK;
-	/*
-	 * possibly problematic, doesn't look like ide-floppy correctly
-	 * handled scattered requests if dma fails...
-	 */
+		if (rq_data_dir(rq) == WRITE)
+			pc->flags |= PC_FLAG_WRITING;
+	}
+	/* pio will be performed by ide_pio_bytes() which handles sg fine */
+	pc->buf = NULL;
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
@@ -265,8 +267,8 @@
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
+		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
 		idefloppy_blockpc_cmd(floppy, pc, rq);
@@ -275,6 +277,8 @@
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6415a2e..41d8040 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,14 +248,7 @@
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
-		cmd->sg_nents = 1;
-	} else if (!rq->bio) {
-		sg_init_one(sg, rq->data, rq->data_len);
-		cmd->sg_nents = 1;
-	} else
-		cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+	cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 }
 EXPORT_SYMBOL_GPL(ide_map_sg);
 
@@ -371,7 +364,7 @@
 		if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->data;
+			struct request_pm_state *pm = rq->special;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, pm->pm_step);
@@ -484,6 +477,9 @@
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index c1c25eb..5991b23 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -231,7 +231,6 @@
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_len = 1;
 	rq->cmd[0] = REQ_DRIVE_RESET;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	if (blk_execute_rq(drive->queue, NULL, rq, 1))
 		ret = rq->errors;
 	blk_put_request(rq);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 310d03f..a914023 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -24,11 +24,8 @@
 			start_queue = 1;
 		spin_unlock_irq(&hwif->lock);
 
-		if (start_queue) {
-			spin_lock_irq(q->queue_lock);
-			blk_start_queueing(q);
-			spin_unlock_irq(q->queue_lock);
-		}
+		if (start_queue)
+			blk_run_queue(q);
 		return;
 	}
 	spin_unlock_irq(&hwif->lock);
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0d8a151..ba1488b 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -7,7 +7,6 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int ret;
 
 	/* call ACPI _GTM only once */
@@ -15,11 +14,9 @@
 		ide_acpi_get_timing(hwif);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int err;
 
 	/* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@
 	ide_acpi_exec_tfs(drive);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_RESUME;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
 	rqpm.pm_state = PM_EVENT_ON;
 
@@ -77,7 +71,7 @@
 
 void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 #ifdef DEBUG_PM
 	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@
 
 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
-	struct ide_cmd *cmd = rq->special;
-
-	memset(cmd, 0, sizeof(*cmd));
+	struct request_pm_state *pm = rq->special;
+	struct ide_cmd cmd = { };
 
 	switch (pm->pm_step) {
 	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@
 			return ide_stopped;
 		}
 		if (ata_id_flush_ext_enabled(drive->id))
-			cmd->tf.command = ATA_CMD_FLUSH_EXT;
+			cmd.tf.command = ATA_CMD_FLUSH_EXT;
 		else
-			cmd->tf.command = ATA_CMD_FLUSH;
+			cmd.tf.command = ATA_CMD_FLUSH;
 		goto out_do_tf;
 	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		cmd->tf.command = ATA_CMD_STANDBYNOW1;
+		cmd.tf.command = ATA_CMD_STANDBYNOW1;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
 		ide_set_max_pio(drive);
@@ -141,7 +133,7 @@
 			ide_complete_power_step(drive, rq);
 		return ide_stopped;
 	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		cmd->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
 		/*
@@ -163,11 +155,11 @@
 	return ide_stopped;
 
 out_do_tf:
-	cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd->valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd->protocol = ATA_PROT_NODATA;
+	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
+	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
+	cmd.protocol = ATA_PROT_NODATA;
 
-	return do_rw_taskfile(drive, cmd);
+	return do_rw_taskfile(drive, &cmd);
 }
 
 /**
@@ -181,7 +173,7 @@
 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 	unsigned long flags;
 
 	ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@
 
 void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == IDE_PM_START_SUSPEND)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3a53e08..203bbea 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -131,13 +131,6 @@
 	IDETAPE_DIR_WRITE = (1 << 2),
 };
 
-struct idetape_bh {
-	u32 b_size;
-	atomic_t b_count;
-	struct idetape_bh *b_reqnext;
-	char *b_data;
-};
-
 /* Tape door status */
 #define DOOR_UNLOCKED			0
 #define DOOR_LOCKED			1
@@ -219,18 +212,12 @@
 
 	/* Data buffer size chosen based on the tape's recommendation */
 	int buffer_size;
-	/* merge buffer */
-	struct idetape_bh *merge_bh;
-	/* size of the merge buffer */
-	int merge_bh_size;
-	/* pointer to current buffer head within the merge buffer */
-	struct idetape_bh *bh;
-	char *b_data;
-	int b_count;
-
-	int pages_per_buffer;
-	/* Wasted space in each stage */
-	int excess_bh_size;
+	/* Staging buffer of buffer_size bytes */
+	void *buf;
+	/* The read/write cursor */
+	void *cur;
+	/* The number of valid bytes in buf */
+	size_t valid;
 
 	/* Measures average tape speed */
 	unsigned long avg_time;
@@ -297,84 +284,6 @@
 	return tape;
 }
 
-static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				  unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	while (bcount) {
-		if (bh == NULL)
-			break;
-		count = min(
-			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
-			bcount);
-		drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
-					atomic_read(&bh->b_count), count);
-		bcount -= count;
-		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
-	}
-
-	pc->bh = bh;
-
-	return bcount;
-}
-
-static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				   unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	while (bcount) {
-		if (bh == NULL)
-			break;
-		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
-		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
-		bcount -= count;
-		pc->b_data += count;
-		pc->b_count -= count;
-		if (!pc->b_count) {
-			bh = bh->b_reqnext;
-			pc->bh = bh;
-			if (bh) {
-				pc->b_data = bh->b_data;
-				pc->b_count = atomic_read(&bh->b_count);
-			}
-		}
-	}
-
-	return bcount;
-}
-
-static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-	unsigned int bcount = pc->xferred;
-
-	if (pc->flags & PC_FLAG_WRITING)
-		return;
-	while (bcount) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return;
-		}
-		count = min((unsigned int)bh->b_size, (unsigned int)bcount);
-		atomic_set(&bh->b_count, count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			bh = bh->b_reqnext;
-		bcount -= count;
-	}
-	pc->bh = bh;
-}
-
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -392,12 +301,10 @@
 		 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
 	/* Correct pc->xferred by asking the tape.	 */
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
+	if (pc->flags & PC_FLAG_DMA_ERROR)
 		pc->xferred = pc->req_xfer -
 			tape->blk_size *
 			get_unaligned_be32(&sense[3]);
-		idetape_update_buffers(drive, pc);
-	}
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -436,29 +343,6 @@
 	}
 }
 
-/* Free data buffers completely. */
-static void ide_tape_kfree_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *prev_bh, *bh = tape->merge_bh;
-
-	while (bh) {
-		u32 size = bh->b_size;
-
-		while (size) {
-			unsigned int order = fls(size >> PAGE_SHIFT)-1;
-
-			if (bh->b_data)
-				free_pages((unsigned long)bh->b_data, order);
-
-			size &= (order-1);
-			bh->b_data += (1 << order) * PAGE_SIZE;
-		}
-		prev_bh = bh;
-		bh = bh->b_reqnext;
-		kfree(prev_bh);
-	}
-}
-
 static void ide_tape_handle_dsc(ide_drive_t *);
 
 static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -496,7 +380,7 @@
 		}
 
 		tape->first_frame += blocks;
-		rq->current_nr_sectors -= blocks;
+		rq->data_len -= blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -558,19 +442,6 @@
 	idetape_postpone_request(drive);
 }
 
-static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				unsigned int bcount, int write)
-{
-	unsigned int bleft;
-
-	if (write)
-		bleft = idetape_output_buffers(drive, pc, bcount);
-	else
-		bleft = idetape_input_buffers(drive, pc, bcount);
-
-	return bcount - bleft;
-}
-
 /*
  * Packet Command Interface
  *
@@ -622,6 +493,8 @@
 
 	if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
 		(pc->flags & PC_FLAG_ABORT)) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		/*
 		 * We will "abort" retrying a packet command in case legitimate
 		 * error code was received (crossing a filemark, or end of the
@@ -641,8 +514,10 @@
 			/* Giving up */
 			pc->error = IDE_DRV_ERROR_GENERAL;
 		}
+
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
@@ -695,7 +570,7 @@
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -711,27 +586,22 @@
 				   struct ide_atapi_pc *pc, struct request *rq,
 				   u8 opcode)
 {
-	struct idetape_bh *bh = (struct idetape_bh *)rq->special;
-	unsigned int length = rq->current_nr_sectors;
+	unsigned int length = rq->nr_sectors;
 
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = bh;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
 	if (pc->req_xfer == tape->buffer_size)
 		pc->flags |= PC_FLAG_DMA_OK;
 
-	if (opcode == READ_6) {
+	if (opcode == READ_6)
 		pc->c[0] = READ_6;
-		atomic_set(&bh->b_count, 0);
-	} else if (opcode == WRITE_6) {
+	else if (opcode == WRITE_6) {
 		pc->c[0] = WRITE_6;
 		pc->flags |= PC_FLAG_WRITING;
-		pc->b_data = bh->b_data;
-		pc->b_count = atomic_read(&bh->b_count);
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
@@ -747,12 +617,10 @@
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
-			" current_nr_sectors: %u\n",
-			(unsigned long long)rq->sector, rq->nr_sectors,
-			rq->current_nr_sectors);
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n",
+		  (unsigned long long)rq->sector, rq->nr_sectors);
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -828,7 +696,7 @@
 		goto out;
 	}
 	if (rq->cmd[13] & REQ_IDETAPE_PC1) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 		rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
 		rq->cmd[13] |= REQ_IDETAPE_PC2;
 		goto out;
@@ -840,6 +708,9 @@
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
@@ -847,170 +718,13 @@
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
 /*
- * The function below uses __get_free_pages to allocate a data buffer of size
- * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
- * much as possible.
- *
- * It returns a pointer to the newly allocated buffer, or NULL in case of
- * failure.
- */
-static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full, int clear)
-{
-	struct idetape_bh *prev_bh, *bh, *merge_bh;
-	int pages = tape->pages_per_buffer;
-	unsigned int order, b_allocd;
-	char *b_data = NULL;
-
-	merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	bh = merge_bh;
-	if (bh == NULL)
-		goto abort;
-
-	order = fls(pages) - 1;
-	bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-	if (!bh->b_data)
-		goto abort;
-	b_allocd = (1 << order) * PAGE_SIZE;
-	pages &= (order-1);
-
-	if (clear)
-		memset(bh->b_data, 0, b_allocd);
-	bh->b_reqnext = NULL;
-	bh->b_size = b_allocd;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
-
-	while (pages) {
-		order = fls(pages) - 1;
-		b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-		if (!b_data)
-			goto abort;
-		b_allocd = (1 << order) * PAGE_SIZE;
-
-		if (clear)
-			memset(b_data, 0, b_allocd);
-
-		/* newly allocated page frames below buffer header or ...*/
-		if (bh->b_data == b_data + b_allocd) {
-			bh->b_size += b_allocd;
-			bh->b_data -= b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		/* they are above the header */
-		if (b_data == bh->b_data + bh->b_size) {
-			bh->b_size += b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		prev_bh = bh;
-		bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-		if (!bh) {
-			free_pages((unsigned long) b_data, order);
-			goto abort;
-		}
-		bh->b_reqnext = NULL;
-		bh->b_data = b_data;
-		bh->b_size = b_allocd;
-		atomic_set(&bh->b_count, full ? bh->b_size : 0);
-		prev_bh->b_reqnext = bh;
-
-		pages &= (order-1);
-	}
-
-	bh->b_size -= tape->excess_bh_size;
-	if (full)
-		atomic_sub(tape->excess_bh_size, &bh->b_count);
-	return merge_bh;
-abort:
-	ide_tape_kfree_buffer(tape);
-	return NULL;
-}
-
-static int idetape_copy_stage_from_user(idetape_tape_t *tape,
-					const char __user *buf, int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int count;
-	int ret = 0;
-
-	while (n) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		count = min((unsigned int)
-				(bh->b_size - atomic_read(&bh->b_count)),
-				(unsigned int)n);
-		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				count))
-			ret = 1;
-		n -= count;
-		atomic_add(count, &bh->b_count);
-		buf += count;
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
-	}
-	tape->bh = bh;
-	return ret;
-}
-
-static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
-				      int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int count;
-	int ret = 0;
-
-	while (n) {
-		if (bh == NULL) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		count = min(tape->b_count, n);
-		if  (copy_to_user(buf, tape->b_data, count))
-			ret = 1;
-		n -= count;
-		tape->b_data += count;
-		tape->b_count -= count;
-		buf += count;
-		if (!tape->b_count) {
-			bh = bh->b_reqnext;
-			tape->bh = bh;
-			if (bh) {
-				tape->b_data = bh->b_data;
-				tape->b_count = atomic_read(&bh->b_count);
-			}
-		}
-	}
-	return ret;
-}
-
-static void idetape_init_merge_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-	tape->bh = tape->merge_bh;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-		atomic_set(&bh->b_count, 0);
-	else {
-		tape->b_data = bh->b_data;
-		tape->b_count = atomic_read(&bh->b_count);
-	}
-}
-
-/*
  * Write a filemark if write_filemark=1. Flush the device buffers without
  * writing a filemark otherwise.
  */
@@ -1107,10 +821,10 @@
 		return;
 
 	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
-	tape->merge_bh_size = 0;
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	tape->valid = 0;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -1164,36 +878,44 @@
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
-				 struct idetape_bh *bh)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	struct request *rq;
-	int ret, errors;
+	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
+	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
+	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
-	rq->special = (void *)bh;
 	rq->sector = tape->first_frame;
-	rq->nr_sectors = blocks;
-	rq->current_nr_sectors = blocks;
+
+	if (size) {
+		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
+				      __GFP_WAIT);
+		if (ret)
+			goto out_put;
+	}
+
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	errors = rq->errors;
-	ret = tape->blk_size * (blocks - rq->current_nr_sectors);
+	/* calculate the number of transferred bytes and update buffer state */
+	size -= rq->data_len;
+	tape->cur = tape->buf;
+	if (cmd == REQ_IDETAPE_READ)
+		tape->valid = size;
+	else
+		tape->valid = 0;
+
+	ret = size;
+	if (rq->errors == IDE_DRV_ERROR_GENERAL)
+		ret = -EIO;
+out_put:
 	blk_put_request(rq);
-
-	if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
-		return 0;
-
-	if (tape->merge_bh)
-		idetape_init_merge_buffer(tape);
-	if (errors == IDE_DRV_ERROR_GENERAL)
-		return -EIO;
 	return ret;
 }
 
@@ -1230,153 +952,87 @@
 	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
 }
 
-/* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				     blocks, tape->merge_bh);
-}
-
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks, min;
-	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
 				" but we are not writing.\n");
 		return;
 	}
-	if (tape->merge_bh_size > tape->buffer_size) {
-		printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n");
-		tape->merge_bh_size = tape->buffer_size;
-	}
-	if (tape->merge_bh_size) {
-		blocks = tape->merge_bh_size / tape->blk_size;
-		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i;
+	if (tape->buf) {
+		size_t aligned = roundup(tape->valid, tape->blk_size);
 
-			blocks++;
-			i = tape->blk_size - tape->merge_bh_size %
-				tape->blk_size;
-			bh = tape->bh->b_reqnext;
-			while (bh) {
-				atomic_set(&bh->b_count, 0);
-				bh = bh->b_reqnext;
-			}
-			bh = tape->bh;
-			while (i) {
-				if (bh == NULL) {
-					printk(KERN_INFO "ide-tape: bug,"
-							 " bh NULL\n");
-					break;
-				}
-				min = min(i, (unsigned int)(bh->b_size -
-						atomic_read(&bh->b_count)));
-				memset(bh->b_data + atomic_read(&bh->b_count),
-						0, min);
-				atomic_add(min, &bh->b_count);
-				i -= min;
-				bh = bh->b_reqnext;
-			}
-		}
-		(void) idetape_add_chrdev_write_request(drive, blocks);
-		tape->merge_bh_size = 0;
-	}
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		memset(tape->cur, 0, aligned - tape->valid);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
 
-static int idetape_init_read(ide_drive_t *drive)
+static int idetape_init_rw(ide_drive_t *drive, int dir)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int bytes_read;
+	int rc;
 
-	/* Initialize read operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-			ide_tape_flush_merge_buffer(drive);
-			idetape_flush_tape_buffers(drive);
-		}
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size should be"
-					 " 0 now\n");
-			tape->merge_bh_size = 0;
-		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-		if (!tape->merge_bh)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_READ;
+	BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
 
-		/*
-		 * Issue a read 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode.
-		 * No point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0,
-							tape->merge_bh);
-			if (bytes_read < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return bytes_read;
-			}
+	if (tape->chrdev_dir == dir)
+		return 0;
+
+	if (tape->chrdev_dir == IDETAPE_DIR_READ)
+		ide_tape_discard_merge_buffer(drive, 1);
+	else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
+		ide_tape_flush_merge_buffer(drive);
+		idetape_flush_tape_buffers(drive);
+	}
+
+	if (tape->buf || tape->valid) {
+		printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+		tape->valid = 0;
+	}
+
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!tape->buf)
+		return -ENOMEM;
+	tape->chrdev_dir = dir;
+	tape->cur = tape->buf;
+
+	/*
+	 * Issue a 0 rw command to ensure that DSC handshake is
+	 * switched from completion mode to buffer available mode.  No
+	 * point in issuing this if DSC overlap isn't supported, some
+	 * drives (Seagate STT3401A) will return an error.
+	 */
+	if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
+		int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
+						  : REQ_IDETAPE_WRITE;
+
+		rc = idetape_queue_rw_tail(drive, cmd, 0);
+		if (rc < 0) {
+			kfree(tape->buf);
+			tape->buf = NULL;
+			tape->chrdev_dir = IDETAPE_DIR_NONE;
+			return rc;
 		}
 	}
 
 	return 0;
 }
 
-/* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
-
-	/* If we are at a filemark, return a read length of 0 */
-	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
-		return 0;
-
-	idetape_init_read(drive);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks,
-				     tape->merge_bh);
-}
-
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh;
-	int blocks;
+
+	memset(tape->buf, 0, tape->buffer_size);
 
 	while (bcount) {
-		unsigned int count;
+		unsigned int count = min(tape->buffer_size, bcount);
 
-		bh = tape->merge_bh;
-		count = min(tape->buffer_size, bcount);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
 		bcount -= count;
-		blocks = count / tape->blk_size;
-		while (count) {
-			atomic_set(&bh->b_count,
-				   min(count, (unsigned int)bh->b_size));
-			memset(bh->b_data, 0, atomic_read(&bh->b_count));
-			count -= atomic_read(&bh->b_count);
-			bh = bh->b_reqnext;
-		}
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
-				      tape->merge_bh);
 	}
 }
 
@@ -1456,7 +1112,7 @@
 	}
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		tape->merge_bh_size = 0;
+		tape->valid = 0;
 		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1505,9 +1161,9 @@
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t bytes_read, temp, actually_read = 0, rc;
+	size_t done = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1517,49 +1173,43 @@
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
-	rc = idetape_init_read(drive);
+
+	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->merge_bh_size) {
-		actually_read = min((unsigned int)(tape->merge_bh_size),
-				    (unsigned int)count);
-		if (idetape_copy_stage_to_user(tape, buf, actually_read))
+
+	while (done < count) {
+		size_t todo;
+
+		/* refill if staging buffer is empty */
+		if (!tape->valid) {
+			/* If we are at a filemark, nothing more to read */
+			if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+				break;
+			/* read */
+			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
+						  tape->buffer_size) <= 0)
+				break;
+		}
+
+		/* copy out */
+		todo = min_t(size_t, count - done, tape->valid);
+		if (copy_to_user(buf + done, tape->cur, todo))
 			ret = -EFAULT;
-		buf += actually_read;
-		tape->merge_bh_size -= actually_read;
-		count -= actually_read;
+
+		tape->cur += todo;
+		tape->valid -= todo;
+		done += todo;
 	}
-	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-		if (bytes_read <= 0)
-			goto finish;
-		if (idetape_copy_stage_to_user(tape, buf, bytes_read))
-			ret = -EFAULT;
-		buf += bytes_read;
-		count -= bytes_read;
-		actually_read += bytes_read;
-	}
-	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-		if (bytes_read <= 0)
-			goto finish;
-		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (idetape_copy_stage_to_user(tape, buf, temp))
-			ret = -EFAULT;
-		actually_read += temp;
-		tape->merge_bh_size = bytes_read-temp;
-	}
-finish:
-	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+
+	if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
 		return 0;
 	}
 
-	return ret ? ret : actually_read;
+	return ret ? ret : done;
 }
 
 static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1567,9 +1217,9 @@
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t actually_written = 0;
+	size_t done = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	/* The drive is write protected. */
 	if (tape->write_prot)
@@ -1578,80 +1228,31 @@
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	/* Initialize write operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-		if (tape->chrdev_dir == IDETAPE_DIR_READ)
-			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size "
-				"should be 0 now\n");
-			tape->merge_bh_size = 0;
-		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-		if (!tape->merge_bh)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		idetape_init_merge_buffer(tape);
+	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
+	if (rc < 0)
+		return rc;
 
-		/*
-		 * Issue a write 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode. No
-		 * point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0,
-							tape->merge_bh);
-			if (retval < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return retval;
-			}
-		}
-	}
-	if (count == 0)
-		return (0);
-	if (tape->merge_bh_size) {
-		if (tape->merge_bh_size >= tape->buffer_size) {
-			printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
-			tape->merge_bh_size = 0;
-		}
-		actually_written = min((unsigned int)
-				(tape->buffer_size - tape->merge_bh_size),
-				(unsigned int)count);
-		if (idetape_copy_stage_from_user(tape, buf, actually_written))
-				ret = -EFAULT;
-		buf += actually_written;
-		tape->merge_bh_size += actually_written;
-		count -= actually_written;
+	while (done < count) {
+		size_t todo;
 
-		if (tape->merge_bh_size == tape->buffer_size) {
-			ssize_t retval;
-			tape->merge_bh_size = 0;
-			retval = idetape_add_chrdev_write_request(drive, ctl);
-			if (retval <= 0)
-				return (retval);
-		}
-	}
-	while (count >= tape->buffer_size) {
-		ssize_t retval;
-		if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
+		/* flush if staging buffer is full */
+		if (tape->valid == tape->buffer_size &&
+		    idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+					  tape->buffer_size) <= 0)
+			return rc;
+
+		/* copy in */
+		todo = min_t(size_t, count - done,
+			     tape->buffer_size - tape->valid);
+		if (copy_from_user(tape->cur, buf + done, todo))
 			ret = -EFAULT;
-		buf += tape->buffer_size;
-		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive, ctl);
-		actually_written += tape->buffer_size;
-		if (retval <= 0)
-			return (retval);
+
+		tape->cur += todo;
+		tape->valid += todo;
+		done += todo;
 	}
-	if (count) {
-		actually_written += count;
-		if (idetape_copy_stage_from_user(tape, buf, count))
-			ret = -EFAULT;
-		tape->merge_bh_size += count;
-	}
-	return ret ? ret : actually_written;
+
+	return ret ? ret : done;
 }
 
 static int idetape_write_filemark(ide_drive_t *drive)
@@ -1812,7 +1413,7 @@
 		idetape_flush_tape_buffers(drive);
 	}
 	if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-		block_offset = tape->merge_bh_size /
+		block_offset = tape->valid /
 			(tape->blk_size * tape->user_bs_factor);
 		position = idetape_read_position(drive);
 		if (position < 0)
@@ -1960,12 +1561,12 @@
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0);
-	if (tape->merge_bh != NULL) {
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (tape->buf != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	idetape_write_filemark(drive);
 	idetape_flush_tape_buffers(drive);
@@ -2159,8 +1760,6 @@
 	u16 *ctl = (u16 *)&tape->caps[12];
 
 	drive->pc_callback	 = ide_tape_callback;
-	drive->pc_update_buffers = idetape_update_buffers;
-	drive->pc_io_buffers	 = ide_tape_io_buffers;
 
 	drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
 
@@ -2191,11 +1790,6 @@
 		tape->buffer_size = *ctl * tape->blk_size;
 	}
 	buffer_size = tape->buffer_size;
-	tape->pages_per_buffer = buffer_size / PAGE_SIZE;
-	if (buffer_size % PAGE_SIZE) {
-		tape->pages_per_buffer++;
-		tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
-	}
 
 	/* select the "best" DSC read/write polling freq */
 	speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
@@ -2238,7 +1832,7 @@
 	ide_drive_t *drive = tape->drive;
 	struct gendisk *g = tape->disk;
 
-	BUG_ON(tape->merge_bh_size);
+	BUG_ON(tape->valid);
 
 	drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
 	drive->driver_data = NULL;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4aa6223..f400eb4 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -424,7 +424,9 @@
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->buffer = buf;
+
+	if (cmd->tf_flags & IDE_TFLAG_WRITE)
+		rq->cmd_flags |= REQ_RW;
 
 	/*
 	 * (ks) We transfer currently only whole sectors.
@@ -432,18 +434,20 @@
 	 * if we would find a solution to transfer any size.
 	 * To support special commands like READ LONG.
 	 */
-	rq->hard_nr_sectors = rq->nr_sectors = nsect;
-	rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		rq->cmd_flags |= REQ_RW;
+	if (nsect) {
+		error = blk_rq_map_kern(drive->queue, rq, buf,
+					nsect * SECTOR_SIZE, __GFP_WAIT);
+		if (error)
+			goto put_req;
+	}
 
 	rq->special = cmd;
 	cmd->rq = rq;
 
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
 
+put_req:
+	blk_put_request(rq);
 	return error;
 }
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b0..3fd8b1e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@
 	union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -656,8 +655,7 @@
 		/* the bio has been remapped so dispatch it */
 
 		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+				    tio->io->bio->bi_bdev->bd_dev, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 4e63cc9..151bf5b 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1,5 +1,5 @@
 /* Low-level parallel-port routines for 8255-based PC-style hardware.
- * 
+ *
  * Authors: Phil Blundell <philb@gnu.org>
  *          Tim Waugh <tim@cyberelk.demon.co.uk>
  *	    Jose Renau <renau@acm.org>
@@ -11,7 +11,7 @@
  * Cleaned up include files - Russell King <linux@arm.uk.linux.org>
  * DMA support - Bert De Jonghe <bert@sophis.be>
  * Many ECP bugs fixed.  Fred Barnes & Jamie Lokier, 1999
- * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. 
+ * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G.
  * Various hacks, Fred Barnes, 04/2001
  * Updated probing logic - Adam Belay <ambx1@neo.rr.com>
  */
@@ -56,10 +56,10 @@
 #include <linux/pnp.h>
 #include <linux/platform_device.h>
 #include <linux/sysctl.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
 
-#include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
 
 #include <linux/parport.h>
 #include <linux/parport_pc.h>
@@ -82,7 +82,7 @@
 #define ECR_TST 06
 #define ECR_CNF 07
 #define ECR_MODE_MASK 0xe0
-#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v))
+#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v))
 
 #undef DEBUG
 
@@ -109,27 +109,27 @@
 static int pnp_registered_parport;
 
 /* frob_control, but for ECR */
-static void frob_econtrol (struct parport *pb, unsigned char m,
+static void frob_econtrol(struct parport *pb, unsigned char m,
 			   unsigned char v)
 {
 	unsigned char ectr = 0;
 
 	if (m != 0xff)
-		ectr = inb (ECONTROL (pb));
+		ectr = inb(ECONTROL(pb));
 
-	DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
+	DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
 		m, v, ectr, (ectr & ~m) ^ v);
 
-	outb ((ectr & ~m) ^ v, ECONTROL (pb));
+	outb((ectr & ~m) ^ v, ECONTROL(pb));
 }
 
-static __inline__ void frob_set_mode (struct parport *p, int mode)
+static inline void frob_set_mode(struct parport *p, int mode)
 {
-	frob_econtrol (p, ECR_MODE_MASK, mode << 5);
+	frob_econtrol(p, ECR_MODE_MASK, mode << 5);
 }
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-/* Safely change the mode bits in the ECR 
+/* Safely change the mode bits in the ECR
    Returns:
 	    0    : Success
 	   -EBUSY: Could not drain FIFO in some finite amount of time,
@@ -141,17 +141,18 @@
 	unsigned char oecr;
 	int mode;
 
-	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m);
+	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m);
 
 	if (!priv->ecr) {
-		printk (KERN_DEBUG "change_mode: but there's no ECR!\n");
+		printk(KERN_DEBUG "change_mode: but there's no ECR!\n");
 		return 0;
 	}
 
 	/* Bits <7:5> contain the mode. */
-	oecr = inb (ECONTROL (p));
+	oecr = inb(ECONTROL(p));
 	mode = (oecr >> 5) & 0x7;
-	if (mode == m) return 0;
+	if (mode == m)
+		return 0;
 
 	if (mode >= 2 && !(priv->ctr & 0x20)) {
 		/* This mode resets the FIFO, so we may
@@ -163,19 +164,21 @@
 		case ECR_ECP: /* ECP Parallel Port mode */
 			/* Busy wait for 200us */
 			for (counter = 0; counter < 40; counter++) {
-				if (inb (ECONTROL (p)) & 0x01)
+				if (inb(ECONTROL(p)) & 0x01)
 					break;
-				if (signal_pending (current)) break;
-				udelay (5);
+				if (signal_pending(current))
+					break;
+				udelay(5);
 			}
 
 			/* Poll slowly. */
-			while (!(inb (ECONTROL (p)) & 0x01)) {
-				if (time_after_eq (jiffies, expire))
+			while (!(inb(ECONTROL(p)) & 0x01)) {
+				if (time_after_eq(jiffies, expire))
 					/* The FIFO is stuck. */
 					return -EBUSY;
-				schedule_timeout_interruptible(msecs_to_jiffies(10));
-				if (signal_pending (current))
+				schedule_timeout_interruptible(
+							msecs_to_jiffies(10));
+				if (signal_pending(current))
 					break;
 			}
 		}
@@ -185,20 +188,20 @@
 		/* We have to go through mode 001 */
 		oecr &= ~(7 << 5);
 		oecr |= ECR_PS2 << 5;
-		ECR_WRITE (p, oecr);
+		ECR_WRITE(p, oecr);
 	}
 
 	/* Set the mode. */
 	oecr &= ~(7 << 5);
 	oecr |= m << 5;
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
 /* Find FIFO lossage; FIFO is reset */
 #if 0
-static int get_fifo_residue (struct parport *p)
+static int get_fifo_residue(struct parport *p)
 {
 	int residue;
 	int cnfga;
@@ -206,26 +209,26 @@
 
 	/* Adjust for the contents of the FIFO. */
 	for (residue = priv->fifo_depth; ; residue--) {
-		if (inb (ECONTROL (p)) & 0x2)
+		if (inb(ECONTROL(p)) & 0x2)
 				/* Full up. */
 			break;
 
-		outb (0, FIFO (p));
+		outb(0, FIFO(p));
 	}
 
-	printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
+	printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
 		residue);
 
 	/* Reset the FIFO. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
 	/* Now change to config mode and clean up. FIXME */
-	frob_set_mode (p, ECR_CNF);
-	cnfga = inb (CONFIGA (p));
-	printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
+	frob_set_mode(p, ECR_CNF);
+	cnfga = inb(CONFIGA(p));
+	printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
 
 	if (!(cnfga & (1<<2))) {
-		printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
+		printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
 		residue++;
 	}
 
@@ -233,9 +236,11 @@
 	 * PWord != 1 byte. */
 
 	/* Back to PS2 mode. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
-	DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p)));
+	DPRINTK(KERN_DEBUG
+	     "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n",
+							inb(ECONTROL(p)));
 	return residue;
 }
 #endif  /*  0 */
@@ -257,8 +262,8 @@
 	/* To clear timeout some chips require double read */
 	parport_pc_read_status(pb);
 	r = parport_pc_read_status(pb);
-	outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */
-	outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */
+	outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */
+	outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */
 	r = parport_pc_read_status(pb);
 
 	return !(r & 0x01);
@@ -272,7 +277,8 @@
  * of these are in parport_pc.h.
  */
 
-static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s)
+static void parport_pc_init_state(struct pardevice *dev,
+						struct parport_state *s)
 {
 	s->u.pc.ctr = 0xc;
 	if (dev->irq_func &&
@@ -289,22 +295,23 @@
 	const struct parport_pc_private *priv = p->physport->private_data;
 	s->u.pc.ctr = priv->ctr;
 	if (priv->ecr)
-		s->u.pc.ecr = inb (ECONTROL (p));
+		s->u.pc.ecr = inb(ECONTROL(p));
 }
 
-static void parport_pc_restore_state(struct parport *p, struct parport_state *s)
+static void parport_pc_restore_state(struct parport *p,
+						struct parport_state *s)
 {
 	struct parport_pc_private *priv = p->physport->private_data;
 	register unsigned char c = s->u.pc.ctr & priv->ctr_writable;
-	outb (c, CONTROL (p));
+	outb(c, CONTROL(p));
 	priv->ctr = c;
 	if (priv->ecr)
-		ECR_WRITE (p, s->u.pc.ecr);
+		ECR_WRITE(p, s->u.pc.ecr);
 }
 
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
-					size_t length, int flags)
+static size_t parport_pc_epp_read_data(struct parport *port, void *buf,
+				       size_t length, int flags)
 {
 	size_t got = 0;
 
@@ -316,54 +323,52 @@
 		 *  nFault is 0 if there is at least 1 byte in the Warp's FIFO
 		 *  pError is 1 if there are 16 bytes in the Warp's FIFO
 		 */
-		status = inb (STATUS (port));
+		status = inb(STATUS(port));
 
-		while (!(status & 0x08) && (got < length)) {
-			if ((left >= 16) && (status & 0x20) && !(status & 0x08)) {
+		while (!(status & 0x08) && got < length) {
+			if (left >= 16 && (status & 0x20) && !(status & 0x08)) {
 				/* can grab 16 bytes from warp fifo */
-				if (!((long)buf & 0x03)) {
-					insl (EPPDATA (port), buf, 4);
-				} else {
-					insb (EPPDATA (port), buf, 16);
-				}
+				if (!((long)buf & 0x03))
+					insl(EPPDATA(port), buf, 4);
+				else
+					insb(EPPDATA(port), buf, 16);
 				buf += 16;
 				got += 16;
 				left -= 16;
 			} else {
 				/* grab single byte from the warp fifo */
-				*((char *)buf) = inb (EPPDATA (port));
+				*((char *)buf) = inb(EPPDATA(port));
 				buf++;
 				got++;
 				left--;
 			}
-			status = inb (STATUS (port));
+			status = inb(STATUS(port));
 			if (status & 0x01) {
 				/* EPP timeout should never occur... */
-				printk (KERN_DEBUG "%s: EPP timeout occurred while talking to "
-					"w91284pic (should not have done)\n", port->name);
-				clear_epp_timeout (port);
+				printk(KERN_DEBUG
+"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name);
+				clear_epp_timeout(port);
 			}
 		}
 		return got;
 	}
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			insl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			insb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			insl(EPPDATA(port), buf, (length >> 2));
+		else
+			insb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPDATA(port));
+		*((char *)buf) = inb(EPPDATA(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
+		if (inb(STATUS(port)) & 0x01) {
 			/* EPP timeout */
-			clear_epp_timeout (port);
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -371,28 +376,27 @@
 	return got;
 }
 
-static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
-					 size_t length, int flags)
+static size_t parport_pc_epp_write_data(struct parport *port, const void *buf,
+					size_t length, int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			outsl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			outsb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			outsl(EPPDATA(port), buf, (length >> 2));
+		else
+			outsb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPDATA(port));
+		outb(*((char *)buf), EPPDATA(port));
 		buf++;
-		if (inb (STATUS(port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -400,24 +404,24 @@
 	return written;
 }
 
-static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
+static size_t parport_pc_epp_read_addr(struct parport *port, void *buf,
 					size_t length, int flags)
 {
 	size_t got = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		insb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		insb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPADDR (port));
+		*((char *)buf) = inb(EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -425,25 +429,25 @@
 	return got;
 }
 
-static size_t parport_pc_epp_write_addr (struct parport *port,
+static size_t parport_pc_epp_write_addr(struct parport *port,
 					 const void *buf, size_t length,
 					 int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		outsb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		outsb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPADDR (port));
+		outb(*((char *)buf), EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -451,74 +455,74 @@
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_data (struct parport *port,
-					    const void *buf, size_t length,
-					    int flags)
+static size_t parport_pc_ecpepp_write_data(struct parport *port,
+					   const void *buf, size_t length,
+					   int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_addr (struct parport *port,
+static size_t parport_pc_ecpepp_write_addr(struct parport *port,
 					    const void *buf, size_t length,
 					    int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 #endif /* IEEE 1284 support */
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-static size_t parport_pc_fifo_write_block_pio (struct parport *port,
+static size_t parport_pc_fifo_write_block_pio(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
 	const unsigned char *bufp = buf;
 	size_t left = length;
 	unsigned long expire = jiffies + port->physport->cad->timeout;
-	const int fifo = FIFO (port);
+	const int fifo = FIFO(port);
 	int poll_for = 8; /* 80 usecs */
 	const struct parport_pc_private *priv = port->physport->private_data;
 	const int fifo_depth = priv->fifo_depth;
@@ -526,25 +530,25 @@
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned char byte;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 		int i = 0;
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
@@ -552,21 +556,22 @@
 			/* FIFO is full. Wait for interrupt. */
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-			ret = parport_wait_event (port, HZ);
-			if (ret < 0) break;
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			ret = parport_wait_event(port, HZ);
+			if (ret < 0)
+				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-				printk (KERN_DEBUG "FIFO write timed out\n");
+				printk(KERN_DEBUG "FIFO write timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire))
-					schedule ();
+				    time_before(jiffies, expire))
+					schedule();
 
 				goto false_alarm;
 			}
@@ -577,38 +582,38 @@
 		/* Can't fail now. */
 		expire = jiffies + port->cad->timeout;
 
-	poll:
-		if (signal_pending (current))
+poll:
+		if (signal_pending(current))
 			break;
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Blast it full. */
 			const int n = left < fifo_depth ? left : fifo_depth;
-			outsb (fifo, bufp, n);
+			outsb(fifo, bufp, n);
 			bufp += n;
 			left -= n;
 
 			/* Adjust the poll time. */
-			if (i < (poll_for - 2)) poll_for--;
+			if (i < (poll_for - 2))
+				poll_for--;
 			continue;
 		} else if (i++ < poll_for) {
-			udelay (10);
-			ecrval = inb (ECONTROL (port));
+			udelay(10);
+			ecrval = inb(ECONTROL(port));
 			goto poll;
 		}
 
-		/* Half-full (call me an optimist) */
+		/* Half-full(call me an optimist) */
 		byte = *bufp++;
-		outb (byte, fifo);
+		outb(byte, fifo);
 		left--;
-        }
-
-dump_parport_state ("leave fifo_write_block_pio", port);
+	}
+	dump_parport_state("leave fifo_write_block_pio", port);
 	return length - left;
 }
 
 #ifdef HAS_DMA
-static size_t parport_pc_fifo_write_block_dma (struct parport *port,
+static size_t parport_pc_fifo_write_block_dma(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
@@ -621,7 +626,7 @@
 	unsigned long start = (unsigned long) buf;
 	unsigned long end = (unsigned long) buf + length - 1;
 
-dump_parport_state ("enter fifo_write_block_dma", port);
+	dump_parport_state("enter fifo_write_block_dma", port);
 	if (end < MAX_DMA_ADDRESS) {
 		/* If it would cross a 64k boundary, cap it at the end. */
 		if ((start ^ end) & ~0xffffUL)
@@ -629,8 +634,9 @@
 
 		dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length,
 						       DMA_TO_DEVICE);
-        } else {
-		/* above 16 MB we use a bounce buffer as ISA-DMA is not possible */
+	} else {
+		/* above 16 MB we use a bounce buffer as ISA-DMA
+		   is not possible */
 		maxlen   = PAGE_SIZE;          /* sizeof(priv->dma_buf) */
 		dma_addr = priv->dma_handle;
 		dma_handle = 0;
@@ -639,12 +645,12 @@
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned long expire = jiffies + port->physport->cad->timeout;
@@ -665,10 +671,10 @@
 		set_dma_count(port->dma, count);
 
 		/* Set DMA mode */
-		frob_econtrol (port, 1<<3, 1<<3);
+		frob_econtrol(port, 1<<3, 1<<3);
 
 		/* Clear serviceIntr */
-		frob_econtrol (port, 1<<2, 0);
+		frob_econtrol(port, 1<<2, 0);
 
 		enable_dma(port->dma);
 		release_dma_lock(dmaflag);
@@ -676,20 +682,22 @@
 		/* assume DMA will be successful */
 		left -= count;
 		buf  += count;
-		if (dma_handle) dma_addr += count;
+		if (dma_handle)
+			dma_addr += count;
 
 		/* Wait for interrupt. */
-	false_alarm:
-		ret = parport_wait_event (port, HZ);
-		if (ret < 0) break;
+false_alarm:
+		ret = parport_wait_event(port, HZ);
+		if (ret < 0)
+			break;
 		ret = 0;
-		if (!time_before (jiffies, expire)) {
+		if (!time_before(jiffies, expire)) {
 			/* Timed out. */
-			printk (KERN_DEBUG "DMA write timed out\n");
+			printk(KERN_DEBUG "DMA write timed out\n");
 			break;
 		}
 		/* Is serviceIntr set? */
-		if (!(inb (ECONTROL (port)) & (1<<2))) {
+		if (!(inb(ECONTROL(port)) & (1<<2))) {
 			cond_resched();
 
 			goto false_alarm;
@@ -705,14 +713,15 @@
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
 		/* update for possible DMA residue ! */
 		buf  -= count;
 		left += count;
-		if (dma_handle) dma_addr -= count;
+		if (dma_handle)
+			dma_addr -= count;
 	}
 
 	/* Maybe got here through break, so adjust for DMA residue! */
@@ -723,12 +732,12 @@
 	release_dma_lock(dmaflag);
 
 	/* Turn off DMA mode */
-	frob_econtrol (port, 1<<3, 0);
+	frob_econtrol(port, 1<<3, 0);
 
 	if (dma_handle)
 		dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
 
-dump_parport_state ("leave fifo_write_block_dma", port);
+	dump_parport_state("leave fifo_write_block_dma", port);
 	return length - left;
 }
 #endif
@@ -738,13 +747,13 @@
 {
 #ifdef HAS_DMA
 	if (port->dma != PARPORT_DMA_NONE)
-		return parport_pc_fifo_write_block_dma (port, buf, length);
+		return parport_pc_fifo_write_block_dma(port, buf, length);
 #endif
-	return parport_pc_fifo_write_block_pio (port, buf, length);
+	return parport_pc_fifo_write_block_pio(port, buf, length);
 }
 
 /* Parallel Port FIFO mode (ECP chipsets) */
-static size_t parport_pc_compat_write_block_pio (struct parport *port,
+static size_t parport_pc_compat_write_block_pio(struct parport *port,
 						 const void *buf, size_t length,
 						 int flags)
 {
@@ -756,14 +765,16 @@
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_write_compat (port, buf,
+		return parport_ieee1284_write_compat(port, buf,
 						      length, flags);
 
 	/* Set up parallel port FIFO mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0);
-	r = change_mode (port, ECR_PPF); /* Parallel port FIFO */
-	if (r)  printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name);
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0);
+	r = change_mode(port, ECR_PPF); /* Parallel port FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n",
+								port->name);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
@@ -775,40 +786,39 @@
 	 * the FIFO is empty, so allow 4 seconds for each position
 	 * in the fifo.
 	 */
-        expire = jiffies + (priv->fifo_depth * HZ * 4);
+	expire = jiffies + (priv->fifo_depth * HZ * 4);
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 	}
 
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
 	if (r)
-		printk (KERN_DEBUG
-			"%s: BUSY timeout (%d) in compat_write_block_pio\n", 
+		printk(KERN_DEBUG
+			"%s: BUSY timeout (%d) in compat_write_block_pio\n",
 			port->name, r);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
@@ -818,7 +828,7 @@
 
 /* ECP */
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_ecp_write_block_pio (struct parport *port,
+static size_t parport_pc_ecp_write_block_pio(struct parport *port,
 					      const void *buf, size_t length,
 					      int flags)
 {
@@ -830,36 +840,38 @@
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_write_data (port, buf,
+		return parport_ieee1284_ecp_write_data(port, buf,
 							length, flags);
 
 	/* Switch to forward mode if necessary. */
 	if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
 		/* Event 47: Set nInit high. */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD);
 
 		/* Event 49: PError goes high. */
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
 		if (r) {
-			printk (KERN_DEBUG "%s: PError timeout (%d) "
+			printk(KERN_DEBUG "%s: PError timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 		}
 	}
 
 	/* Set up ECP parallel port mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port,
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
 	/* Write the data to the FIFO. */
@@ -873,55 +885,54 @@
 	expire = jiffies + (priv->fifo_depth * (HZ * 4));
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 
 		/* Host transfer recovery. */
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
-		parport_frob_control (port, PARPORT_CONTROL_INIT, 0);
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
+		parport_frob_control(port, PARPORT_CONTROL_INIT, 0);
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
 		if (r)
-			printk (KERN_DEBUG "%s: PE,1 timeout (%d) "
+			printk(KERN_DEBUG "%s: PE,1 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      PARPORT_CONTROL_INIT);
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
-                if (r)
-                        printk (KERN_DEBUG "%s: PE,2 timeout (%d) "
+		if (r)
+			printk(KERN_DEBUG "%s: PE,2 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 	}
 
-	r = parport_wait_peripheral (port,
-				     PARPORT_STATUS_BUSY, 
+	r = parport_wait_peripheral(port,
+				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
-	if(r)
-		printk (KERN_DEBUG
+	if (r)
+		printk(KERN_DEBUG
 			"%s: BUSY timeout (%d) in ecp_write_block_pio\n",
 			port->name, r);
 
@@ -931,7 +942,7 @@
 }
 
 #if 0
-static size_t parport_pc_ecp_read_block_pio (struct parport *port,
+static size_t parport_pc_ecp_read_block_pio(struct parport *port,
 					     void *buf, size_t length,
 					     int flags)
 {
@@ -944,13 +955,13 @@
 	char *bufp = buf;
 
 	port = port->physport;
-DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
-dump_parport_state ("enter fcn", port);
+	DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
+	dump_parport_state("enter fcn", port);
 
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) {
@@ -966,173 +977,178 @@
 	 * go through software emulation.  Otherwise we may have to throw
 	 * away data. */
 	if (length < fifofull)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) {
 		/* change to reverse-idle phase (must be in forward-idle) */
 
 		/* Event 38: Set nAutoFd low (also make sure nStrobe is high) */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_AUTOFD
 				      | PARPORT_CONTROL_STROBE,
 				      PARPORT_CONTROL_AUTOFD);
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
 		/* Event 39: Set nInit low to initiate bus reversal */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      0);
 		/* Event 40: Wait for  nAckReverse (PError) to go low */
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
-                if (r) {
-                        printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) "
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
+		if (r) {
+			printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) "
 				"in ecp_read_block_pio\n", port->name, r);
 			return 0;
 		}
 	}
 
 	/* Set up ECP FIFO mode.*/
-/*	parport_pc_frob_control (port,
+/*	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD); */
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 
 	port->ieee1284.phase = IEEE1284_PH_REV_DATA;
 
 	/* the first byte must be collected manually */
-dump_parport_state ("pre 43", port);
+	dump_parport_state("pre 43", port);
 	/* Event 43: Wait for nAck to go low */
-	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0);
+	r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0);
 	if (r) {
 		/* timed out while reading -- no data */
-		printk (KERN_DEBUG "PIO read timed out (initial byte)\n");
+		printk(KERN_DEBUG "PIO read timed out (initial byte)\n");
 		goto out_no_data;
 	}
 	/* read byte */
-	*bufp++ = inb (DATA (port));
+	*bufp++ = inb(DATA(port));
 	left--;
-dump_parport_state ("43-44", port);
+	dump_parport_state("43-44", port);
 	/* Event 44: nAutoFd (HostAck) goes high to acknowledge */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-dump_parport_state ("pre 45", port);
+	dump_parport_state("pre 45", port);
 	/* Event 45: Wait for nAck to go high */
-/*	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */
-dump_parport_state ("post 45", port);
-r = 0;
+	/* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK,
+						PARPORT_STATUS_ACK); */
+	dump_parport_state("post 45", port);
+	r = 0;
 	if (r) {
 		/* timed out while waiting for peripheral to respond to ack */
-		printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
+		printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
 
 		/* keep hold of the byte we've got already */
 		goto out_no_data;
 	}
 	/* Event 46: nAutoFd (HostAck) goes low to accept more data */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD);
 
 
-dump_parport_state ("rev idle", port);
+	dump_parport_state("rev idle", port);
 	/* Do the transfer. */
 	while (left > fifofull) {
 		int ret;
 		unsigned long expire = jiffies + port->cad->timeout;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* At this point, the FIFO may already be full. In
-                 * that case ECP is already holding back the
-                 * peripheral (assuming proper design) with a delayed
-                 * handshake.  Work fast to avoid a peripheral
-                 * timeout.  */
+		 * that case ECP is already holding back the
+		 * peripheral (assuming proper design) with a delayed
+		 * handshake.  Work fast to avoid a peripheral
+		 * timeout.  */
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Wait for interrupt. */
-dump_parport_state ("FIFO empty", port);
+			dump_parport_state("FIFO empty", port);
 
 			/* Anyone else waiting for the port? */
 			if (port->waithead) {
-				printk (KERN_DEBUG "Somebody wants the port\n");
+				printk(KERN_DEBUG "Somebody wants the port\n");
 				break;
 			}
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-dump_parport_state ("waiting", port);
-			ret = parport_wait_event (port, HZ);
-DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret);
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			dump_parport_state("waiting", port);
+			ret = parport_wait_event(port, HZ);
+			DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n",
+									ret);
 			if (ret < 0)
 				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-dump_parport_state ("timeout", port);
-				printk (KERN_DEBUG "PIO read timed out\n");
+				dump_parport_state("timeout", port);
+				printk(KERN_DEBUG "PIO read timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire)) {
-					schedule ();
+				    time_before(jiffies, expire)) {
+					schedule();
 				}
 				goto false_alarm;
 			}
 
 			/* Depending on how the FIFO threshold was
-                         * set, how long interrupt service took, and
-                         * how fast the peripheral is, we might be
-                         * lucky and have a just filled FIFO. */
+			 * set, how long interrupt service took, and
+			 * how fast the peripheral is, we might be
+			 * lucky and have a just filled FIFO. */
 			continue;
 		}
 
 		if (ecrval & 0x02) {
 			/* FIFO is full. */
-dump_parport_state ("FIFO full", port);
-			insb (fifo, bufp, fifo_depth);
+			dump_parport_state("FIFO full", port);
+			insb(fifo, bufp, fifo_depth);
 			bufp += fifo_depth;
 			left -= fifo_depth;
 			continue;
 		}
 
-DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
+		DPRINTK(KERN_DEBUG
+		  "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 
 		/* FIFO not filled.  We will cycle this loop for a while
-                 * and either the peripheral will fill it faster,
-                 * tripping a fast empty with insb, or we empty it. */
-		*bufp++ = inb (fifo);
+		 * and either the peripheral will fill it faster,
+		 * tripping a fast empty with insb, or we empty it. */
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	/* scoop up anything left in the FIFO */
-	while (left && !(inb (ECONTROL (port) & 0x01))) {
-		*bufp++ = inb (fifo);
+	while (left && !(inb(ECONTROL(port) & 0x01))) {
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-dump_parport_state ("rev idle2", port);
+	dump_parport_state("rev idle2", port);
 
 out_no_data:
 
 	/* Go to forward idle mode to shut the peripheral up (event 47). */
-	parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
+	parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
 
 	/* event 49: PError goes high */
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_PAPEROUT,
 				     PARPORT_STATUS_PAPEROUT);
 	if (r) {
-		printk (KERN_DEBUG
+		printk(KERN_DEBUG
 			"%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n",
 			port->name, r);
 	}
@@ -1141,14 +1157,14 @@
 
 	/* Finish up. */
 	{
-		int lost = get_fifo_residue (port);
+		int lost = get_fifo_residue(port);
 		if (lost)
 			/* Shouldn't happen with compliant peripherals. */
-			printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
+			printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
 				port->name, lost);
 	}
 
-dump_parport_state ("fwd idle", port);
+	dump_parport_state("fwd idle", port);
 	return length - left;
 }
 #endif  /*  0  */
@@ -1164,8 +1180,7 @@
 
 /* GCC is not inlining extern inline function later overwriten to non-inline,
    so we use outlined_ variants here.  */
-static const struct parport_operations parport_pc_ops =
-{
+static const struct parport_operations parport_pc_ops = {
 	.write_data	= parport_pc_write_data,
 	.read_data	= parport_pc_read_data,
 
@@ -1202,88 +1217,107 @@
 };
 
 #ifdef CONFIG_PARPORT_PC_SUPERIO
+
+static struct superio_struct *find_free_superio(void)
+{
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io == 0)
+			return &superios[i];
+	return NULL;
+}
+
+
 /* Super-IO chipset detection, Winbond, SMSC */
 static void __devinit show_parconfig_smsc37c669(int io, int key)
 {
-	int cr1,cr4,cra,cr23,cr26,cr27,i=0;
-	static const char *const modes[]={
+	int cr1, cr4, cra, cr23, cr26, cr27;
+	struct superio_struct *s;
+
+	static const char *const modes[] = {
 		"SPP and Bidirectional (PS/2)",
 		"EPP and SPP",
 		"ECP",
 		"ECP and EPP" };
 
-	outb(key,io);
-	outb(key,io);
-	outb(1,io);
-	cr1=inb(io+1);
-	outb(4,io);
-	cr4=inb(io+1);
-	outb(0x0a,io);
-	cra=inb(io+1);
-	outb(0x23,io);
-	cr23=inb(io+1);
-	outb(0x26,io);
-	cr26=inb(io+1);
-	outb(0x27,io);
-	cr27=inb(io+1);
-	outb(0xaa,io);
+	outb(key, io);
+	outb(key, io);
+	outb(1, io);
+	cr1 = inb(io + 1);
+	outb(4, io);
+	cr4 = inb(io + 1);
+	outb(0x0a, io);
+	cra = inb(io + 1);
+	outb(0x23, io);
+	cr23 = inb(io + 1);
+	outb(0x26, io);
+	cr26 = inb(io + 1);
+	outb(0x27, io);
+	cr27 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
+		printk(KERN_INFO
+			"SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
 			"A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n",
-			cr1,cr4,cra,cr23,cr26,cr27);
-		
+			cr1, cr4, cra, cr23, cr26, cr27);
+
 		/* The documentation calls DMA and IRQ-Lines by letters, so
 		   the board maker can/will wire them
 		   appropriately/randomly...  G=reserved H=IDE-irq, */
-		printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, "
-			"fifo threshold=%d\n", cr23*4,
-			(cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-',
-			(cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f);
+		printk(KERN_INFO
+	"SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n",
+				cr23 * 4,
+				(cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-',
+				(cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-',
+				cra & 0x0f);
 		printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n",
-		       (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no");
-		printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
-		       (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 
-		       (cr4 & 0x40) ? "1.7" : "1.9");
+		       (cr23 * 4 >= 0x100) ? "yes" : "no",
+		       (cr1 & 4) ? "yes" : "no");
+		printk(KERN_INFO
+			"SMSC LPT Config: Port mode=%s, EPP version =%s\n",
+				(cr1 & 0x08) ? "Standard mode only (SPP)"
+					      : modes[cr4 & 0x03],
+				(cr4 & 0x40) ? "1.7" : "1.9");
 	}
-		
+
 	/* Heuristics !  BIOS setup for this mainboard device limits
 	   the choices to standard settings, i.e. io-address and IRQ
 	   are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
 	   DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
-	if(cr23*4 >=0x100) { /* if active */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-			i++;
-		if(i==NR_SUPERIOS)
+	if (cr23 * 4 >= 0x100) { /* if active */
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
 		else {
 			int d;
-			switch (cr23*4) {
-				case 0x3bc:
-					superios[i].io = 0x3bc;
-					superios[i].irq = 7;
-					break;
-				case 0x378:
-					superios[i].io = 0x378;
-					superios[i].irq = 7;
-					break;
-				case 0x278:
-					superios[i].io = 0x278;
-					superios[i].irq = 5;
+			switch (cr23 * 4) {
+			case 0x3bc:
+				s->io = 0x3bc;
+				s->irq = 7;
+				break;
+			case 0x378:
+				s->io = 0x378;
+				s->irq = 7;
+				break;
+			case 0x278:
+				s->io = 0x278;
+				s->irq = 5;
 			}
-			d=(cr26 &0x0f);
-			if((d==1) || (d==3)) 
-				superios[i].dma= d;
+			d = (cr26 & 0x0f);
+			if (d == 1 || d == 3)
+				s->dma = d;
 			else
-				superios[i].dma= PARPORT_DMA_NONE;
+				s->dma = PARPORT_DMA_NONE;
 		}
- 	}
+	}
 }
 
 
 static void __devinit show_parconfig_winbond(int io, int key)
 {
-	int cr30,cr60,cr61,cr70,cr74,crf0,i=0;
+	int cr30, cr60, cr61, cr70, cr74, crf0;
+	struct superio_struct *s;
 	static const char *const modes[] = {
 		"Standard (SPP) and Bidirectional(PS/2)", /* 0 */
 		"EPP-1.9 and SPP",
@@ -1296,110 +1330,134 @@
 	static char *const irqtypes[] = {
 		"pulsed low, high-Z",
 		"follows nACK" };
-		
+
 	/* The registers are called compatible-PnP because the
-           register layout is modelled after ISA-PnP, the access
-           method is just another ... */
-	outb(key,io);
-	outb(key,io);
-	outb(0x07,io);   /* Register 7: Select Logical Device */
-	outb(0x01,io+1); /* LD1 is Parallel Port */
-	outb(0x30,io);
-	cr30=inb(io+1);
-	outb(0x60,io);
-	cr60=inb(io+1);
-	outb(0x61,io);
-	cr61=inb(io+1);
-	outb(0x70,io);
-	cr70=inb(io+1);
-	outb(0x74,io);
-	cr74=inb(io+1);
-	outb(0xf0,io);
-	crf0=inb(io+1);
-	outb(0xaa,io);
+	   register layout is modelled after ISA-PnP, the access
+	   method is just another ... */
+	outb(key, io);
+	outb(key, io);
+	outb(0x07, io);   /* Register 7: Select Logical Device */
+	outb(0x01, io + 1); /* LD1 is Parallel Port */
+	outb(0x30, io);
+	cr30 = inb(io + 1);
+	outb(0x60, io);
+	cr60 = inb(io + 1);
+	outb(0x61, io);
+	cr61 = inb(io + 1);
+	outb(0x70, io);
+	cr70 = inb(io + 1);
+	outb(0x74, io);
+	cr74 = inb(io + 1);
+	outb(0xf0, io);
+	crf0 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x "
-		       "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0);
-		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", 
-		       (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f );
+		printk(KERN_INFO
+    "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n",
+					cr30, cr60, cr61, cr70, cr74, crf0);
+		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ",
+		       (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f);
 		if ((cr74 & 0x07) > 3)
 			printk("dma=none\n");
 		else
-			printk("dma=%d\n",cr74 & 0x07);
-		printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
-		       irqtypes[crf0>>7], (crf0>>3)&0x0f);
-		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]);
+			printk("dma=%d\n", cr74 & 0x07);
+		printk(KERN_INFO
+		    "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
+					irqtypes[crf0>>7], (crf0>>3)&0x0f);
+		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n",
+					modes[crf0 & 0x07]);
 	}
 
-	if(cr30 & 0x01) { /* the settings can be interrogated later ... */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-			i++;
-		if(i==NR_SUPERIOS) 
+	if (cr30 & 0x01) { /* the settings can be interrogated later ... */
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
 		else {
-			superios[i].io = (cr60<<8)|cr61;
-			superios[i].irq = cr70&0x0f;
-			superios[i].dma = (((cr74 & 0x07) > 3) ?
+			s->io = (cr60 << 8) | cr61;
+			s->irq = cr70 & 0x0f;
+			s->dma = (((cr74 & 0x07) > 3) ?
 					   PARPORT_DMA_NONE : (cr74 & 0x07));
 		}
 	}
 }
 
-static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid)
+static void __devinit decode_winbond(int efer, int key, int devid,
+							int devrev, int oldid)
 {
 	const char *type = "unknown";
-	int id,progif=2;
+	int id, progif = 2;
 
 	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-winbond register */
+		   non-winbond register */
 		return;
 
-	id=(devid<<8) | devrev;
+	id = (devid << 8) | devrev;
 
 	/* Values are from public data sheets pdf files, I can just
-           confirm 83977TF is correct :-) */
-	if      (id == 0x9771) type="83977F/AF";
-	else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x";
-	else if (id == 0x9774) type="83977ATF";
-	else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x";
-	else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x";
-	else if ((id & ~0x0f) == 0x5210) type="83627";
-	else if ((id & ~0x0f) == 0x6010) type="83697HF";
-	else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;}
-	else progif=0;
+	   confirm 83977TF is correct :-) */
+	if (id == 0x9771)
+		type = "83977F/AF";
+	else if (id == 0x9773)
+		type = "83977TF / SMSC 97w33x/97w34x";
+	else if (id == 0x9774)
+		type = "83977ATF";
+	else if ((id & ~0x0f) == 0x5270)
+		type = "83977CTF / SMSC 97w36x";
+	else if ((id & ~0x0f) == 0x52f0)
+		type = "83977EF / SMSC 97w35x";
+	else if ((id & ~0x0f) == 0x5210)
+		type = "83627";
+	else if ((id & ~0x0f) == 0x6010)
+		type = "83697HF";
+	else if ((oldid & 0x0f) == 0x0a) {
+		type = "83877F";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0b) {
+		type = "83877AF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0c) {
+		type = "83877TF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0d) {
+		type = "83877ATF";
+		progif = 1;
+	} else
+		progif = 0;
 
 	if (verbose_probing)
 		printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x "
-		       "devid=%02x devrev=%02x oldid=%02x type=%s\n", 
+		       "devid=%02x devrev=%02x oldid=%02x type=%s\n",
 		       efer, key, devid, devrev, oldid, type);
 
 	if (progif == 2)
-		show_parconfig_winbond(efer,key);
+		show_parconfig_winbond(efer, key);
 }
 
 static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
 {
-        const char *type = "unknown";
+	const char *type = "unknown";
 	void (*func)(int io, int key);
-        int id;
+	int id;
 
-        if (devid == devrev)
+	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-smsc register */
+		   non-smsc register */
 		return;
 
-	func=NULL;
-        id=(devid<<8) | devrev;
+	func = NULL;
+	id = (devid << 8) | devrev;
 
-	if	(id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;}
-	else if	(id==0x6582) type="37c665IR";
-	else if	(devid==0x65) type="37c665GT";
-	else if	(devid==0x66) type="37c666GT";
+	if (id == 0x0302) {
+		type = "37c669";
+		func = show_parconfig_smsc37c669;
+	} else if (id == 0x6582)
+		type = "37c665IR";
+	else if	(devid == 0x65)
+		type = "37c665GT";
+	else if	(devid == 0x66)
+		type = "37c666GT";
 
 	if (verbose_probing)
 		printk(KERN_INFO "SMSC chip at EFER=0x%x "
@@ -1407,138 +1465,138 @@
 		       efer, key, devid, devrev, type);
 
 	if (func)
-		func(efer,key);
+		func(efer, key);
 }
 
 
 static void __devinit winbond_check(int io, int key)
 {
-	int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without key */
-	outb(0x20,io);
-	x_devid=inb(io+1);
-	outb(0x21,io);
-	x_devrev=inb(io+1);
-	outb(0x09,io);
-	x_oldid=inb(io+1);
+	outb(0x20, io);
+	x_devid = inb(io + 1);
+	outb(0x21, io);
+	x_devrev = inb(io + 1);
+	outb(0x09, io);
+	x_oldid = inb(io + 1);
 
-	outb(key,io);
-	outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-	outb(0x20,io);    /* Write EFIR, extended function index register */
-	devid=inb(io+1);  /* Read EFDR, extended function data register */
-	outb(0x21,io);
-	devrev=inb(io+1);
-	outb(0x09,io);
-	oldid=inb(io+1);
-	outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x20, io);    /* Write EFIR, extended function index register */
+	devid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x21, io);
+	devrev = inb(io + 1);
+	outb(0x09, io);
+	oldid = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
 
 	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
-static void __devinit winbond_check2(int io,int key)
+static void __devinit winbond_check2(int io, int key)
 {
-        int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x20,io+2);
-	x_devid=inb(io+2);
-	outb(0x21,io+1);
-	x_devrev=inb(io+2);
-	outb(0x09,io+1);
-	x_oldid=inb(io+2);
+	outb(0x20, io + 2);
+	x_devid = inb(io + 2);
+	outb(0x21, io + 1);
+	x_devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	x_oldid = inb(io + 2);
 
-        outb(key,io);     /* Write Magic Byte to EFER, extended
-                             funtion enable register */
-        outb(0x20,io+2);  /* Write EFIR, extended function index register */
-        devid=inb(io+2);  /* Read EFDR, extended function data register */
-        outb(0x21,io+1);
-        devrev=inb(io+2);
-        outb(0x09,io+1);
-        oldid=inb(io+2);
-        outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);     /* Write Magic Byte to EFER, extended
+			      funtion enable register */
+	outb(0x20, io + 2);  /* Write EFIR, extended function index register */
+	devid = inb(io + 2);  /* Read EFDR, extended function data register */
+	outb(0x21, io + 1);
+	devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	oldid = inb(io + 2);
+	outb(0xaa, io);    /* Magic Seal */
 
-	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
+	if (x_devid == devid && x_devrev == devrev && x_oldid == oldid)
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
 static void __devinit smsc_check(int io, int key)
 {
-        int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev;
+	int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x0d,io);
-	x_oldid=inb(io+1);
-	outb(0x0e,io);
-	x_oldrev=inb(io+1);
-	outb(0x20,io);
-	x_id=inb(io+1);
-	outb(0x21,io);
-	x_rev=inb(io+1);
+	outb(0x0d, io);
+	x_oldid = inb(io + 1);
+	outb(0x0e, io);
+	x_oldrev = inb(io + 1);
+	outb(0x20, io);
+	x_id = inb(io + 1);
+	outb(0x21, io);
+	x_rev = inb(io + 1);
 
-        outb(key,io);
-        outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-        outb(0x0d,io);    /* Write EFIR, extended function index register */
-        oldid=inb(io+1);  /* Read EFDR, extended function data register */
-        outb(0x0e,io);
-        oldrev=inb(io+1);
-	outb(0x20,io);
-	id=inb(io+1);
-	outb(0x21,io);
-	rev=inb(io+1);
-        outb(0xaa,io);    /* Magic Seal */
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x0d, io);    /* Write EFIR, extended function index register */
+	oldid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x0e, io);
+	oldrev = inb(io + 1);
+	outb(0x20, io);
+	id = inb(io + 1);
+	outb(0x21, io);
+	rev = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
 
-	if ((x_id == id) && (x_oldrev == oldrev) &&
-	    (x_oldid == oldid) && (x_rev == rev))
+	if (x_id == id && x_oldrev == oldrev &&
+	    x_oldid == oldid && x_rev == rev)
 		goto out; /* protection against false positives */
 
-        decode_smsc(io,key,oldid,oldrev);
+	decode_smsc(io, key, oldid, oldrev);
 out:
 	release_region(io, 3);
 }
 
 
-static void __devinit detect_and_report_winbond (void)
-{ 
+static void __devinit detect_and_report_winbond(void)
+{
 	if (verbose_probing)
 		printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n");
-	winbond_check(0x3f0,0x87);
-	winbond_check(0x370,0x87);
-	winbond_check(0x2e ,0x87);
-	winbond_check(0x4e ,0x87);
-	winbond_check(0x3f0,0x86);
-	winbond_check2(0x250,0x88); 
-	winbond_check2(0x250,0x89);
+	winbond_check(0x3f0, 0x87);
+	winbond_check(0x370, 0x87);
+	winbond_check(0x2e , 0x87);
+	winbond_check(0x4e , 0x87);
+	winbond_check(0x3f0, 0x86);
+	winbond_check2(0x250, 0x88);
+	winbond_check2(0x250, 0x89);
 }
 
-static void __devinit detect_and_report_smsc (void)
+static void __devinit detect_and_report_smsc(void)
 {
 	if (verbose_probing)
 		printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n");
-	smsc_check(0x3f0,0x55);
-	smsc_check(0x370,0x55);
-	smsc_check(0x3f0,0x44);
-	smsc_check(0x370,0x44);
+	smsc_check(0x3f0, 0x55);
+	smsc_check(0x370, 0x55);
+	smsc_check(0x3f0, 0x44);
+	smsc_check(0x370, 0x44);
 }
 
 static void __devinit detect_and_report_it87(void)
@@ -1573,34 +1631,39 @@
 }
 #endif /* CONFIG_PARPORT_PC_SUPERIO */
 
-static int get_superio_dma (struct parport *p)
+static struct superio_struct *find_superio(struct parport *p)
 {
-	int i=0;
-	while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-		i++;
-	if (i!=NR_SUPERIOS)
-		return superios[i].dma;
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io != p->base)
+			return &superios[i];
+	return NULL;
+}
+
+static int get_superio_dma(struct parport *p)
+{
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->dma;
 	return PARPORT_DMA_NONE;
 }
 
-static int get_superio_irq (struct parport *p)
+static int get_superio_irq(struct parport *p)
 {
-	int i=0;
-        while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-                i++;
-        if (i!=NR_SUPERIOS)
-                return superios[i].irq;
-        return PARPORT_IRQ_NONE;
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->irq;
+	return PARPORT_IRQ_NONE;
 }
-	
+
 
 /* --- Mode detection ------------------------------------- */
 
 /*
  * Checks for port existence, all ports support SPP MODE
- * Returns: 
+ * Returns:
  *         0           :  No parallel port at this address
- *  PARPORT_MODE_PCSPP :  SPP port detected 
+ *  PARPORT_MODE_PCSPP :  SPP port detected
  *                        (if the user specified an ioport himself,
  *                         this shall always be the case!)
  *
@@ -1610,7 +1673,7 @@
 	unsigned char r, w;
 
 	/*
-	 * first clear an eventually pending EPP timeout 
+	 * first clear an eventually pending EPP timeout
 	 * I (sailer@ife.ee.ethz.ch) have an SMSC chipset
 	 * that does not even respond to SPP cycles if an EPP
 	 * timeout is pending
@@ -1619,19 +1682,19 @@
 
 	/* Do a simple read-write test to make sure the port exists. */
 	w = 0xc;
-	outb (w, CONTROL (pb));
+	outb(w, CONTROL(pb));
 
 	/* Is there a control register that we can read from?  Some
 	 * ports don't allow reads, so read_control just returns a
 	 * software copy. Some ports _do_ allow reads, so bypass the
 	 * software copy here.  In addition, some bits aren't
 	 * writable. */
-	r = inb (CONTROL (pb));
+	r = inb(CONTROL(pb));
 	if ((r & 0xf) == w) {
 		w = 0xe;
-		outb (w, CONTROL (pb));
-		r = inb (CONTROL (pb));
-		outb (0xc, CONTROL (pb));
+		outb(w, CONTROL(pb));
+		r = inb(CONTROL(pb));
+		outb(0xc, CONTROL(pb));
 		if ((r & 0xf) == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1639,18 +1702,18 @@
 	if (user_specified)
 		/* That didn't work, but the user thinks there's a
 		 * port here. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): CTR: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): CTR: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
 
 	/* Try the data register.  The data lines aren't tri-stated at
 	 * this stage, so we expect back what we wrote. */
 	w = 0xaa;
-	parport_pc_write_data (pb, w);
-	r = parport_pc_read_data (pb);
+	parport_pc_write_data(pb, w);
+	r = parport_pc_read_data(pb);
 	if (r == w) {
 		w = 0x55;
-		parport_pc_write_data (pb, w);
-		r = parport_pc_read_data (pb);
+		parport_pc_write_data(pb, w);
+		r = parport_pc_read_data(pb);
 		if (r == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1658,9 +1721,9 @@
 	if (user_specified) {
 		/* Didn't work, but the user is convinced this is the
 		 * place. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): DATA: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): DATA: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
-		printk (KERN_INFO "parport 0x%lx: You gave this address, "
+		printk(KERN_INFO "parport 0x%lx: You gave this address, "
 			"but there is probably no parallel port there!\n",
 			pb->base);
 	}
@@ -1691,33 +1754,33 @@
 	struct parport_pc_private *priv = pb->private_data;
 	unsigned char r = 0xc;
 
-	outb (r, CONTROL (pb));
-	if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) {
-		outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */
+	outb(r, CONTROL(pb));
+	if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) {
+		outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */
 
-		r = inb (CONTROL (pb));
-		if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2))
+		r = inb(CONTROL(pb));
+		if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2))
 			goto no_reg; /* Sure that no ECR register exists */
 	}
-	
-	if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1)
+
+	if ((inb(ECONTROL(pb)) & 0x3) != 0x1)
 		goto no_reg;
 
-	ECR_WRITE (pb, 0x34);
-	if (inb (ECONTROL (pb)) != 0x35)
+	ECR_WRITE(pb, 0x34);
+	if (inb(ECONTROL(pb)) != 0x35)
 		goto no_reg;
 
 	priv->ecr = 1;
-	outb (0xc, CONTROL (pb));
-	
+	outb(0xc, CONTROL(pb));
+
 	/* Go to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 
  no_reg:
-	outb (0xc, CONTROL (pb));
-	return 0; 
+	outb(0xc, CONTROL(pb));
+	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
@@ -1727,7 +1790,7 @@
  * allows us to read data from the data lines.  In theory we would get back
  * 0xff but any peripheral attached to the port may drag some or all of the
  * lines down to zero.  So if we get back anything that isn't the contents
- * of the data register we deem PS/2 support to be present. 
+ * of the data register we deem PS/2 support to be present.
  *
  * Some SPP ports have "half PS/2" ability - you can't turn off the line
  * drivers, but an external peripheral with sufficiently beefy drivers of
@@ -1735,26 +1798,28 @@
  * where they can then be read back as normal.  Ports with this property
  * and the right type of device attached are likely to fail the SPP test,
  * (as they will appear to have stuck bits) and so the fact that they might
- * be misdetected here is rather academic. 
+ * be misdetected here is rather academic.
  */
 
 static int parport_PS2_supported(struct parport *pb)
 {
 	int ok = 0;
-  
+
 	clear_epp_timeout(pb);
 
 	/* try to tri-state the buffer */
-	parport_pc_data_reverse (pb);
-	
+	parport_pc_data_reverse(pb);
+
 	parport_pc_write_data(pb, 0x55);
-	if (parport_pc_read_data(pb) != 0x55) ok++;
+	if (parport_pc_read_data(pb) != 0x55)
+		ok++;
 
 	parport_pc_write_data(pb, 0xaa);
-	if (parport_pc_read_data(pb) != 0xaa) ok++;
+	if (parport_pc_read_data(pb) != 0xaa)
+		ok++;
 
 	/* cancel input mode */
-	parport_pc_data_forward (pb);
+	parport_pc_data_forward(pb);
 
 	if (ok) {
 		pb->modes |= PARPORT_MODE_TRISTATE;
@@ -1773,68 +1838,68 @@
 	int config, configb;
 	int pword;
 	struct parport_pc_private *priv = pb->private_data;
-	/* Translate ECP intrLine to ISA irq value */	
-	static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; 
+	/* Translate ECP intrLine to ISA irq value */
+	static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 };
 
 	/* If there is no ECR, we have no hope of supporting ECP. */
 	if (!priv->ecr)
 		return 0;
 
 	/* Find out FIFO depth */
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++)
-		outb (0xaa, FIFO (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++)
+		outb(0xaa, FIFO(pb));
 
 	/*
 	 * Using LGS chipset it uses ECR register, but
 	 * it doesn't support ECP or FIFO MODE
 	 */
 	if (i == 1024) {
-		ECR_WRITE (pb, ECR_SPP << 5);
+		ECR_WRITE(pb, ECR_SPP << 5);
 		return 0;
 	}
 
 	priv->fifo_depth = i;
 	if (verbose_probing)
-		printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
+		printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
 
 	/* Find out writeIntrThreshold */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		inb (FIFO (pb));
-		udelay (50);
-		if (inb (ECONTROL (pb)) & (1<<2))
+		inb(FIFO(pb));
+		udelay(50);
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
+			printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we know we can write if we get an
-                   interrupt. */
+		   interrupt. */
 		i = 0;
 
 	priv->writeIntrThreshold = i;
 
 	/* Find out readIntrThreshold */
-	frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */
-	parport_pc_data_reverse (pb); /* Must be in PS2 mode */
-	frob_set_mode (pb, ECR_TST); /* Test FIFO */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */
+	parport_pc_data_reverse(pb); /* Must be in PS2 mode */
+	frob_set_mode(pb, ECR_TST); /* Test FIFO */
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		outb (0xaa, FIFO (pb));
-		if (inb (ECONTROL (pb)) & (1<<2))
+		outb(0xaa, FIFO(pb));
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n",
+			printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we can read if we get an interrupt. */
@@ -1842,23 +1907,23 @@
 
 	priv->readIntrThreshold = i;
 
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, 0xf4); /* Configuration mode */
-	config = inb (CONFIGA (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, 0xf4); /* Configuration mode */
+	config = inb(CONFIGA(pb));
 	pword = (config >> 4) & 0x7;
 	switch (pword) {
 	case 0:
 		pword = 2;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	case 2:
 		pword = 4;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	default:
-		printk (KERN_WARNING "0x%lx: Unknown implementation ID\n",
+		printk(KERN_WARNING "0x%lx: Unknown implementation ID\n",
 			pb->base);
 		/* Assume 1 */
 	case 1:
@@ -1867,28 +1932,29 @@
 	priv->pword = pword;
 
 	if (verbose_probing) {
-		printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword);
-		
-		printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
+		printk(KERN_DEBUG "0x%lx: PWord is %d bits\n",
+			pb->base, 8 * pword);
+
+		printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
 			config & 0x80 ? "Level" : "Pulses");
 
-		configb = inb (CONFIGB (pb));
-		printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
+		configb = inb(CONFIGB(pb));
+		printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
 			pb->base, config, configb);
-		printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
-		if ((configb >>3) & 0x07)
-			printk("%d",intrline[(configb >>3) & 0x07]);
+		printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
+		if ((configb >> 3) & 0x07)
+			printk("%d", intrline[(configb >> 3) & 0x07]);
 		else
 			printk("<none or set by other means>");
-		printk (" dma=");
-		if( (configb & 0x03 ) == 0x00)
+		printk(" dma=");
+		if ((configb & 0x03) == 0x00)
 			printk("<none or set by other means>\n");
 		else
-			printk("%d\n",configb & 0x07);
+			printk("%d\n", configb & 0x07);
 	}
 
 	/* Go back to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 }
@@ -1903,10 +1969,10 @@
 	if (!priv->ecr)
 		return 0;
 
-	oecr = inb (ECONTROL (pb));
-	ECR_WRITE (pb, ECR_PS2 << 5);
+	oecr = inb(ECONTROL(pb));
+	ECR_WRITE(pb, ECR_PS2 << 5);
 	result = parport_PS2_supported(pb);
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return result;
 }
 
@@ -1930,16 +1996,15 @@
 	 */
 
 	/* If EPP timeout bit clear then EPP available */
-	if (!clear_epp_timeout(pb)) {
+	if (!clear_epp_timeout(pb))
 		return 0;  /* No way to clear timeout */
-	}
 
 	/* Check for Intel bug. */
 	if (priv->ecr) {
 		unsigned char i;
 		for (i = 0x00; i < 0x80; i += 0x20) {
-			ECR_WRITE (pb, i);
-			if (clear_epp_timeout (pb)) {
+			ECR_WRITE(pb, i);
+			if (clear_epp_timeout(pb)) {
 				/* Phony EPP in ECP. */
 				return 0;
 			}
@@ -1963,17 +2028,16 @@
 	int result;
 	unsigned char oecr;
 
-	if (!priv->ecr) {
+	if (!priv->ecr)
 		return 0;
-	}
 
-	oecr = inb (ECONTROL (pb));
+	oecr = inb(ECONTROL(pb));
 	/* Search for SMC style EPP+ECP mode */
-	ECR_WRITE (pb, 0x80);
-	outb (0x04, CONTROL (pb));
+	ECR_WRITE(pb, 0x80);
+	outb(0x04, CONTROL(pb));
 	result = parport_EPP_supported(pb);
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 
 	if (result) {
 		/* Set up access functions to use ECP+EPP hardware. */
@@ -1991,11 +2055,25 @@
 /* Don't bother probing for modes we know we won't use. */
 static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int parport_ECP_supported(struct parport *pb) { return 0; }
+static int parport_ECP_supported(struct parport *pb)
+{
+	return 0;
+}
 #endif
-static int __devinit parport_EPP_supported(struct parport *pb) { return 0; }
-static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;}
-static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
+static int __devinit parport_EPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPEPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPPS2_supported(struct parport *pb)
+{
+	return 0;
+}
 
 #endif /* No IEEE 1284 support */
 
@@ -2005,17 +2083,17 @@
 static int programmable_irq_support(struct parport *pb)
 {
 	int irq, intrLine;
-	unsigned char oecr = inb (ECONTROL (pb));
+	unsigned char oecr = inb(ECONTROL(pb));
 	static const int lookup[8] = {
 		PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5
 	};
 
-	ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */
+	ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */
 
-	intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07;
+	intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07;
 	irq = lookup[intrLine];
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return irq;
 }
 
@@ -2025,17 +2103,17 @@
 	unsigned long irqs;
 
 	irqs = probe_irq_on();
-		
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, (ECR_TST << 5) | 0x04);
-	ECR_WRITE (pb, ECR_TST << 5);
+
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, (ECR_TST << 5) | 0x04);
+	ECR_WRITE(pb, ECR_TST << 5);
 
 	/* If Full FIFO sure that writeIntrThreshold is generated */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) 
-		outb (0xaa, FIFO (pb));
-		
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++)
+		outb(0xaa, FIFO(pb));
+
 	pb->irq = probe_irq_off(irqs);
-	ECR_WRITE (pb, ECR_SPP << 5);
+	ECR_WRITE(pb, ECR_SPP << 5);
 
 	if (pb->irq <= 0)
 		pb->irq = PARPORT_IRQ_NONE;
@@ -2045,7 +2123,7 @@
 
 /*
  * This detection seems that only works in National Semiconductors
- * This doesn't work in SMC, LGS, and Winbond 
+ * This doesn't work in SMC, LGS, and Winbond
  */
 static int irq_probe_EPP(struct parport *pb)
 {
@@ -2056,16 +2134,16 @@
 	unsigned char oecr;
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		oecr = inb (ECONTROL (pb));
+		oecr = inb(ECONTROL(pb));
 
 	irqs = probe_irq_on();
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		frob_econtrol (pb, 0x10, 0x10);
-	
+		frob_econtrol(pb, 0x10, 0x10);
+
 	clear_epp_timeout(pb);
-	parport_pc_frob_control (pb, 0x20, 0x20);
-	parport_pc_frob_control (pb, 0x10, 0x10);
+	parport_pc_frob_control(pb, 0x20, 0x20);
+	parport_pc_frob_control(pb, 0x10, 0x10);
 	clear_epp_timeout(pb);
 
 	/* Device isn't expecting an EPP read
@@ -2074,9 +2152,9 @@
 	parport_pc_read_epp(pb);
 	udelay(20);
 
-	pb->irq = probe_irq_off (irqs);
+	pb->irq = probe_irq_off(irqs);
 	if (pb->modes & PARPORT_MODE_PCECR)
-		ECR_WRITE (pb, oecr);
+		ECR_WRITE(pb, oecr);
 	parport_pc_write_control(pb, 0xc);
 
 	if (pb->irq <= 0)
@@ -2133,28 +2211,28 @@
 /* --- DMA detection -------------------------------------- */
 
 /* Only if chipset conforms to ECP ISA Interface Standard */
-static int programmable_dma_support (struct parport *p)
+static int programmable_dma_support(struct parport *p)
 {
-	unsigned char oecr = inb (ECONTROL (p));
+	unsigned char oecr = inb(ECONTROL(p));
 	int dma;
 
-	frob_set_mode (p, ECR_CNF);
-	
-	dma = inb (CONFIGB(p)) & 0x07;
+	frob_set_mode(p, ECR_CNF);
+
+	dma = inb(CONFIGB(p)) & 0x07;
 	/* 000: Indicates jumpered 8-bit DMA if read-only.
 	   100: Indicates jumpered 16-bit DMA if read-only. */
 	if ((dma & 0x03) == 0)
 		dma = PARPORT_DMA_NONE;
 
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return dma;
 }
 
-static int parport_dma_probe (struct parport *p)
+static int parport_dma_probe(struct parport *p)
 {
 	const struct parport_pc_private *priv = p->private_data;
-	if (priv->ecr)
-		p->dma = programmable_dma_support(p); /* ask ECP chipset first */
+	if (priv->ecr)		/* ask ECP chipset first */
+		p->dma = programmable_dma_support(p);
 	if (p->dma == PARPORT_DMA_NONE) {
 		/* ask known Super-IO chips proper, although these
 		   claim ECP compatible, some don't report their DMA
@@ -2212,7 +2290,7 @@
 	if (!base_res)
 		goto out4;
 
-	memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations));
+	memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations));
 	priv->ctr = 0xc;
 	priv->ctr_writable = ~0x10;
 	priv->ecr = 0;
@@ -2239,7 +2317,7 @@
 			if (!parport_EPP_supported(p))
 				parport_ECPEPP_supported(p);
 	}
-	if (!parport_SPP_supported (p))
+	if (!parport_SPP_supported(p))
 		/* No port. */
 		goto out5;
 	if (priv->ecr)
@@ -2247,7 +2325,7 @@
 	else
 		parport_PS2_supported(p);
 
-	p->size = (p->modes & PARPORT_MODE_EPP)?8:3;
+	p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3;
 
 	printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base);
 	if (p->base_hi && priv->ecr)
@@ -2271,7 +2349,7 @@
 		}
 	}
 	if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
-                                           is mandatory (see above) */
+					   is mandatory (see above) */
 		p->dma = PARPORT_DMA_NONE;
 
 #ifdef CONFIG_PARPORT_PC_FIFO
@@ -2288,16 +2366,23 @@
 		if (p->dma != PARPORT_DMA_NONE) {
 			printk(", dma %d", p->dma);
 			p->modes |= PARPORT_MODE_DMA;
-		}
-		else printk(", using FIFO");
-	}
-	else
+		} else
+			printk(", using FIFO");
+	} else
 		/* We can't use the DMA channel after all. */
 		p->dma = PARPORT_DMA_NONE;
 #endif /* Allowed to use FIFO/DMA */
 
 	printk(" [");
-#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}}
+
+#define printmode(x) \
+	{\
+		if (p->modes & PARPORT_MODE_##x) {\
+			printk("%s%s", f ? "," : "", #x);\
+			f++;\
+		} \
+	}
+
 	{
 		int f = 0;
 		printmode(PCSPP);
@@ -2309,10 +2394,10 @@
 	}
 #undef printmode
 #ifndef CONFIG_PARPORT_1284
-	printk ("(,...)");
+	printk("(,...)");
 #endif /* CONFIG_PARPORT_1284 */
 	printk("]\n");
-	if (probedirq != PARPORT_IRQ_NONE) 
+	if (probedirq != PARPORT_IRQ_NONE)
 		printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq);
 
 	/* If No ECP release the ports grabbed above. */
@@ -2328,7 +2413,7 @@
 	if (p->irq != PARPORT_IRQ_NONE) {
 		if (request_irq(p->irq, parport_irq_handler,
 				 irqflags, p->name, p)) {
-			printk (KERN_WARNING "%s: irq %d in use, "
+			printk(KERN_WARNING "%s: irq %d in use, "
 				"resorting to polled operation\n",
 				p->name, p->irq);
 			p->irq = PARPORT_IRQ_NONE;
@@ -2338,8 +2423,8 @@
 #ifdef CONFIG_PARPORT_PC_FIFO
 #ifdef HAS_DMA
 		if (p->dma != PARPORT_DMA_NONE) {
-			if (request_dma (p->dma, p->name)) {
-				printk (KERN_WARNING "%s: dma %d in use, "
+			if (request_dma(p->dma, p->name)) {
+				printk(KERN_WARNING "%s: dma %d in use, "
 					"resorting to PIO operation\n",
 					p->name, p->dma);
 				p->dma = PARPORT_DMA_NONE;
@@ -2349,8 +2434,8 @@
 						       PAGE_SIZE,
 						       &priv->dma_handle,
 						       GFP_KERNEL);
-				if (! priv->dma_buf) {
-					printk (KERN_WARNING "%s: "
+				if (!priv->dma_buf) {
+					printk(KERN_WARNING "%s: "
 						"cannot get buffer for DMA, "
 						"resorting to PIO operation\n",
 						p->name);
@@ -2369,10 +2454,10 @@
 		 * Put the ECP detected port in PS2 mode.
 		 * Do this also for ports that have ECR but don't do ECP.
 		 */
-		ECR_WRITE (p, 0x34);
+		ECR_WRITE(p, 0x34);
 
 	parport_pc_write_data(p, 0);
-	parport_pc_data_forward (p);
+	parport_pc_data_forward(p);
 
 	/* Now that we've told the sharing engine about the port, and
 	   found out its characteristics, let the high-level drivers
@@ -2380,7 +2465,7 @@
 	spin_lock(&ports_lock);
 	list_add(&priv->list, &ports_list);
 	spin_unlock(&ports_lock);
-	parport_announce_port (p);
+	parport_announce_port(p);
 
 	return p;
 
@@ -2393,18 +2478,17 @@
 out4:
 	parport_put_port(p);
 out3:
-	kfree (priv);
+	kfree(priv);
 out2:
-	kfree (ops);
+	kfree(ops);
 out1:
 	if (pdev)
 		platform_device_unregister(pdev);
 	return NULL;
 }
+EXPORT_SYMBOL(parport_pc_probe_port);
 
-EXPORT_SYMBOL (parport_pc_probe_port);
-
-void parport_pc_unregister_port (struct parport *p)
+void parport_pc_unregister_port(struct parport *p)
 {
 	struct parport_pc_private *priv = p->private_data;
 	struct parport_operations *ops = p->ops;
@@ -2430,17 +2514,16 @@
 				    priv->dma_buf,
 				    priv->dma_handle);
 #endif
-	kfree (p->private_data);
+	kfree(p->private_data);
 	parport_put_port(p);
-	kfree (ops); /* hope no-one cached it */
+	kfree(ops); /* hope no-one cached it */
 }
-
-EXPORT_SYMBOL (parport_pc_unregister_port);
+EXPORT_SYMBOL(parport_pc_unregister_port);
 
 #ifdef CONFIG_PCI
 
 /* ITE support maintained by Rich Liu <richliu@poorman.org> */
-static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 					 int autodma,
 					 const struct parport_pc_via_data *via)
 {
@@ -2452,73 +2535,74 @@
 	int irq;
 	int i;
 
-	DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n");
-	
-	// make sure which one chip
-	for(i = 0; i < 5; i++) {
+	DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n");
+
+	/* make sure which one chip */
+	for (i = 0; i < 5; i++) {
 		base_res = request_region(inta_addr[i], 32, "it887x");
 		if (base_res) {
 			int test;
-			pci_write_config_dword (pdev, 0x60,
+			pci_write_config_dword(pdev, 0x60,
 						0xe5000000 | inta_addr[i]);
-			pci_write_config_dword (pdev, 0x78,
+			pci_write_config_dword(pdev, 0x78,
 						0x00000000 | inta_addr[i]);
-			test = inb (inta_addr[i]);
-			if (test != 0xff) break;
+			test = inb(inta_addr[i]);
+			if (test != 0xff)
+				break;
 			release_region(inta_addr[i], 0x8);
 		}
 	}
-	if(i >= 5) {
-		printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
+	if (i >= 5) {
+		printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
 		return 0;
 	}
 
-	type = inb (inta_addr[i] + 0x18);
+	type = inb(inta_addr[i] + 0x18);
 	type &= 0x0f;
 
 	switch (type) {
 	case 0x2:
-		printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xa:
-		printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xe:
-		printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
 		ite8872set = 0x64e00000;
 		break;
 	case 0x6:
-		printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n");
+		printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n");
 		return 0;
 	case 0x8:
-		DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
+		DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
 		return 0;
 	default:
-		printk (KERN_INFO "parport_pc: unknown ITE887x\n");
-		printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' "
+		printk(KERN_INFO "parport_pc: unknown ITE887x\n");
+		printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' "
 			"output to Rich.Liu@ite.com.tw\n");
 		return 0;
 	}
 
-	pci_read_config_byte (pdev, 0x3c, &ite8872_irq);
-	pci_read_config_dword (pdev, 0x1c, &ite8872_lpt);
+	pci_read_config_byte(pdev, 0x3c, &ite8872_irq);
+	pci_read_config_dword(pdev, 0x1c, &ite8872_lpt);
 	ite8872_lpt &= 0x0000ff00;
-	pci_read_config_dword (pdev, 0x20, &ite8872_lpthi);
+	pci_read_config_dword(pdev, 0x20, &ite8872_lpthi);
 	ite8872_lpthi &= 0x0000ff00;
-	pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt);
-	pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi);
-	pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
-	// SET SPP&EPP , Parallel Port NO DMA , Enable All Function
-	// SET Parallel IRQ
-	pci_write_config_dword (pdev, 0x9c,
+	pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt);
+	pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi);
+	pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
+	/* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */
+	/* SET Parallel IRQ */
+	pci_write_config_dword(pdev, 0x9c,
 				ite8872set | (ite8872_irq * 0x11111));
 
-	DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
 		 ite8872_lpt);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
 		 ite8872_lpthi);
 
 	/* Let the user (or defaults) steer us away from interrupts */
@@ -2530,14 +2614,14 @@
 	 * Release the resource so that parport_pc_probe_port can get it.
 	 */
 	release_resource(base_res);
-	if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi,
+	if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
 				   irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
-		printk (KERN_INFO
+		printk(KERN_INFO
 			"parport_pc: ITE 8872 parallel port: io=0x%X",
-			ite8872_lpt);
+								ite8872_lpt);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
-		printk ("\n");
+			printk(", irq=%d", irq);
+		printk("\n");
 		return 1;
 	}
 
@@ -2546,7 +2630,7 @@
 
 /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru>
    based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */
-static int __devinitdata parport_init_mode = 0;
+static int __devinitdata parport_init_mode;
 
 /* Data for two known VIA chips */
 static struct parport_pc_via_data via_686a_data __devinitdata = {
@@ -2568,7 +2652,7 @@
 	0xF6
 };
 
-static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq,
 				    int autodma,
 				    const struct parport_pc_via_data *via)
 {
@@ -2580,38 +2664,38 @@
 
 	printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n");
 
-	switch(parport_init_mode)
-	{
+	switch (parport_init_mode) {
 	case 1:
-	    printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		break;
 	case 2:
-	    printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 3:
-	    printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_EPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    have_epp = 1;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_EPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		have_epp = 1;
+		break;
 	case 4:
-	    printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 5:
-	    printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
-	    have_epp = 1;
-	    break;
-	 default:
-	    printk(KERN_DEBUG "parport_pc: probing current configuration\n");
-	    siofunc = VIA_FUNCTION_PROBE;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
+		have_epp = 1;
+		break;
+	default:
+		printk(KERN_DEBUG
+			"parport_pc: probing current configuration\n");
+		siofunc = VIA_FUNCTION_PROBE;
+		break;
 	}
 	/*
 	 * unlock super i/o configuration
@@ -2622,38 +2706,36 @@
 
 	/* Bits 1-0: Parallel Port Mode / Enable */
 	outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	tmp = inb (VIA_CONFIG_DATA);
+	tmp = inb(VIA_CONFIG_DATA);
 	/* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */
 	outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	tmp2 = inb (VIA_CONFIG_DATA);
-	if (siofunc == VIA_FUNCTION_PROBE)
-	{
-	    siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
-	    ppcontrol = tmp2;
+	tmp2 = inb(VIA_CONFIG_DATA);
+	if (siofunc == VIA_FUNCTION_PROBE) {
+		siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
+		ppcontrol = tmp2;
+	} else {
+		tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
+		tmp |= siofunc;
+		outb(via->viacfg_function, VIA_CONFIG_INDEX);
+		outb(tmp, VIA_CONFIG_DATA);
+		tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
+		tmp2 |= ppcontrol;
+		outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
+		outb(tmp2, VIA_CONFIG_DATA);
 	}
-	else
-	{
-	    tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
-	    tmp |= siofunc;
-	    outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	    outb(tmp, VIA_CONFIG_DATA);
-	    tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
-	    tmp2 |= ppcontrol;
-	    outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	    outb(tmp2, VIA_CONFIG_DATA);
-	}
-	
+
 	/* Parallel Port I/O Base Address, bits 9-2 */
 	outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
 	port1 = inb(VIA_CONFIG_DATA) << 2;
-	
-	printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1);
-	if ((port1 == 0x3BC) && have_epp)
-	{
-	    outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
-	    outb((0x378 >> 2), VIA_CONFIG_DATA);
-	    printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n");
-	    port1 = 0x378;
+
+	printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",
+									port1);
+	if (port1 == 0x3BC && have_epp) {
+		outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
+		outb((0x378 >> 2), VIA_CONFIG_DATA);
+		printk(KERN_DEBUG
+			"parport_pc: Parallel port base changed to 0x378\n");
+		port1 = 0x378;
 	}
 
 	/*
@@ -2667,36 +2749,39 @@
 		printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n");
 		return 0;
 	}
-	
+
 	/* Bits 7-4: PnP Routing for Parallel Port IRQ */
 	pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp);
 	irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4);
 
-	if (siofunc == VIA_FUNCTION_PARPORT_ECP)
-	{
-	    /* Bits 3-2: PnP Routing for Parallel Port DMA */
-	    pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
-	    dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
-	}
-	else
-	    /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */
-	    dma = PARPORT_DMA_NONE;
+	if (siofunc == VIA_FUNCTION_PARPORT_ECP) {
+		/* Bits 3-2: PnP Routing for Parallel Port DMA */
+		pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
+		dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
+	} else
+		/* if ECP not enabled, DMA is not enabled, assumed
+		   bogus 'dma' value */
+		dma = PARPORT_DMA_NONE;
 
 	/* Let the user (or defaults) steer us away from interrupts and DMA */
 	if (autoirq == PARPORT_IRQ_NONE) {
-	    irq = PARPORT_IRQ_NONE;
-	    dma = PARPORT_DMA_NONE;
+		irq = PARPORT_IRQ_NONE;
+		dma = PARPORT_DMA_NONE;
 	}
 	if (autodma == PARPORT_DMA_NONE)
-	    dma = PARPORT_DMA_NONE;
+		dma = PARPORT_DMA_NONE;
 
 	switch (port1) {
-	case 0x3bc: port2 = 0x7bc; break;
-	case 0x378: port2 = 0x778; break;
-	case 0x278: port2 = 0x678; break;
+	case 0x3bc:
+		port2 = 0x7bc; break;
+	case 0x378:
+		port2 = 0x778; break;
+	case 0x278:
+		port2 = 0x678; break;
 	default:
-		printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
-			port1);
+		printk(KERN_INFO
+			"parport_pc: Weird VIA parport base 0x%X, ignoring\n",
+									port1);
 		return 0;
 	}
 
@@ -2714,17 +2799,17 @@
 	}
 
 	/* finally, do the probe with values obtained */
-	if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) {
-		printk (KERN_INFO
+	if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) {
+		printk(KERN_INFO
 			"parport_pc: VIA parallel port: io=0x%X", port1);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
+			printk(", irq=%d", irq);
 		if (dma != PARPORT_DMA_NONE)
-			printk (", dma=%d", dma);
-		printk ("\n");
+			printk(", dma=%d", dma);
+		printk("\n");
 		return 1;
 	}
-	
+
 	printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n",
 		port1, irq, dma);
 	return 0;
@@ -2732,8 +2817,8 @@
 
 
 enum parport_pc_sio_types {
-	sio_via_686a = 0,	/* Via VT82C686A motherboard Super I/O */
-	sio_via_8231,		/* Via VT8231 south bridge integrated Super IO */
+	sio_via_686a = 0,   /* Via VT82C686A motherboard Super I/O */
+	sio_via_8231,	    /* Via VT8231 south bridge integrated Super IO */
 	sio_ite_8872,
 	last_sio
 };
@@ -2804,15 +2889,15 @@
 };
 
 
-/* each element directly indexed from enum list, above 
+/* each element directly indexed from enum list, above
  * (but offset by last_sio) */
 static struct parport_pc_pci {
 	int numports;
 	struct { /* BAR (base address registers) numbers in the config
-                    space header */
+		    space header */
 		int lo;
-		int hi; /* -1 if not there, >6 for offset-method (max
-                           BAR is 6) */
+		int hi;
+		/* -1 if not there, >6 for offset-method (max BAR is 6) */
 	} addr[4];
 
 	/* If set, this is called immediately after pci_enable_device.
@@ -2857,7 +2942,7 @@
 	/* timedia_4018  */             { 2, { { 0, 1 }, { 2, 3 }, } },
 	/* timedia_9018a */             { 2, { { 0, 1 }, { 2, 3 }, } },
 					/* SYBA uses fixed offsets in
-                                           a 1K io window */
+					   a 1K io window */
 	/* syba_2p_epp AP138B */	{ 2, { { 0, 0x078 }, { 0, 0x178 }, } },
 	/* syba_1p_ecp W83787 */	{ 1, { { 0, 0x078 }, } },
 	/* titan_010l */		{ 1, { { 3, -1 }, } },
@@ -2873,11 +2958,14 @@
 	/* oxsemi_pcie_pport */		{ 1, { { 0, 1 }, } },
 	/* aks_0100 */                  { 1, { { 0, -1 }, } },
 	/* mobility_pp */		{ 1, { { 0, 1 }, } },
-	/* netmos_9705 */               { 1, { { 0, -1 }, } }, /* untested */
-        /* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-        /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-	/* netmos_9805 */               { 1, { { 0, -1 }, } }, /* untested */
-	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
+
+	/* The netmos entries below are untested */
+	/* netmos_9705 */               { 1, { { 0, -1 }, } },
+	/* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9805 */               { 1, { { 0, -1 }, } },
+	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
+
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 };
 
@@ -2906,7 +2994,7 @@
 	{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar },
 	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
-	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 },
+	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 },
 	/* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/
 	{ 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a },
 	{ 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h },
@@ -2940,7 +3028,8 @@
 	{ 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
 	{ 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
 	/* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
-	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */
+	/* AFAVLAB_TK9902 */
+	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
 	{ 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p},
 	{ PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 },
@@ -2983,14 +3072,14 @@
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
 	{ 0, } /* terminate list */
 };
-MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl);
+MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
 
 struct pci_parport_data {
 	int num;
 	struct parport *ports[2];
 };
 
-static int parport_pc_pci_probe (struct pci_dev *dev,
+static int parport_pc_pci_probe(struct pci_dev *dev,
 					   const struct pci_device_id *id)
 {
 	int err, count, n, i = id->driver_data;
@@ -3003,7 +3092,8 @@
 	/* This is a PCI card */
 	i -= last_sio;
 	count = 0;
-	if ((err = pci_enable_device (dev)) != 0)
+	err = pci_enable_device(dev);
+	if (err)
 		return err;
 
 	data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL);
@@ -3011,7 +3101,7 @@
 		return -ENOMEM;
 
 	if (cards[i].preinit_hook &&
-	    cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
+	    cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
 		kfree(data);
 		return -ENODEV;
 	}
@@ -3021,25 +3111,25 @@
 		int hi = cards[i].addr[n].hi;
 		int irq;
 		unsigned long io_lo, io_hi;
-		io_lo = pci_resource_start (dev, lo);
+		io_lo = pci_resource_start(dev, lo);
 		io_hi = 0;
 		if ((hi >= 0) && (hi <= 6))
-			io_hi = pci_resource_start (dev, hi);
+			io_hi = pci_resource_start(dev, hi);
 		else if (hi > 6)
 			io_lo += hi; /* Reinterpret the meaning of
-                                        "hi" as an offset (see SYBA
-                                        def.) */
+					"hi" as an offset (see SYBA
+					def.) */
 		/* TODO: test if sharing interrupts works */
 		irq = dev->irq;
 		if (irq == IRQ_NONE) {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
 				io_lo, io_hi);
 			irq = PARPORT_IRQ_NONE;
 		} else {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
@@ -3056,7 +3146,7 @@
 	data->num = count;
 
 	if (cards[i].postinit_hook)
-		cards[i].postinit_hook (dev, count == 0);
+		cards[i].postinit_hook(dev, count == 0);
 
 	if (count) {
 		pci_set_drvdata(dev, data);
@@ -3090,7 +3180,7 @@
 	.remove		= __devexit_p(parport_pc_pci_remove),
 };
 
-static int __init parport_pc_init_superio (int autoirq, int autodma)
+static int __init parport_pc_init_superio(int autoirq, int autodma)
 {
 	const struct pci_device_id *id;
 	struct pci_dev *pdev = NULL;
@@ -3101,8 +3191,9 @@
 		if (id == NULL || id->driver_data >= last_sio)
 			continue;
 
-		if (parport_pc_superio_info[id->driver_data].probe
-			(pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) {
+		if (parport_pc_superio_info[id->driver_data].probe(
+			pdev, autoirq, autodma,
+			parport_pc_superio_info[id->driver_data].via)) {
 			ret++;
 		}
 	}
@@ -3111,7 +3202,10 @@
 }
 #else
 static struct pci_driver parport_pc_pci_driver;
-static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;}
+static int __init parport_pc_init_superio(int autoirq, int autodma)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI */
 
 #ifdef CONFIG_PNP
@@ -3124,44 +3218,45 @@
 	{ }
 };
 
-MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl);
+MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl);
 
-static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
+static int parport_pc_pnp_probe(struct pnp_dev *dev,
+						const struct pnp_device_id *id)
 {
 	struct parport *pdata;
 	unsigned long io_lo, io_hi;
 	int dma, irq;
 
-	if (pnp_port_valid(dev,0) &&
-		!(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) {
-		io_lo = pnp_port_start(dev,0);
+	if (pnp_port_valid(dev, 0) &&
+		!(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		io_lo = pnp_port_start(dev, 0);
 	} else
 		return -EINVAL;
 
-	if (pnp_port_valid(dev,1) &&
-		!(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) {
-		io_hi = pnp_port_start(dev,1);
+	if (pnp_port_valid(dev, 1) &&
+		!(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) {
+		io_hi = pnp_port_start(dev, 1);
 	} else
 		io_hi = 0;
 
-	if (pnp_irq_valid(dev,0) &&
-		!(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) {
-		irq = pnp_irq(dev,0);
+	if (pnp_irq_valid(dev, 0) &&
+		!(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		irq = pnp_irq(dev, 0);
 	} else
 		irq = PARPORT_IRQ_NONE;
 
-	if (pnp_dma_valid(dev,0) &&
-		!(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) {
-		dma = pnp_dma(dev,0);
+	if (pnp_dma_valid(dev, 0) &&
+		!(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		dma = pnp_dma(dev, 0);
 	} else
 		dma = PARPORT_DMA_NONE;
 
 	dev_info(&dev->dev, "reported by %s\n", dev->protocol->name);
-	if (!(pdata = parport_pc_probe_port(io_lo, io_hi,
-					irq, dma, &dev->dev, 0)))
+	pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0);
+	if (pdata == NULL)
 		return -ENODEV;
 
-	pnp_set_drvdata(dev,pdata);
+	pnp_set_drvdata(dev, pdata);
 	return 0;
 }
 
@@ -3203,7 +3298,7 @@
 
 /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */
 static int __devinit __attribute__((unused))
-parport_pc_find_isa_ports (int autoirq, int autodma)
+parport_pc_find_isa_ports(int autoirq, int autodma)
 {
 	int count = 0;
 
@@ -3227,7 +3322,7 @@
  * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
  * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
  */
-static void __init parport_pc_find_ports (int autoirq, int autodma)
+static void __init parport_pc_find_ports(int autoirq, int autodma)
 {
 	int count = 0, err;
 
@@ -3261,11 +3356,18 @@
  *	syntax and keep in mind that code below is a cleaned up version.
  */
 
-static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 };
-static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] =
-	{ [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO };
-static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE };
-static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY };
+static int __initdata io[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = 0
+};
+static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO
+};
+static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE
+};
+static int __initdata irqval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY
+};
 
 static int __init parport_parse_param(const char *s, int *val,
 				int automatic, int none, int nofifo)
@@ -3306,18 +3408,19 @@
 #ifdef CONFIG_PCI
 static int __init parport_init_mode_setup(char *str)
 {
-	printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
+	printk(KERN_DEBUG
+	     "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
 
-	if (!strcmp (str, "spp"))
-		parport_init_mode=1;
-	if (!strcmp (str, "ps2"))
-		parport_init_mode=2;
-	if (!strcmp (str, "epp"))
-		parport_init_mode=3;
-	if (!strcmp (str, "ecp"))
-		parport_init_mode=4;
-	if (!strcmp (str, "ecpepp"))
-		parport_init_mode=5;
+	if (!strcmp(str, "spp"))
+		parport_init_mode = 1;
+	if (!strcmp(str, "ps2"))
+		parport_init_mode = 2;
+	if (!strcmp(str, "epp"))
+		parport_init_mode = 3;
+	if (!strcmp(str, "ecp"))
+		parport_init_mode = 4;
+	if (!strcmp(str, "ecpepp"))
+		parport_init_mode = 5;
 	return 1;
 }
 #endif
@@ -3341,7 +3444,8 @@
 #endif
 #ifdef CONFIG_PCI
 static char *init_mode;
-MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
+MODULE_PARM_DESC(init_mode,
+	"Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
 module_param(init_mode, charp, 0);
 #endif
 
@@ -3372,7 +3476,7 @@
 				irqval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: irq specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3385,7 +3489,7 @@
 				dmaval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: dma specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3396,7 +3500,7 @@
 
 #else
 
-static int parport_setup_ptr __initdata = 0;
+static int parport_setup_ptr __initdata;
 
 /*
  * Acceptable parameters:
@@ -3407,7 +3511,7 @@
  *
  * IRQ/DMA may be numeric or 'auto' or 'none'
  */
-static int __init parport_setup (char *str)
+static int __init parport_setup(char *str)
 {
 	char *endptr;
 	char *sep;
@@ -3419,15 +3523,15 @@
 		return 1;
 	}
 
-	if (!strncmp (str, "auto", 4)) {
+	if (!strncmp(str, "auto", 4)) {
 		irqval[0] = PARPORT_IRQ_AUTO;
 		dmaval[0] = PARPORT_DMA_AUTO;
 		return 1;
 	}
 
-	val = simple_strtoul (str, &endptr, 0);
+	val = simple_strtoul(str, &endptr, 0);
 	if (endptr == str) {
-		printk (KERN_WARNING "parport=%s not understood\n", str);
+		printk(KERN_WARNING "parport=%s not understood\n", str);
 		return 1;
 	}
 
@@ -3461,7 +3565,7 @@
 	return io[0] == PARPORT_DISABLE;
 }
 
-__setup ("parport=", parport_setup);
+__setup("parport=", parport_setup);
 
 /*
  * Acceptable parameters:
@@ -3469,7 +3573,7 @@
  * parport_init_mode=[spp|ps2|epp|ecp|ecpepp]
  */
 #ifdef CONFIG_PCI
-__setup("parport_init_mode=",parport_init_mode_setup);
+__setup("parport_init_mode=", parport_init_mode_setup);
 #endif
 #endif
 
@@ -3493,13 +3597,13 @@
 		for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) {
 			if (!io[i])
 				break;
-			if ((io_hi[i]) == PARPORT_IOHI_AUTO)
-			       io_hi[i] = 0x400 + io[i];
+			if (io_hi[i] == PARPORT_IOHI_AUTO)
+				io_hi[i] = 0x400 + io[i];
 			parport_pc_probe_port(io[i], io_hi[i],
-					  irqval[i], dmaval[i], NULL, 0);
+					irqval[i], dmaval[i], NULL, 0);
 		}
 	} else
-		parport_pc_find_ports (irqval[0], dmaval[0]);
+		parport_pc_find_ports(irqval[0], dmaval[0]);
 
 	return 0;
 }
@@ -3507,9 +3611,9 @@
 static void __exit parport_pc_exit(void)
 {
 	if (pci_registered_parport)
-		pci_unregister_driver (&parport_pc_pci_driver);
+		pci_unregister_driver(&parport_pc_pci_driver);
 	if (pnp_registered_parport)
-		pnp_unregister_driver (&parport_pc_pnp_driver);
+		pnp_unregister_driver(&parport_pc_pnp_driver);
 	platform_driver_unregister(&parport_pc_platform_driver);
 
 	while (!list_empty(&ports_list)) {
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e1716f14..91e316f 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1065,6 +1065,7 @@
 		return blk_trace_setup(sdp->device->request_queue,
 				       sdp->disk->disk_name,
 				       MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
+				       NULL,
 				       (char *)arg);
 	case BLKTRACESTART:
 		return blk_trace_startstop(sdp->device->request_queue, 1);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a0127e9..fb867a9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -287,6 +287,13 @@
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_AR7] = {
+		.name		= "AR7",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 938bc1b..e371a9c 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2776,6 +2776,9 @@
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
+	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
+		PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
+		pbn_b0_1_921600 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_4_115200 },
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 343e3a3..641e800 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -833,6 +833,7 @@
 	bool "IMX serial port support"
 	depends on ARM && (ARCH_IMX || ARCH_MXC)
 	select SERIAL_CORE
+	select RATIONAL
 	help
 	  If you have a machine based on a Motorola IMX CPU you
 	  can enable its onboard serial port by enabling this option.
@@ -1433,4 +1434,11 @@
 	default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
 	default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
 
+config SERIAL_TIMBERDALE
+	tristate "Support for timberdale UART"
+	depends on MFD_TIMBERDALE
+	select SERIAL_CORE
+	---help---
+	Add support for UART controller on timberdale.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d438eb2..45a8658 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -77,3 +77,4 @@
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
+obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index d86123e..e2f6b1bf 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -330,6 +330,11 @@
 		/* Clear TFI bit */
 		UART_PUT_LSR(uart, TFI);
 #endif
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		return;
 	}
@@ -415,6 +420,7 @@
 	set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail));
 	set_dma_x_count(uart->tx_dma_channel, uart->tx_count);
 	set_dma_x_modify(uart->tx_dma_channel, 1);
+	SSYNC();
 	enable_dma(uart->tx_dma_channel);
 
 	UART_SET_IER(uart, ETBEI);
@@ -473,27 +479,41 @@
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
 	int x_pos, pos;
-	unsigned long flags;
 
-	spin_lock_irqsave(&uart->port.lock, flags);
+	dma_disable_irq(uart->rx_dma_channel);
+	spin_lock_bh(&uart->port.lock);
 
+	/* 2D DMA RX buffer ring is used. Because curr_y_count and
+	 * curr_x_count can't be read as an atomic operation,
+	 * curr_y_count should be read before curr_x_count. When
+	 * curr_x_count is read, curr_y_count may already indicate
+	 * next buffer line. But, the position calculated here is
+	 * still indicate the old line. The wrong position data may
+	 * be smaller than current buffer tail, which cause garbages
+	 * are received if it is not prohibit.
+	 */
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
 	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
-	if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
 		uart->rx_dma_nrows = 0;
 	x_pos = DMA_RX_XCOUNT - x_pos;
 	if (x_pos == DMA_RX_XCOUNT)
 		x_pos = 0;
 
 	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos;
-	if (pos != uart->rx_dma_buf.tail) {
+	/* Ignore receiving data if new position is in the same line of
+	 * current buffer tail and small.
+	 */
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
 		uart->rx_dma_buf.head = pos;
 		bfin_serial_dma_rx_chars(uart);
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
 	}
 
-	spin_unlock_irqrestore(&uart->port.lock, flags);
+	spin_unlock_bh(&uart->port.lock);
+	dma_enable_irq(uart->rx_dma_channel);
 
 	mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
@@ -514,6 +534,11 @@
 	if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) {
 		disable_dma(uart->tx_dma_channel);
 		clear_dma_irqstat(uart->tx_dma_channel);
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1);
 		uart->port.icount.tx += uart->tx_count;
@@ -532,11 +557,26 @@
 {
 	struct bfin_serial_port *uart = dev_id;
 	unsigned short irqstat;
+	int x_pos, pos;
 
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
-	bfin_serial_dma_rx_chars(uart);
+
+	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
+	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
+	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
+		uart->rx_dma_nrows = 0;
+
+	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
+		uart->rx_dma_buf.head = pos;
+		bfin_serial_dma_rx_chars(uart);
+		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
+	}
+
 	spin_unlock(&uart->port.lock);
 
 	return IRQ_HANDLED;
@@ -789,8 +829,16 @@
 			__func__);
 	}
 
-	if (termios->c_cflag & CSTOPB)
-		lcr |= STB;
+	/* Anomaly notes:
+	 *  05000231 -  STOP bit is always set to 1 whatever the user is set.
+	 */
+	if (termios->c_cflag & CSTOPB) {
+		if (ANOMALY_05000231)
+			printk(KERN_WARNING "STOP bits other than 1 is not "
+				"supported in case of anomaly 05000231.\n");
+		else
+			lcr |= STB;
+	}
 	if (termios->c_cflag & PARENB)
 		lcr |= PEN;
 	if (!(termios->c_cflag & PARODD))
@@ -940,6 +988,10 @@
 }
 
 #ifdef CONFIG_CONSOLE_POLL
+/* Anomaly notes:
+ *  05000099 -  Because we only use THRE in poll_put and DR in poll_get,
+ *		losing other bits of UART_LSR is not a problem here.
+ */
 static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
@@ -1245,12 +1297,17 @@
 	}
 }
 
+/*
+ * This should have a .setup or .early_setup in it, but then things get called
+ * without the command line options, and the baud rate gets messed up - so
+ * don't let the common infrastructure play with things. (see calls to setup
+ * & earlysetup in ./kernel/printk.c:register_console()
+ */
 static struct __initdata console bfin_early_serial_console = {
 	.name = "early_BFuart",
 	.write = early_serial_write,
 	.device = uart_console_device,
 	.flags = CON_PRINTBUFFER,
-	.setup = bfin_serial_console_setup,
 	.index = -1,
 	.data  = &bfin_serial_reg,
 };
diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index 529c0ff..34b4ae0 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -101,15 +101,16 @@
 {
 	pr_debug("%s value:%x\n", __func__, value);
 	/* Place a Start and Stop bit */
-	__asm__ volatile (
-		"R2 = b#01111111100;\n\t"
-		"R3 = b#10000000001;\n\t"
-		"%0 <<= 2;\n\t"
-		"%0 = %0 & R2;\n\t"
-		"%0 = %0 | R3;\n\t"
-		:"=r"(value)
-		:"0"(value)
-		:"R2", "R3");
+	__asm__ __volatile__ (
+		"R2 = b#01111111100;"
+		"R3 = b#10000000001;"
+		"%0 <<= 2;"
+		"%0 = %0 & R2;"
+		"%0 = %0 | R3;"
+		: "=d"(value)
+		: "d"(value)
+		: "ASTAT", "R2", "R3"
+	);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	SPORT_PUT_TX(up, value);
@@ -118,27 +119,30 @@
 static inline unsigned int rx_one_byte(struct sport_uart_port *up)
 {
 	unsigned int value, extract;
+	u32 tmp_mask1, tmp_mask2, tmp_shift, tmp;
 
 	value = SPORT_GET_RX32(up);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	/* Extract 8 bits data */
-	__asm__ volatile (
-		"R5 = 0;\n\t"
-		"P0 = 8;\n\t"
-		"R1 = 0x1801(Z);\n\t"
-		"R3 = 0x0300(Z);\n\t"
-		"R4 = 0;\n\t"
-		"LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t"
-		"R2 = extract(%1, R1.L)(Z);\n\t"
-		"R2 <<= R4;\n\t"
-		"R5 = R5 | R2;\n\t"
-		"R1 = R1 - R3;\nloop_e:\t"
-		"R4 += 1;\n\t"
-		"%0 = R5;\n\t"
-		:"=r"(extract)
-		:"r"(value)
-		:"P0", "R1", "R2","R3","R4", "R5");
+	__asm__ __volatile__ (
+		"%[extr] = 0;"
+		"%[mask1] = 0x1801(Z);"
+		"%[mask2] = 0x0300(Z);"
+		"%[shift] = 0;"
+		"LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];"
+		".Lloop_s:"
+		"%[tmp] = extract(%[val], %[mask1].L)(Z);"
+		"%[tmp] <<= %[shift];"
+		"%[extr] = %[extr] | %[tmp];"
+		"%[mask1] = %[mask1] - %[mask2];"
+		".Lloop_e:"
+		"%[shift] += 1;"
+		: [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp),
+		  [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2)
+		: "d"(value), [lc]"a"(8)
+		: "ASTAT", "LB0", "LC0", "LT0"
+	);
 
 	pr_debug("	extract:%x\n", extract);
 	return extract;
@@ -149,7 +153,7 @@
 	int tclkdiv, tfsdiv, rclkdiv;
 
 	/* Set TCR1 and TCR2 */
-	SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
+	SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK));
 	SPORT_PUT_TCR2(up, 10);
 	pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
@@ -419,7 +423,7 @@
 }
 
 static void sport_set_termios(struct uart_port *port,
-		struct termios *termios, struct termios *old)
+		struct ktermios *termios, struct ktermios *old)
 {
 	pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
 	uart_update_timeout(port, CS8 ,port->uartclk);
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index a461b3b..9f2891c 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -137,7 +137,12 @@
 static spinlock_t icom_lock;
 
 #ifdef ICOM_TRACE
-static inline void trace(struct icom_port *, char *, unsigned long) {};
+static inline void trace(struct icom_port *icom_port, char *trace_pt,
+			unsigned long trace_data)
+{
+	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
+	icom_port->port, trace_pt, trace_data);
+}
 #else
 static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {};
 #endif
@@ -408,7 +413,7 @@
 	release_firmware(fw);
 
 	/* Set Hardware level */
-	if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2)
+	if (icom_port->adapter->version == ADAPTER_V2)
 		writeb(V2_HARDWARE, &(icom_port->dram->misc_flags));
 
 	/* Start the processor in Adapter */
@@ -861,7 +866,7 @@
 	/* find icom_port for this interrupt */
 	icom_adapter = (struct icom_adapter *) dev_id;
 
-	if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) {
+	if (icom_adapter->version == ADAPTER_V2) {
 		int_reg = icom_adapter->base_addr + 0x8024;
 
 		adapter_interrupts = readl(int_reg);
@@ -1647,15 +1652,6 @@
 module_init(icom_init);
 module_exit(icom_exit);
 
-#ifdef ICOM_TRACE
-static inline void trace(struct icom_port *icom_port, char *trace_pt,
-		  unsigned long trace_data)
-{
-	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
-		 icom_port->port, trace_pt, trace_data);
-}
-#endif
-
 MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
 MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
 MODULE_SUPPORTED_DEVICE
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 5f0be40..7b5d1de 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -8,6 +8,9 @@
  *  Author: Sascha Hauer <sascha@saschahauer.de>
  *  Copyright (C) 2004 Pengutronix
  *
+ *  Copyright (C) 2009 emlix GmbH
+ *  Author: Fabian Godehardt (added IrDA support for iMX)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -41,6 +44,8 @@
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/rational.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -148,6 +153,7 @@
 #define  UCR4_DREN  	 (1<<0)  /* Recv data ready interrupt enable */
 #define  UFCR_RXTL_SHF   0       /* Receiver trigger level shift */
 #define  UFCR_RFDIV      (7<<7)  /* Reference freq divider mask */
+#define  UFCR_RFDIV_REG(x)	(((x) < 7 ? 6 - (x) : 6) << 7)
 #define  UFCR_TXTL_SHF   10      /* Transmitter trigger level shift */
 #define  USR1_PARITYERR  (1<<15) /* Parity error interrupt flag */
 #define  USR1_RTSS  	 (1<<14) /* RTS pin status */
@@ -211,10 +217,20 @@
 	struct timer_list	timer;
 	unsigned int		old_status;
 	int			txirq,rxirq,rtsirq;
-	int			have_rtscts:1;
+	unsigned int		have_rtscts:1;
+	unsigned int		use_irda:1;
+	unsigned int		irda_inv_rx:1;
+	unsigned int		irda_inv_tx:1;
+	unsigned short		trcv_delay; /* transceiver delay */
 	struct clk		*clk;
 };
 
+#ifdef CONFIG_IRDA
+#define USE_IRDA(sport)	((sport)->use_irda)
+#else
+#define USE_IRDA(sport)	(0)
+#endif
+
 /*
  * Handle any change of modem status signal since we were last called.
  */
@@ -268,6 +284,48 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex - wait for end of transmission */
+		int n = 256;
+		while ((--n > 0) &&
+		      !(readl(sport->port.membase + USR2) & USR2_TXDC)) {
+			udelay(5);
+			barrier();
+		}
+		/*
+		 * irda transceiver - wait a bit more to avoid
+		 * cutoff, hardware dependent
+		 */
+		udelay(sport->trcv_delay);
+
+		/*
+		 * half duplex - reactivate receive mode,
+		 * flush receive pipe echo crap
+		 */
+		if (readl(sport->port.membase + USR2) & USR2_TXDC) {
+			temp = readl(sport->port.membase + UCR1);
+			temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN);
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp &= ~(UCR4_TCEN);
+			writel(temp, sport->port.membase + UCR4);
+
+			while (readl(sport->port.membase + URXD0) &
+			       URXD_CHARRDY)
+				barrier();
+
+			temp = readl(sport->port.membase + UCR1);
+			temp |= UCR1_RRDYEN;
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp |= UCR4_DREN;
+			writel(temp, sport->port.membase + UCR4);
+		}
+		return;
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1);
 }
@@ -302,13 +360,15 @@
 		/* send xmit->buf[xmit->tail]
 		 * out the port here */
 		writel(xmit->buf[xmit->tail], sport->port.membase + URTX0);
-		xmit->tail = (xmit->tail + 1) &
-		         (UART_XMIT_SIZE - 1);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
 		sport->port.icount.tx++;
 		if (uart_circ_empty(xmit))
 			break;
 	}
 
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&sport->port);
+
 	if (uart_circ_empty(xmit))
 		imx_stop_tx(&sport->port);
 }
@@ -321,9 +381,30 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex in IrDA mode; have to disable receive mode */
+		temp = readl(sport->port.membase + UCR4);
+		temp &= ~(UCR4_DREN);
+		writel(temp, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR1);
+		temp &= ~(UCR1_RRDYEN);
+		writel(temp, sport->port.membase + UCR1);
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1);
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR1);
+		temp |= UCR1_TRDYEN;
+		writel(temp, sport->port.membase + UCR1);
+
+		temp = readl(sport->port.membase + UCR4);
+		temp |= UCR4_TCEN;
+		writel(temp, sport->port.membase + UCR4);
+	}
+
 	if (readl(sport->port.membase + UTS) & UTS_TXEMPTY)
 		imx_transmit_buffer(sport);
 }
@@ -395,8 +476,7 @@
 				continue;
 		}
 
-		if (uart_handle_sysrq_char
-		            (&sport->port, (unsigned char)rx))
+		if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx))
 			continue;
 
 		if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) {
@@ -471,26 +551,26 @@
  */
 static unsigned int imx_get_mctrl(struct uart_port *port)
 {
-        struct imx_port *sport = (struct imx_port *)port;
-        unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
+	struct imx_port *sport = (struct imx_port *)port;
+	unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
 
-        if (readl(sport->port.membase + USR1) & USR1_RTSS)
-                tmp |= TIOCM_CTS;
+	if (readl(sport->port.membase + USR1) & USR1_RTSS)
+		tmp |= TIOCM_CTS;
 
-        if (readl(sport->port.membase + UCR2) & UCR2_CTS)
-                tmp |= TIOCM_RTS;
+	if (readl(sport->port.membase + UCR2) & UCR2_CTS)
+		tmp |= TIOCM_RTS;
 
-        return tmp;
+	return tmp;
 }
 
 static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-        struct imx_port *sport = (struct imx_port *)port;
+	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
 	temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS;
 
-        if (mctrl & TIOCM_RTS)
+	if (mctrl & TIOCM_RTS)
 		temp |= UCR2_CTS;
 
 	writel(temp, sport->port.membase + UCR2);
@@ -534,12 +614,7 @@
 	if(!ufcr_rfdiv)
 		ufcr_rfdiv = 1;
 
-	if(ufcr_rfdiv >= 7)
-		ufcr_rfdiv = 6;
-	else
-		ufcr_rfdiv = 6 - ufcr_rfdiv;
-
-	val |= UFCR_RFDIV & (ufcr_rfdiv << 7);
+	val |= UFCR_RFDIV_REG(ufcr_rfdiv);
 
 	writel(val, sport->port.membase + UFCR);
 
@@ -558,8 +633,24 @@
 	 * requesting IRQs
 	 */
 	temp = readl(sport->port.membase + UCR4);
+
+	if (USE_IRDA(sport))
+		temp |= UCR4_IRSC;
+
 	writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
+	if (USE_IRDA(sport)) {
+		/* reset fifo's and state machines */
+		int i = 100;
+		temp = readl(sport->port.membase + UCR2);
+		temp &= ~UCR2_SRST;
+		writel(temp, sport->port.membase + UCR2);
+		while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) &&
+		    (--i > 0)) {
+			udelay(1);
+		}
+	}
+
 	/*
 	 * Allocate the IRQ(s) i.MX1 has three interrupts whereas later
 	 * chips only have one interrupt.
@@ -575,12 +666,16 @@
 		if (retval)
 			goto error_out2;
 
-		retval = request_irq(sport->rtsirq, imx_rtsint,
-			     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
-			       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				DRIVER_NAME, sport);
-		if (retval)
-			goto error_out3;
+		/* do not use RTS IRQ on IrDA */
+		if (!USE_IRDA(sport)) {
+			retval = request_irq(sport->rtsirq, imx_rtsint,
+				     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
+				       IRQF_TRIGGER_FALLING |
+				       IRQF_TRIGGER_RISING,
+					DRIVER_NAME, sport);
+			if (retval)
+				goto error_out3;
+		}
 	} else {
 		retval = request_irq(sport->port.irq, imx_int, 0,
 				DRIVER_NAME, sport);
@@ -597,18 +692,49 @@
 
 	temp = readl(sport->port.membase + UCR1);
 	temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN;
+
+	if (USE_IRDA(sport)) {
+		temp |= UCR1_IREN;
+		temp &= ~(UCR1_RTSDEN);
+	}
+
 	writel(temp, sport->port.membase + UCR1);
 
 	temp = readl(sport->port.membase + UCR2);
 	temp |= (UCR2_RXEN | UCR2_TXEN);
 	writel(temp, sport->port.membase + UCR2);
 
+	if (USE_IRDA(sport)) {
+		/* clear RX-FIFO */
+		int i = 64;
+		while ((--i > 0) &&
+			(readl(sport->port.membase + URXD0) & URXD_CHARRDY)) {
+			barrier();
+		}
+	}
+
 #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3
 	temp = readl(sport->port.membase + UCR3);
 	temp |= UCR3_RXDMUXSEL;
 	writel(temp, sport->port.membase + UCR3);
 #endif
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR4);
+		if (sport->irda_inv_rx)
+			temp |= UCR4_INVR;
+		else
+			temp &= ~(UCR4_INVR);
+		writel(temp | UCR4_DREN, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR3);
+		if (sport->irda_inv_tx)
+			temp |= UCR3_INVT;
+		else
+			temp &= ~(UCR3_INVT);
+		writel(temp, sport->port.membase + UCR3);
+	}
+
 	/*
 	 * Enable modem status interrupts
 	 */
@@ -616,6 +742,16 @@
 	imx_enable_ms(&sport->port);
 	spin_unlock_irqrestore(&sport->port.lock,flags);
 
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		sport->irda_inv_rx = pdata->irda_inv_rx;
+		sport->irda_inv_tx = pdata->irda_inv_tx;
+		sport->trcv_delay = pdata->transceiver_delay;
+		if (pdata->irda_enable)
+			pdata->irda_enable(1);
+	}
+
 	return 0;
 
 error_out3:
@@ -633,6 +769,17 @@
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	temp = readl(sport->port.membase + UCR2);
+	temp &= ~(UCR2_TXEN);
+	writel(temp, sport->port.membase + UCR2);
+
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		if (pdata->irda_enable)
+			pdata->irda_enable(0);
+	}
+
 	/*
 	 * Stop our timer.
 	 */
@@ -642,7 +789,8 @@
 	 * Free the interrupts
 	 */
 	if (sport->txirq > 0) {
-		free_irq(sport->rtsirq, sport);
+		if (!USE_IRDA(sport))
+			free_irq(sport->rtsirq, sport);
 		free_irq(sport->txirq, sport);
 		free_irq(sport->rxirq, sport);
 	} else
@@ -654,6 +802,9 @@
 
 	temp = readl(sport->port.membase + UCR1);
 	temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
+	if (USE_IRDA(sport))
+		temp &= ~(UCR1_IREN);
+
 	writel(temp, sport->port.membase + UCR1);
 }
 
@@ -665,7 +816,9 @@
 	unsigned long flags;
 	unsigned int ucr2, old_ucr1, old_txrxen, baud, quot;
 	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
-	unsigned int div, num, denom, ufcr;
+	unsigned int div, ufcr;
+	unsigned long num, denom;
+	uint64_t tdiv64;
 
 	/*
 	 * If we don't support modem control lines, don't allow
@@ -761,38 +914,39 @@
 			sport->port.membase + UCR2);
 	old_txrxen &= (UCR2_TXEN | UCR2_RXEN);
 
-	div = sport->port.uartclk / (baud * 16);
-	if (div > 7)
-		div = 7;
-	if (!div)
+	if (USE_IRDA(sport)) {
+		/*
+		 * use maximum available submodule frequency to
+		 * avoid missing short pulses due to low sampling rate
+		 */
 		div = 1;
-
-	num = baud;
-	denom = port->uartclk / div / 16;
-
-	/* shift num and denom right until they fit into 16 bits */
-	while (num > 0x10000 || denom > 0x10000) {
-		num >>= 1;
-		denom >>= 1;
+	} else {
+		div = sport->port.uartclk / (baud * 16);
+		if (div > 7)
+			div = 7;
+		if (!div)
+			div = 1;
 	}
-	if (num > 0)
-		num -= 1;
-	if (denom > 0)
-		denom -= 1;
+
+	rational_best_approximation(16 * div * baud, sport->port.uartclk,
+		1 << 16, 1 << 16, &num, &denom);
+
+	tdiv64 = sport->port.uartclk;
+	tdiv64 *= num;
+	do_div(tdiv64, denom * 16 * div);
+	tty_encode_baud_rate(sport->port.info->port.tty,
+		(speed_t)tdiv64, (speed_t)tdiv64);
+
+	num -= 1;
+	denom -= 1;
+
+	ufcr = readl(sport->port.membase + UFCR);
+	ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div);
+	writel(ufcr, sport->port.membase + UFCR);
 
 	writel(num, sport->port.membase + UBIR);
 	writel(denom, sport->port.membase + UBMR);
 
-	if (div == 7)
-		div = 6; /* 6 in RFDIV means divide by 7 */
-	else
-		div = 6 - div;
-
-	ufcr = readl(sport->port.membase + UFCR);
-	ufcr = (ufcr & (~UFCR_RFDIV)) |
-	    (div << 7);
-	writel(ufcr, sport->port.membase + UFCR);
-
 #ifdef ONEMS
 	writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS);
 #endif
@@ -1072,22 +1226,22 @@
 
 static int serial_imx_suspend(struct platform_device *dev, pm_message_t state)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_suspend_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_suspend_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_resume(struct platform_device *dev)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_resume_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_resume_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_probe(struct platform_device *pdev)
@@ -1143,19 +1297,29 @@
 	imx_ports[pdev->id] = sport;
 
 	pdata = pdev->dev.platform_data;
-	if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
+	if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
 		sport->have_rtscts = 1;
 
+#ifdef CONFIG_IRDA
+	if (pdata && (pdata->flags & IMXUART_IRDA))
+		sport->use_irda = 1;
+#endif
+
 	if (pdata->init) {
 		ret = pdata->init(pdev);
 		if (ret)
 			goto clkput;
 	}
 
-	uart_add_one_port(&imx_reg, &sport->port);
+	ret = uart_add_one_port(&imx_reg, &sport->port);
+	if (ret)
+		goto deinit;
 	platform_set_drvdata(pdev, &sport->port);
 
 	return 0;
+deinit:
+	if (pdata->exit)
+		pdata->exit(pdev);
 clkput:
 	clk_put(sport->clk);
 	clk_disable(sport->clk);
@@ -1193,13 +1357,13 @@
 }
 
 static struct platform_driver serial_imx_driver = {
-        .probe          = serial_imx_probe,
-        .remove         = serial_imx_remove,
+	.probe		= serial_imx_probe,
+	.remove		= serial_imx_remove,
 
 	.suspend	= serial_imx_suspend,
 	.resume		= serial_imx_resume,
 	.driver		= {
-	        .name	= "imx-uart",
+		.name	= "imx-uart",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h
index c0a3e27..4e5f3bd 100644
--- a/drivers/serial/jsm/jsm.h
+++ b/drivers/serial/jsm/jsm.h
@@ -61,6 +61,7 @@
 	if ((DBG_##nlevel & jsm_debug))			\
 	dev_printk(KERN_##klevel, pdev->dev, fmt, ## args)
 
+#define	MAXLINES	256
 #define MAXPORTS	8
 #define MAX_STOPS_SENT	5
 
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index 31496dc0..107ce2e1 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -33,6 +33,8 @@
 
 #include "jsm.h"
 
+static DECLARE_BITMAP(linemap, MAXLINES);
+
 static void jsm_carrier(struct jsm_channel *ch);
 
 static inline int jsm_get_mstat(struct jsm_channel *ch)
@@ -433,6 +435,7 @@
 int __devinit jsm_uart_port_init(struct jsm_board *brd)
 {
 	int i;
+	unsigned int line;
 	struct jsm_channel *ch;
 
 	if (!brd)
@@ -459,9 +462,15 @@
 		brd->channels[i]->uart_port.membase = brd->re_map_membase;
 		brd->channels[i]->uart_port.fifosize = 16;
 		brd->channels[i]->uart_port.ops = &jsm_ops;
-		brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2;
+		line = find_first_zero_bit(linemap, MAXLINES);
+		if (line >= MAXLINES) {
+			printk(KERN_INFO "jsm: linemap is full, added device failed\n");
+			continue;
+		} else
+			set_bit((int)line, linemap);
+		brd->channels[i]->uart_port.line = line;
 		if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
-			printk(KERN_INFO "Added device failed\n");
+			printk(KERN_INFO "jsm: add device failed\n");
 		else
 			printk(KERN_INFO "Added device \n");
 	}
@@ -494,6 +503,7 @@
 
 		ch = brd->channels[i];
 
+		clear_bit((int)(ch->uart_port.line), linemap);
 		uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
 	}
 
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644
index 0000000..ac9e5d5
--- /dev/null
+++ b/drivers/serial/timbuart.c
@@ -0,0 +1,526 @@
+/*
+ * timbuart.c timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+
+#include "timbuart.h"
+
+struct timbuart_port {
+	struct uart_port	port;
+	struct tasklet_struct	tasklet;
+	int			usedma;
+	u8			last_ier;
+	struct platform_device  *dev;
+};
+
+static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
+	921600, 1843200, 3250000};
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
+
+static void timbuart_stop_rx(struct uart_port *port)
+{
+	/* spin lock held by upper layer, disable all RX interrupts */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_stop_tx(struct uart_port *port)
+{
+	/* spinlock held by upper layer, disable TX interrupt */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_start_tx(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	/* do not transfer anything here -> fire off the tasklet */
+	tasklet_schedule(&uart->tasklet);
+}
+
+static void timbuart_flush_buffer(struct uart_port *port)
+{
+	u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX;
+
+	iowrite8(ctl, port->membase + TIMBUART_CTRL);
+	iowrite8(TXBF, port->membase + TIMBUART_ISR);
+}
+
+static void timbuart_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+
+	while (ioread8(port->membase + TIMBUART_ISR) & RXDP) {
+		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
+		port->icount.rx++;
+		tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	}
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(port->info->port.tty);
+	spin_lock(&port->lock);
+
+	dev_dbg(port->dev, "%s - total read %d bytes\n",
+		__func__, port->icount.rx);
+}
+
+static void timbuart_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->info->xmit;
+
+	while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) &&
+		!uart_circ_empty(xmit)) {
+		iowrite8(xmit->buf[xmit->tail],
+			port->membase + TIMBUART_TXFIFO);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	dev_dbg(port->dev,
+		"%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
+		 __func__,
+		port->icount.tx,
+		ioread8(port->membase + TIMBUART_CTRL),
+		port->mctrl & TIOCM_RTS,
+		ioread8(port->membase + TIMBUART_BAUDRATE));
+}
+
+static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	struct circ_buf *xmit = &port->info->xmit;
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+		return;
+
+	if (port->x_char)
+		return;
+
+	if (isr & TXFLAGS) {
+		timbuart_tx_chars(port);
+		/* clear all TX interrupts */
+		iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);
+
+		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
+	} else
+		/* Re-enable any tx interrupt */
+		*ier |= uart->last_ier & TXFLAGS;
+
+	/* enable interrupts if there are chars in the transmit buffer,
+	 * Or if we delivered some bytes and want the almost empty interrupt
+	 * we wake up the upper layer later when we got the interrupt
+	 * to give it some time to go out...
+	 */
+	if (!uart_circ_empty(xmit))
+		*ier |= TXBAE;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	if (isr & RXFLAGS) {
+		/* Some RX status is set */
+		if (isr & RXBF) {
+			u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
+				TIMBUART_CTRL_FLSHRX;
+			iowrite8(ctl, port->membase + TIMBUART_CTRL);
+			port->icount.overrun++;
+		} else if (isr & (RXDP))
+			timbuart_rx_chars(port);
+
+		/* ack all RX interrupts */
+		iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
+	}
+
+	/* always have the RX interrupts enabled */
+	*ier |= RXBAF | RXBF | RXTT;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_tasklet(unsigned long arg)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)arg;
+	u8 isr, ier = 0;
+
+	spin_lock(&uart->port.lock);
+
+	isr = ioread8(uart->port.membase + TIMBUART_ISR);
+	dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);
+
+	if (!uart->usedma)
+		timbuart_handle_tx_port(&uart->port, isr, &ier);
+
+	timbuart_mctrl_check(&uart->port, isr, &ier);
+
+	if (!uart->usedma)
+		timbuart_handle_rx_port(&uart->port, isr, &ier);
+
+	iowrite8(ier, uart->port.membase + TIMBUART_IER);
+
+	spin_unlock(&uart->port.lock);
+	dev_dbg(uart->port.dev, "%s leaving\n", __func__);
+}
+
+static unsigned int timbuart_tx_empty(struct uart_port *port)
+{
+	u8 isr = ioread8(port->membase + TIMBUART_ISR);
+
+	return (isr & TXBAE) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int timbuart_get_mctrl(struct uart_port *port)
+{
+	u8 cts = ioread8(port->membase + TIMBUART_CTRL);
+	dev_dbg(port->dev, "%s - cts %x\n", __func__, cts);
+
+	if (cts & TIMBUART_CTRL_CTS)
+		return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
+	else
+		return TIOCM_DSR | TIOCM_CAR;
+}
+
+static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
+
+	if (mctrl & TIOCM_RTS)
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+	else
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+}
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
+{
+	unsigned int cts;
+
+	if (isr & CTS_DELTA) {
+		/* ack */
+		iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
+		cts = timbuart_get_mctrl(port);
+		uart_handle_cts_change(port, cts & TIOCM_CTS);
+		wake_up_interruptible(&port->info->delta_msr_wait);
+	}
+
+	*ier |= CTS_DELTA;
+}
+
+static void timbuart_enable_ms(struct uart_port *port)
+{
+	/* N/A */
+}
+
+static void timbuart_break_ctl(struct uart_port *port, int ctl)
+{
+	/* N/A */
+}
+
+static int timbuart_startup(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	dev_dbg(port->dev, "%s\n", __func__);
+
+	iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
+	iowrite8(0xff, port->membase + TIMBUART_ISR);
+	/* Enable all but TX interrupts */
+	iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
+		port->membase + TIMBUART_IER);
+
+	return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
+		"timb-uart", uart);
+}
+
+static void timbuart_shutdown(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	dev_dbg(port->dev, "%s\n", __func__);
+	free_irq(port->irq, uart);
+	iowrite8(0, port->membase + TIMBUART_IER);
+}
+
+static int get_bindex(int baud)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(baudrates); i++)
+		if (baud <= baudrates[i])
+			return i;
+
+	return -1;
+}
+
+static void timbuart_set_termios(struct uart_port *port,
+	struct ktermios *termios,
+	struct ktermios *old)
+{
+	unsigned int baud;
+	short bindex;
+	unsigned long flags;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+	bindex = get_bindex(baud);
+	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
+
+	if (bindex < 0)
+		bindex = 0;
+	baud = baudrates[bindex];
+
+	/* The serial layer calls into this once with old = NULL when setting
+	   up initially */
+	if (old)
+		tty_termios_copy_hw(termios, old);
+	tty_termios_encode_baud_rate(termios, baud, baud);
+
+	spin_lock_irqsave(&port->lock, flags);
+	iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+	uart_update_timeout(port, termios->c_cflag, baud);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *timbuart_type(struct uart_port *port)
+{
+	return port->type == PORT_UNKNOWN ? "timbuart" : NULL;
+}
+
+/* We do not request/release mappings of the registers here,
+ * currently it's done in the proble function.
+ */
+static void timbuart_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (port->flags & UPF_IOREMAP) {
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+
+	release_mem_region(port->mapbase, size);
+}
+
+static int timbuart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (!request_mem_region(port->mapbase, size, "timb-uart"))
+		return -EBUSY;
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = ioremap(port->mapbase, size);
+		if (port->membase == NULL) {
+			release_mem_region(port->mapbase, size);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)devid;
+
+	if (ioread8(uart->port.membase + TIMBUART_IPR)) {
+		uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER);
+
+		/* disable interrupts, the tasklet enables them again */
+		iowrite8(0, uart->port.membase + TIMBUART_IER);
+
+		/* fire off bottom half */
+		tasklet_schedule(&uart->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * Configure/autoconfigure the port.
+ */
+static void timbuart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_TIMBUART;
+		timbuart_request_port(port);
+	}
+}
+
+static int timbuart_verify_port(struct uart_port *port,
+	struct serial_struct *ser)
+{
+	/* we don't want the core code to modify any port params */
+	return -EINVAL;
+}
+
+static struct uart_ops timbuart_ops = {
+	.tx_empty = timbuart_tx_empty,
+	.set_mctrl = timbuart_set_mctrl,
+	.get_mctrl = timbuart_get_mctrl,
+	.stop_tx = timbuart_stop_tx,
+	.start_tx = timbuart_start_tx,
+	.flush_buffer = timbuart_flush_buffer,
+	.stop_rx = timbuart_stop_rx,
+	.enable_ms = timbuart_enable_ms,
+	.break_ctl = timbuart_break_ctl,
+	.startup = timbuart_startup,
+	.shutdown = timbuart_shutdown,
+	.set_termios = timbuart_set_termios,
+	.type = timbuart_type,
+	.release_port = timbuart_release_port,
+	.request_port = timbuart_request_port,
+	.config_port = timbuart_config_port,
+	.verify_port = timbuart_verify_port
+};
+
+static struct uart_driver timbuart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "timberdale_uart",
+	.dev_name = "ttyTU",
+	.major = TIMBUART_MAJOR,
+	.minor = TIMBUART_MINOR,
+	.nr = 1
+};
+
+static int timbuart_probe(struct platform_device *dev)
+{
+	int err;
+	struct timbuart_port *uart;
+	struct resource *iomem;
+
+	dev_dbg(&dev->dev, "%s\n", __func__);
+
+	uart = kzalloc(sizeof(*uart), GFP_KERNEL);
+	if (!uart) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	uart->usedma = 0;
+
+	uart->port.uartclk = 3250000 * 16;
+	uart->port.fifosize  = TIMBUART_FIFO_SIZE;
+	uart->port.regshift  = 2;
+	uart->port.iotype  = UPIO_MEM;
+	uart->port.ops = &timbuart_ops;
+	uart->port.irq = 0;
+	uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+	uart->port.line  = 0;
+	uart->port.dev	= &dev->dev;
+
+	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		err = -ENOMEM;
+		goto err_register;
+	}
+	uart->port.mapbase = iomem->start;
+	uart->port.membase = NULL;
+
+	uart->port.irq = platform_get_irq(dev, 0);
+	if (uart->port.irq < 0) {
+		err = -EINVAL;
+		goto err_register;
+	}
+
+	tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
+
+	err = uart_register_driver(&timbuart_driver);
+	if (err)
+		goto err_register;
+
+	err = uart_add_one_port(&timbuart_driver, &uart->port);
+	if (err)
+		goto err_add_port;
+
+	platform_set_drvdata(dev, uart);
+
+	return 0;
+
+err_add_port:
+	uart_unregister_driver(&timbuart_driver);
+err_register:
+	kfree(uart);
+err_mem:
+	printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
+		err);
+
+	return err;
+}
+
+static int timbuart_remove(struct platform_device *dev)
+{
+	struct timbuart_port *uart = platform_get_drvdata(dev);
+
+	tasklet_kill(&uart->tasklet);
+	uart_remove_one_port(&timbuart_driver, &uart->port);
+	uart_unregister_driver(&timbuart_driver);
+	kfree(uart);
+
+	return 0;
+}
+
+static struct platform_driver timbuart_platform_driver = {
+	.driver = {
+		.name	= "timb-uart",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= timbuart_probe,
+	.remove		= timbuart_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __init timbuart_init(void)
+{
+	return platform_driver_register(&timbuart_platform_driver);
+}
+
+static void __exit timbuart_exit(void)
+{
+	platform_driver_unregister(&timbuart_platform_driver);
+}
+
+module_init(timbuart_init);
+module_exit(timbuart_exit);
+
+MODULE_DESCRIPTION("Timberdale UART driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:timb-uart");
+
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644
index 0000000..7e56676
--- /dev/null
+++ b/drivers/serial/timbuart.h
@@ -0,0 +1,58 @@
+/*
+ * timbuart.c timberdale FPGA GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#ifndef _TIMBUART_H
+#define _TIMBUART_H
+
+#define TIMBUART_FIFO_SIZE	2048
+
+#define TIMBUART_RXFIFO		0x08
+#define TIMBUART_TXFIFO		0x0c
+#define TIMBUART_IER		0x10
+#define TIMBUART_IPR		0x14
+#define TIMBUART_ISR		0x18
+#define TIMBUART_CTRL		0x1c
+#define TIMBUART_BAUDRATE	0x20
+
+#define TIMBUART_CTRL_RTS	0x01
+#define TIMBUART_CTRL_CTS	0x02
+#define TIMBUART_CTRL_FLSHTX	0x40
+#define TIMBUART_CTRL_FLSHRX	0x80
+
+#define TXBF		0x01
+#define TXBAE		0x02
+#define CTS_DELTA	0x04
+#define RXDP		0x08
+#define RXBAF		0x10
+#define RXBF		0x20
+#define RXTT		0x40
+#define RXBNAE		0x80
+#define TXBE		0x100
+
+#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
+#define TXFLAGS (TXBF | TXBAE)
+
+#define TIMBUART_MAJOR 204
+#define TIMBUART_MINOR 192
+
+#endif /* _TIMBUART_H */
+
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7a1164d..ddeb691 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -16,7 +16,8 @@
  *	v0.9  - thorough cleaning, URBification, almost a rewrite
  *	v0.10 - some more cleanups
  *	v0.11 - fixed flow control, read error doesn't stop reads
- *	v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced
+ *	v0.12 - added TIOCM ioctls, added break handling, made struct acm
+ *		kmalloced
  *	v0.13 - added termios, added hangup
  *	v0.14 - sized down struct acm
  *	v0.15 - fixed flow control again - characters could be lost
@@ -62,7 +63,7 @@
 #include <linux/tty_flip.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
 #include <asm/byteorder.h>
@@ -87,7 +88,10 @@
 
 static DEFINE_MUTEX(open_mutex);
 
-#define ACM_READY(acm)	(acm && acm->dev && acm->used)
+#define ACM_READY(acm)	(acm && acm->dev && acm->port.count)
+
+static const struct tty_port_operations acm_port_ops = {
+};
 
 #ifdef VERBOSE_DEBUG
 #define verbose	1
@@ -99,13 +103,15 @@
  * Functions for ACM control messages.
  */
 
-static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len)
+static int acm_ctrl_msg(struct acm *acm, int request, int value,
+							void *buf, int len)
 {
 	int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0),
 		request, USB_RT_ACM, value,
 		acm->control->altsetting[0].desc.bInterfaceNumber,
 		buf, len, 5000);
-	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval);
+	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d",
+						request, value, len, retval);
 	return retval < 0 ? retval : 0;
 }
 
@@ -150,9 +156,8 @@
 
 	n = ACM_NW;
 	spin_lock_irqsave(&acm->write_lock, flags);
-	for (i = 0; i < ACM_NW; i++) {
+	for (i = 0; i < ACM_NW; i++)
 		n -= acm->wb[i].use;
-	}
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 	return n;
 }
@@ -183,7 +188,8 @@
 	wb->urb->transfer_buffer_length = wb->len;
 	wb->urb->dev = acm->dev;
 
-	if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) {
+	rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
+	if (rc < 0) {
 		dbg("usb_submit_urb(write bulk) failed: %d", rc);
 		acm_write_done(acm, wb);
 	}
@@ -262,6 +268,7 @@
 {
 	struct acm *acm = urb->context;
 	struct usb_cdc_notification *dr = urb->transfer_buffer;
+	struct tty_struct *tty;
 	unsigned char *data;
 	int newctrl;
 	int retval;
@@ -287,40 +294,45 @@
 
 	data = (unsigned char *)(dr + 1);
 	switch (dr->bNotificationType) {
+	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+		dbg("%s network", dr->wValue ?
+					"connected to" : "disconnected from");
+		break;
 
-		case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+	case USB_CDC_NOTIFY_SERIAL_STATE:
+		tty = tty_port_tty_get(&acm->port);
+		newctrl = get_unaligned_le16(data);
 
-			dbg("%s network", dr->wValue ? "connected to" : "disconnected from");
-			break;
-
-		case USB_CDC_NOTIFY_SERIAL_STATE:
-
-			newctrl = get_unaligned_le16(data);
-
-			if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
+		if (tty) {
+			if (!acm->clocal &&
+				(acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
 				dbg("calling hangup");
-				tty_hangup(acm->tty);
+				tty_hangup(tty);
 			}
+			tty_kref_put(tty);
+		}
 
-			acm->ctrlin = newctrl;
+		acm->ctrlin = newctrl;
 
-			dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
-				acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',	acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',	acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',	acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
-
+		dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
+			acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
 			break;
 
-		default:
-			dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
-				dr->bNotificationType, dr->wIndex,
-				dr->wLength, data[0], data[1]);
-			break;
+	default:
+		dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
+			dr->bNotificationType, dr->wIndex,
+			dr->wLength, data[0], data[1]);
+		break;
 	}
 exit:
 	usb_mark_last_busy(acm->dev);
-	retval = usb_submit_urb (urb, GFP_ATOMIC);
+	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval)
 		dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with "
 			"result %d", __func__, retval);
@@ -371,15 +383,14 @@
 {
 	struct acm *acm = (void *)_acm;
 	struct acm_rb *buf;
-	struct tty_struct *tty = acm->tty;
+	struct tty_struct *tty;
 	struct acm_ru *rcv;
 	unsigned long flags;
 	unsigned char throttled;
 
 	dbg("Entering acm_rx_tasklet");
 
-	if (!ACM_READY(acm))
-	{
+	if (!ACM_READY(acm)) {
 		dbg("acm_rx_tasklet: ACM not ready");
 		return;
 	}
@@ -387,12 +398,13 @@
 	spin_lock_irqsave(&acm->throttle_lock, flags);
 	throttled = acm->throttle;
 	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (throttled)
-	{
+	if (throttled) {
 		dbg("acm_rx_tasklet: throttled");
 		return;
 	}
 
+	tty = tty_port_tty_get(&acm->port);
+
 next_buffer:
 	spin_lock_irqsave(&acm->read_lock, flags);
 	if (list_empty(&acm->filled_read_bufs)) {
@@ -406,20 +418,22 @@
 
 	dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size);
 
-	tty_buffer_request_room(tty, buf->size);
-	spin_lock_irqsave(&acm->throttle_lock, flags);
-	throttled = acm->throttle;
-	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (!throttled)
-		tty_insert_flip_string(tty, buf->base, buf->size);
-	tty_flip_buffer_push(tty);
-
-	if (throttled) {
-		dbg("Throttling noticed");
-		spin_lock_irqsave(&acm->read_lock, flags);
-		list_add(&buf->list, &acm->filled_read_bufs);
-		spin_unlock_irqrestore(&acm->read_lock, flags);
-		return;
+	if (tty) {
+		spin_lock_irqsave(&acm->throttle_lock, flags);
+		throttled = acm->throttle;
+		spin_unlock_irqrestore(&acm->throttle_lock, flags);
+		if (!throttled) {
+			tty_buffer_request_room(tty, buf->size);
+			tty_insert_flip_string(tty, buf->base, buf->size);
+			tty_flip_buffer_push(tty);
+		} else {
+			tty_kref_put(tty);
+			dbg("Throttling noticed");
+			spin_lock_irqsave(&acm->read_lock, flags);
+			list_add(&buf->list, &acm->filled_read_bufs);
+			spin_unlock_irqrestore(&acm->read_lock, flags);
+			return;
+		}
 	}
 
 	spin_lock_irqsave(&acm->read_lock, flags);
@@ -428,6 +442,8 @@
 	goto next_buffer;
 
 urbs:
+	tty_kref_put(tty);
+
 	while (!list_empty(&acm->spare_read_bufs)) {
 		spin_lock_irqsave(&acm->read_lock, flags);
 		if (list_empty(&acm->spare_read_urbs)) {
@@ -454,10 +470,11 @@
 		rcv->urb->transfer_dma = buf->dma;
 		rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 
-		/* This shouldn't kill the driver as unsuccessful URBs are returned to the
-		   free-urbs-pool and resubmited ASAP */
+		/* This shouldn't kill the driver as unsuccessful URBs are
+		   returned to the free-urbs-pool and resubmited ASAP */
 		spin_lock_irqsave(&acm->read_lock, flags);
-		if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
+		if (acm->susp_count ||
+				usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
 			list_add(&buf->list, &acm->spare_read_bufs);
 			list_add(&rcv->list, &acm->spare_read_urbs);
 			acm->processing = 0;
@@ -499,11 +516,14 @@
 static void acm_softint(struct work_struct *work)
 {
 	struct acm *acm = container_of(work, struct acm, work);
+	struct tty_struct *tty;
 
 	dev_vdbg(&acm->data->dev, "tx work\n");
 	if (!ACM_READY(acm))
 		return;
-	tty_wakeup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	tty_wakeup(tty);
+	tty_kref_put(tty);
 }
 
 static void acm_waker(struct work_struct *waker)
@@ -543,8 +563,9 @@
 		rv = 0;
 
 	set_bit(TTY_NO_WRITE_SPLIT, &tty->flags);
+
 	tty->driver_data = acm;
-	acm->tty = tty;
+	tty_port_tty_set(&acm->port, tty);
 
 	if (usb_autopm_get_interface(acm->control) < 0)
 		goto early_bail;
@@ -552,11 +573,10 @@
 		acm->control->needs_remote_wakeup = 1;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used++) {
+	if (acm->port.count++) {
 		usb_autopm_put_interface(acm->control);
 		goto done;
-        }
-
+	}
 
 	acm->ctrlurb->dev = acm->dev;
 	if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) {
@@ -567,22 +587,22 @@
 	if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) &&
 	    (acm->ctrl_caps & USB_CDC_CAP_LINE))
 		goto full_bailout;
+
 	usb_autopm_put_interface(acm->control);
 
 	INIT_LIST_HEAD(&acm->spare_read_urbs);
 	INIT_LIST_HEAD(&acm->spare_read_bufs);
 	INIT_LIST_HEAD(&acm->filled_read_bufs);
-	for (i = 0; i < acm->rx_buflimit; i++) {
+
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
-	}
-	for (i = 0; i < acm->rx_buflimit; i++) {
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
-	}
 
 	acm->throttle = 0;
 
 	tasklet_schedule(&acm->urb_task);
-
+	rv = tty_port_block_til_ready(&acm->port, tty, filp);
 done:
 	mutex_unlock(&acm->mutex);
 err_out:
@@ -593,16 +613,17 @@
 	usb_kill_urb(acm->ctrlurb);
 bail_out:
 	usb_autopm_put_interface(acm->control);
-	acm->used--;
+	acm->port.count--;
 	mutex_unlock(&acm->mutex);
 early_bail:
 	mutex_unlock(&open_mutex);
+	tty_port_tty_set(&acm->port, NULL);
 	return -EIO;
 }
 
 static void acm_tty_unregister(struct acm *acm)
 {
-	int i,nr;
+	int i, nr;
 
 	nr = acm->rx_buflimit;
 	tty_unregister_device(acm_tty_driver, acm->minor);
@@ -619,41 +640,56 @@
 
 static int acm_tty_chars_in_buffer(struct tty_struct *tty);
 
-static void acm_tty_close(struct tty_struct *tty, struct file *filp)
+static void acm_port_down(struct acm *acm, int drain)
 {
-	struct acm *acm = tty->driver_data;
-	int i,nr;
-
-	if (!acm || !acm->used)
-		return;
-
-	nr = acm->rx_buflimit;
+	int i, nr = acm->rx_buflimit;
 	mutex_lock(&open_mutex);
-	if (!--acm->used) {
-		if (acm->dev) {
-			usb_autopm_get_interface(acm->control);
-			acm_set_control(acm, acm->ctrlout = 0);
-
-			/* try letting the last writes drain naturally */
+	if (acm->dev) {
+		usb_autopm_get_interface(acm->control);
+		acm_set_control(acm, acm->ctrlout = 0);
+		/* try letting the last writes drain naturally */
+		if (drain) {
 			wait_event_interruptible_timeout(acm->drain_wait,
-					(ACM_NW == acm_wb_is_avail(acm))
-						|| !acm->dev,
+				(ACM_NW == acm_wb_is_avail(acm)) || !acm->dev,
 					ACM_CLOSE_TIMEOUT * HZ);
-
-			usb_kill_urb(acm->ctrlurb);
-			for (i = 0; i < ACM_NW; i++)
-				usb_kill_urb(acm->wb[i].urb);
-			for (i = 0; i < nr; i++)
-				usb_kill_urb(acm->ru[i].urb);
-			acm->control->needs_remote_wakeup = 0;
-			usb_autopm_put_interface(acm->control);
-		} else
-			acm_tty_unregister(acm);
+		}
+		usb_kill_urb(acm->ctrlurb);
+		for (i = 0; i < ACM_NW; i++)
+			usb_kill_urb(acm->wb[i].urb);
+		for (i = 0; i < nr; i++)
+			usb_kill_urb(acm->ru[i].urb);
+		acm->control->needs_remote_wakeup = 0;
+		usb_autopm_put_interface(acm->control);
 	}
 	mutex_unlock(&open_mutex);
 }
 
-static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+static void acm_tty_hangup(struct tty_struct *tty)
+{
+	struct acm *acm = tty->driver_data;
+	tty_port_hangup(&acm->port);
+	acm_port_down(acm, 0);
+}
+
+static void acm_tty_close(struct tty_struct *tty, struct file *filp)
+{
+	struct acm *acm = tty->driver_data;
+
+	/* Perform the closing process and see if we need to do the hardware
+	   shutdown */
+	if (tty_port_close_start(&acm->port, tty, filp) == 0)
+		return;
+	acm_port_down(acm, 0);
+	tty_port_close_end(&acm->port, tty);
+	mutex_lock(&open_mutex);
+	tty_port_tty_set(&acm->port, NULL);
+	if (!acm->dev)
+		acm_tty_unregister(acm);
+	mutex_unlock(&open_mutex);
+}
+
+static int acm_tty_write(struct tty_struct *tty,
+					const unsigned char *buf, int count)
 {
 	struct acm *acm = tty->driver_data;
 	int stat;
@@ -669,7 +705,8 @@
 		return 0;
 
 	spin_lock_irqsave(&acm->write_lock, flags);
-	if ((wbn = acm_wb_alloc(acm)) < 0) {
+	wbn = acm_wb_alloc(acm);
+	if (wbn < 0) {
 		spin_unlock_irqrestore(&acm->write_lock, flags);
 		return 0;
 	}
@@ -681,7 +718,8 @@
 	wb->len = count;
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 
-	if ((stat = acm_write_start(acm, wbn)) < 0)
+	stat = acm_write_start(acm, wbn);
+	if (stat < 0)
 		return stat;
 	return count;
 }
@@ -767,8 +805,10 @@
 		return -EINVAL;
 
 	newctrl = acm->ctrlout;
-	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
-	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
 
 	newctrl = (newctrl & ~clear) | set;
 
@@ -777,7 +817,8 @@
 	return acm_set_control(acm, acm->ctrlout = newctrl);
 }
 
-static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+static int acm_tty_ioctl(struct tty_struct *tty, struct file *file,
+					unsigned int cmd, unsigned long arg)
 {
 	struct acm *acm = tty->driver_data;
 
@@ -799,7 +840,8 @@
 	5, 6, 7, 8
 };
 
-static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old)
+static void acm_tty_set_termios(struct tty_struct *tty,
+						struct ktermios *termios_old)
 {
 	struct acm *acm = tty->driver_data;
 	struct ktermios *termios = tty->termios;
@@ -809,19 +851,23 @@
 	if (!ACM_READY(acm))
 		return;
 
+	/* FIXME: Needs to support the tty_baud interface */
+	/* FIXME: Broken on sparc */
 	newline.dwDTERate = cpu_to_le32p(acm_tty_speed +
 		(termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0));
 	newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0;
 	newline.bParityType = termios->c_cflag & PARENB ?
-		(termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
+				(termios->c_cflag & PARODD ? 1 : 2) +
+				(termios->c_cflag & CMSPAR ? 2 : 0) : 0;
 	newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4];
-
+	/* FIXME: Needs to clear unsupported bits in the termios */
 	acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
 
 	if (!newline.dwDTERate) {
 		newline.dwDTERate = acm->line.dwDTERate;
 		newctrl &= ~ACM_CTRL_DTR;
-	} else  newctrl |=  ACM_CTRL_DTR;
+	} else
+		newctrl |=  ACM_CTRL_DTR;
 
 	if (newctrl != acm->ctrlout)
 		acm_set_control(acm, acm->ctrlout = newctrl);
@@ -846,9 +892,8 @@
 	struct acm_wb *wb;
 	struct usb_device *usb_dev = interface_to_usbdev(acm->control);
 
-	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) {
+	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++)
 		usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah);
-	}
 }
 
 static void acm_read_buffers_free(struct acm *acm)
@@ -857,7 +902,8 @@
 	int i, n = acm->rx_buflimit;
 
 	for (i = 0; i < n; i++)
-		usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
+		usb_buffer_free(usb_dev, acm->readsize,
+					acm->rb[i].base, acm->rb[i].dma);
 }
 
 /* Little helper: write buffers allocate */
@@ -882,8 +928,8 @@
 	return 0;
 }
 
-static int acm_probe (struct usb_interface *intf,
-		      const struct usb_device_id *id)
+static int acm_probe(struct usb_interface *intf,
+		     const struct usb_device_id *id)
 {
 	struct usb_cdc_union_desc *union_header = NULL;
 	struct usb_cdc_country_functional_desc *cfd = NULL;
@@ -897,7 +943,7 @@
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
 	struct acm *acm;
 	int minor;
-	int ctrlsize,readsize;
+	int ctrlsize, readsize;
 	u8 *buf;
 	u8 ac_management_function = 0;
 	u8 call_management_function = 0;
@@ -917,7 +963,7 @@
 		control_interface = usb_ifnum_to_if(usb_dev, 0);
 		goto skip_normal_probe;
 	}
-	
+
 	/* normal probing*/
 	if (!buffer) {
 		dev_err(&intf->dev, "Weird descriptor references\n");
@@ -925,8 +971,10 @@
 	}
 
 	if (!buflen) {
-		if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) {
-			dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n");
+		if (intf->cur_altsetting->endpoint->extralen &&
+				intf->cur_altsetting->endpoint->extra) {
+			dev_dbg(&intf->dev,
+				"Seeking extra descriptors on endpoint\n");
 			buflen = intf->cur_altsetting->endpoint->extralen;
 			buffer = intf->cur_altsetting->endpoint->extra;
 		} else {
@@ -937,47 +985,43 @@
 	}
 
 	while (buflen > 0) {
-		if (buffer [1] != USB_DT_CS_INTERFACE) {
+		if (buffer[1] != USB_DT_CS_INTERFACE) {
 			dev_err(&intf->dev, "skipping garbage\n");
 			goto next_desc;
 		}
 
-		switch (buffer [2]) {
-			case USB_CDC_UNION_TYPE: /* we've found it */
-				if (union_header) {
-					dev_err(&intf->dev, "More than one "
-						"union descriptor, "
-						"skipping ...\n");
-					goto next_desc;
-				}
-				union_header = (struct usb_cdc_union_desc *)
-							buffer;
-				break;
-			case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
-				cfd = (struct usb_cdc_country_functional_desc *)buffer;
-				break;
-			case USB_CDC_HEADER_TYPE: /* maybe check version */ 
-				break; /* for now we ignore it */ 
-			case USB_CDC_ACM_TYPE:
-				ac_management_function = buffer[3];
-				break;
-			case USB_CDC_CALL_MANAGEMENT_TYPE:
-				call_management_function = buffer[3];
-				call_interface_num = buffer[4];
-				if ((call_management_function & 3) != 3)
-					dev_err(&intf->dev, "This device "
-						"cannot do calls on its own. "
-						"It is no modem.\n");
-				break;
-			default:
-				/* there are LOTS more CDC descriptors that
-				 * could legitimately be found here.
-				 */
-				dev_dbg(&intf->dev, "Ignoring descriptor: "
-						"type %02x, length %d\n",
-						buffer[2], buffer[0]);
-				break;
+		switch (buffer[2]) {
+		case USB_CDC_UNION_TYPE: /* we've found it */
+			if (union_header) {
+				dev_err(&intf->dev, "More than one "
+					"union descriptor, skipping ...\n");
+				goto next_desc;
 			}
+			union_header = (struct usb_cdc_union_desc *)buffer;
+			break;
+		case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
+			cfd = (struct usb_cdc_country_functional_desc *)buffer;
+			break;
+		case USB_CDC_HEADER_TYPE: /* maybe check version */
+			break; /* for now we ignore it */
+		case USB_CDC_ACM_TYPE:
+			ac_management_function = buffer[3];
+			break;
+		case USB_CDC_CALL_MANAGEMENT_TYPE:
+			call_management_function = buffer[3];
+			call_interface_num = buffer[4];
+			if ((call_management_function & 3) != 3)
+				dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n");
+			break;
+		default:
+			/* there are LOTS more CDC descriptors that
+			 * could legitimately be found here.
+			 */
+			dev_dbg(&intf->dev, "Ignoring descriptor: "
+					"type %02x, length %d\n",
+					buffer[2], buffer[0]);
+			break;
+		}
 next_desc:
 		buflen -= buffer[0];
 		buffer += buffer[0];
@@ -985,33 +1029,36 @@
 
 	if (!union_header) {
 		if (call_interface_num > 0) {
-			dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n");
+			dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n");
 			data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
 			control_interface = intf;
 		} else {
-			dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+			dev_dbg(&intf->dev,
+					"No union descriptor, giving up\n");
 			return -ENODEV;
 		}
 	} else {
 		control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0);
 		data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0));
 		if (!control_interface || !data_interface) {
-			dev_dbg(&intf->dev,"no interfaces\n");
+			dev_dbg(&intf->dev, "no interfaces\n");
 			return -ENODEV;
 		}
 	}
-	
+
 	if (data_interface_num != call_interface_num)
-		dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n");
+		dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
 
 skip_normal_probe:
 
 	/*workaround for switched interfaces */
-	if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) {
-		if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) {
+	if (data_interface->cur_altsetting->desc.bInterfaceClass
+						!= CDC_DATA_INTERFACE_TYPE) {
+		if (control_interface->cur_altsetting->desc.bInterfaceClass
+						== CDC_DATA_INTERFACE_TYPE) {
 			struct usb_interface *t;
-			dev_dbg(&intf->dev,"Your device has switched interfaces.\n");
-
+			dev_dbg(&intf->dev,
+				"Your device has switched interfaces.\n");
 			t = control_interface;
 			control_interface = data_interface;
 			data_interface = t;
@@ -1023,9 +1070,9 @@
 	/* Accept probe requests only for the control interface */
 	if (intf != control_interface)
 		return -ENODEV;
-	
+
 	if (usb_interface_claimed(data_interface)) { /* valid in this context */
-		dev_dbg(&intf->dev,"The data interface isn't available\n");
+		dev_dbg(&intf->dev, "The data interface isn't available\n");
 		return -EBUSY;
 	}
 
@@ -1042,8 +1089,8 @@
 	if (!usb_endpoint_dir_in(epread)) {
 		/* descriptors are swapped */
 		struct usb_endpoint_descriptor *t;
-		dev_dbg(&intf->dev,"The data interface has switched endpoints\n");
-		
+		dev_dbg(&intf->dev,
+			"The data interface has switched endpoints\n");
 		t = epread;
 		epread = epwrite;
 		epwrite = t;
@@ -1056,13 +1103,15 @@
 		return -ENODEV;
 	}
 
-	if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) {
+	acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
+	if (acm == NULL) {
 		dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n");
 		goto alloc_fail;
 	}
 
 	ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
-	readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2);
+	readsize = le16_to_cpu(epread->wMaxPacketSize) *
+				(quirks == SINGLE_RX_URB ? 1 : 2);
 	acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20;
 	acm->control = control_interface;
 	acm->data = data_interface;
@@ -1082,6 +1131,8 @@
 	spin_lock_init(&acm->read_lock);
 	mutex_init(&acm->mutex);
 	acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
+	tty_port_init(&acm->port);
+	acm->port.ops = &acm_port_ops;
 
 	buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
 	if (!buf) {
@@ -1103,8 +1154,10 @@
 	for (i = 0; i < num_rx_buf; i++) {
 		struct acm_ru *rcv = &(acm->ru[i]);
 
-		if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n");
+		rcv->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (rcv->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (read urbs usb_alloc_urb)\n");
 			goto alloc_fail7;
 		}
 
@@ -1117,26 +1170,29 @@
 		rb->base = usb_buffer_alloc(acm->dev, readsize,
 				GFP_KERNEL, &rb->dma);
 		if (!rb->base) {
-			dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n");
+			dev_dbg(&intf->dev,
+				"out of memory (read bufs usb_buffer_alloc)\n");
 			goto alloc_fail7;
 		}
 	}
-	for(i = 0; i < ACM_NW; i++)
-	{
+	for (i = 0; i < ACM_NW; i++) {
 		struct acm_wb *snd = &(acm->wb[i]);
 
-		if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)");
+		snd->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (snd->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (write urbs usb_alloc_urb)");
 			goto alloc_fail7;
 		}
 
-		usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
-				NULL, acm->writesize, acm_write_bulk, snd);
+		usb_fill_bulk_urb(snd->urb, usb_dev,
+			usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
+			NULL, acm->writesize, acm_write_bulk, snd);
 		snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 		snd->instance = acm;
 	}
 
-	usb_set_intfdata (intf, acm);
+	usb_set_intfdata(intf, acm);
 
 	i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
 	if (i < 0)
@@ -1147,7 +1203,8 @@
 		if (!acm->country_codes)
 			goto skip_countries;
 		acm->country_code_size = cfd->bLength - 4;
-		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4);
+		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0,
+							cfd->bLength - 4);
 		acm->country_rel_date = cfd->iCountryCodeRelDate;
 
 		i = device_create_file(&intf->dev, &dev_attr_wCountryCodes);
@@ -1156,7 +1213,8 @@
 			goto skip_countries;
 		}
 
-		i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate);
+		i = device_create_file(&intf->dev,
+						&dev_attr_iCountryCodeRelDate);
 		if (i < 0) {
 			kfree(acm->country_codes);
 			goto skip_countries;
@@ -1164,8 +1222,10 @@
 	}
 
 skip_countries:
-	usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
-			 acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval);
+	usb_fill_int_urb(acm->ctrlurb, usb_dev,
+			usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
+			acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm,
+			epctrl->bInterval);
 	acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 	acm->ctrlurb->transfer_dma = acm->ctrl_dma;
 
@@ -1212,7 +1272,7 @@
 	tasklet_disable(&acm->urb_task);
 
 	usb_kill_urb(acm->ctrlurb);
-	for(i = 0; i < ACM_NW; i++)
+	for (i = 0; i < ACM_NW; i++)
 		usb_kill_urb(acm->wb[i].urb);
 	for (i = 0; i < acm->rx_buflimit; i++)
 		usb_kill_urb(acm->ru[i].urb);
@@ -1227,13 +1287,14 @@
 {
 	struct acm *acm = usb_get_intfdata(intf);
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
+	struct tty_struct *tty;
 
 	/* sibling interface is already cleaning up */
 	if (!acm)
 		return;
 
 	mutex_lock(&open_mutex);
-	if (acm->country_codes){
+	if (acm->country_codes) {
 		device_remove_file(&acm->control->dev,
 				&dev_attr_wCountryCodes);
 		device_remove_file(&acm->control->dev,
@@ -1247,22 +1308,25 @@
 	stop_data_traffic(acm);
 
 	acm_write_buffers_free(acm);
-	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
+	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer,
+								acm->ctrl_dma);
 	acm_read_buffers_free(acm);
 
 	usb_driver_release_interface(&acm_driver, intf == acm->control ?
 					acm->data : acm->control);
 
-	if (!acm->used) {
+	if (acm->port.count == 0) {
 		acm_tty_unregister(acm);
 		mutex_unlock(&open_mutex);
 		return;
 	}
 
 	mutex_unlock(&open_mutex);
-
-	if (acm->tty)
-		tty_hangup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	if (tty) {
+		tty_hangup(tty);
+		tty_kref_put(tty);
+	}
 }
 
 #ifdef CONFIG_PM
@@ -1297,7 +1361,7 @@
 	*/
 	mutex_lock(&acm->mutex);
 
-	if (acm->used)
+	if (acm->port.count)
 		stop_data_traffic(acm);
 
 	mutex_unlock(&acm->mutex);
@@ -1319,7 +1383,7 @@
 		return 0;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used) {
+	if (acm->port.count) {
 		rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO);
 		if (rv < 0)
 			goto err_out;
@@ -1398,7 +1462,7 @@
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_GSM) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
-		USB_CDC_ACM_PROTO_AT_3G	) },
+		USB_CDC_ACM_PROTO_AT_3G) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_CDMA) },
 
@@ -1406,7 +1470,7 @@
 	{ }
 };
 
-MODULE_DEVICE_TABLE (usb, acm_ids);
+MODULE_DEVICE_TABLE(usb, acm_ids);
 
 static struct usb_driver acm_driver = {
 	.name =		"cdc_acm",
@@ -1429,6 +1493,7 @@
 static const struct tty_operations acm_ops = {
 	.open =			acm_tty_open,
 	.close =		acm_tty_close,
+	.hangup =		acm_tty_hangup,
 	.write =		acm_tty_write,
 	.write_room =		acm_tty_write_room,
 	.ioctl =		acm_tty_ioctl,
@@ -1460,7 +1525,8 @@
 	acm_tty_driver->subtype = SERIAL_TYPE_NORMAL,
 	acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	acm_tty_driver->init_termios = tty_std_termios;
-	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD |
+								HUPCL | CLOCAL;
 	tty_set_operations(acm_tty_driver, &acm_ops);
 
 	retval = tty_register_driver(acm_tty_driver);
@@ -1492,7 +1558,7 @@
 module_init(acm_init);
 module_exit(acm_exit);
 
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 1f95e7aa1..4c38564 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -89,8 +89,8 @@
 	struct usb_device *dev;				/* the corresponding usb device */
 	struct usb_interface *control;			/* control interface */
 	struct usb_interface *data;			/* data interface */
-	struct tty_struct *tty;				/* the corresponding tty */
-	struct urb *ctrlurb;			/* urbs */
+	struct tty_port port;			 	/* our tty port data */
+	struct urb *ctrlurb;				/* urbs */
 	u8 *ctrl_buffer;				/* buffers of urbs */
 	dma_addr_t ctrl_dma;				/* dma handles of buffers */
 	u8 *country_codes;				/* country codes from device */
@@ -120,7 +120,6 @@
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
 	unsigned int writesize;				/* max packet size for the output bulk endpoint */
 	unsigned int readsize,ctrlsize;			/* buffer sizes for freeing */
-	unsigned int used;				/* someone has this acm's device open */
 	unsigned int minor;				/* acm minor number */
 	unsigned char throttle;				/* throttled by tty layer */
 	unsigned char clocal;				/* termios CLOCAL */
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b7eacad..2bfd6dd 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -93,8 +93,7 @@
 static void belkin_sa_shutdown(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void belkin_sa_close(struct usb_serial_port *port);
 static void belkin_sa_read_int_callback(struct urb *urb);
 static void belkin_sa_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@
 } /* belkin_sa_open */
 
 
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void belkin_sa_close(struct usb_serial_port *port)
 {
 	dbg("%s port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index ab4cc27..2830766 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -262,32 +262,40 @@
 	return r;
 }
 
-static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static int ch341_carrier_raised(struct usb_serial_port *port)
+{
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & CH341_BIT_DCD)
+		return 1;
+	return 0;
+}
+
+static void ch341_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct ch341_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
 
 	dbg("%s - port %d", __func__, port->number);
+	/* drop DTR and RTS */
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
+	else
+		priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	ch341_set_handshake(port->serial->dev, priv->line_control);
+	wake_up_interruptible(&priv->delta_msr_wait);
+}
+
+static void ch341_close(struct usb_serial_port *port)
+{
+	dbg("%s - port %d", __func__, port->number);
 
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			ch341_set_handshake(port->serial->dev, 0);
-		}
-	}
-	wake_up_interruptible(&priv->delta_msr_wait);
 }
 
 
@@ -302,7 +310,6 @@
 	dbg("ch341_open()");
 
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
@@ -322,7 +329,7 @@
 	if (r) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, r);
-		ch341_close(tty, port, NULL);
+		ch341_close(port);
 		return -EPROTO;
 	}
 
@@ -343,9 +350,6 @@
 
 	dbg("ch341_set_termios()");
 
-	if (!tty || !tty->termios)
-		return;
-
 	baud_rate = tty_get_baud_rate(tty);
 
 	priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@
 	.usb_driver        = &ch341_driver,
 	.num_ports         = 1,
 	.open              = ch341_open,
+	.dtr_rts	   = ch341_dtr_rts,
+	.carrier_raised	   = ch341_carrier_raised,
 	.close             = ch341_close,
 	.ioctl             = ch341_ioctl,
 	.set_termios       = ch341_set_termios,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 19e2404..247b61b 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -169,7 +169,9 @@
 			kfree(tty);
 		}
 	}
-
+	/* So we know not to kill the hardware on a hangup on this
+	   port. We have also bumped the use count by one so it won't go
+	   idle */
 	port->console = 1;
 	retval = 0;
 
@@ -182,7 +184,7 @@
 	kfree(tty);
 reset_open_count:
 	port->port.count = 0;
-goto out;
+	goto out;
 }
 
 static void usb_console_write(struct console *co,
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index e8d5133..16a154d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -1,5 +1,5 @@
 /*
- * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver
+ * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
  *
  * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
  *
@@ -27,44 +27,46 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.08"
-#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver"
+#define DRIVER_VERSION "v0.09"
+#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
 
 /*
  * Function Prototypes
  */
-static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
+static int cp210x_open(struct tty_struct *, struct usb_serial_port *,
 							struct file *);
-static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct tty_struct *, struct usb_serial_port *,
-							struct file*);
-static void cp2101_get_termios(struct tty_struct *,
+static void cp210x_cleanup(struct usb_serial_port *);
+static void cp210x_close(struct usb_serial_port *);
+static void cp210x_get_termios(struct tty_struct *,
 	struct usb_serial_port *port);
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp);
-static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *,
+static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
 							struct ktermios*);
-static int cp2101_tiocmget(struct tty_struct *, struct file *);
-static int cp2101_tiocmset(struct tty_struct *, struct file *,
+static int cp210x_tiocmget(struct tty_struct *, struct file *);
+static int cp210x_tiocmset(struct tty_struct *, struct file *,
 		unsigned int, unsigned int);
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
 		unsigned int, unsigned int);
-static void cp2101_break_ctl(struct tty_struct *, int);
-static int cp2101_startup(struct usb_serial *);
-static void cp2101_shutdown(struct usb_serial *);
+static void cp210x_break_ctl(struct tty_struct *, int);
+static int cp210x_startup(struct usb_serial *);
+static void cp210x_shutdown(struct usb_serial *);
 
 static int debug;
 
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
 	{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
+	{ USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
 	{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
+	{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
 	{ USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
 	{ USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
 	{ USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
 	{ USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
+	{ USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */
 	{ USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
 	{ USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
 	{ USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
@@ -85,10 +87,12 @@
 	{ USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
 	{ USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
 	{ USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
+	{ USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */
 	{ USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */
 	{ USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */
 	{ USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
@@ -99,7 +103,9 @@
 	{ USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */
 	{ USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
 	{ USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
+	{ USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
 	{ USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
+	{ USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
 	{ USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
 	{ USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
@@ -108,53 +114,70 @@
 
 MODULE_DEVICE_TABLE(usb, id_table);
 
-static struct usb_driver cp2101_driver = {
-	.name		= "cp2101",
+static struct usb_driver cp210x_driver = {
+	.name		= "cp210x",
 	.probe		= usb_serial_probe,
 	.disconnect	= usb_serial_disconnect,
 	.id_table	= id_table,
 	.no_dynamic_id	= 	1,
 };
 
-static struct usb_serial_driver cp2101_device = {
+static struct usb_serial_driver cp210x_device = {
 	.driver = {
 		.owner =	THIS_MODULE,
-		.name = 	"cp2101",
+		.name = 	"cp210x",
 	},
-	.usb_driver		= &cp2101_driver,
+	.usb_driver		= &cp210x_driver,
 	.id_table		= id_table,
 	.num_ports		= 1,
-	.open			= cp2101_open,
-	.close			= cp2101_close,
-	.break_ctl		= cp2101_break_ctl,
-	.set_termios		= cp2101_set_termios,
-	.tiocmget 		= cp2101_tiocmget,
-	.tiocmset		= cp2101_tiocmset,
-	.attach			= cp2101_startup,
-	.shutdown		= cp2101_shutdown,
+	.open			= cp210x_open,
+	.close			= cp210x_close,
+	.break_ctl		= cp210x_break_ctl,
+	.set_termios		= cp210x_set_termios,
+	.tiocmget 		= cp210x_tiocmget,
+	.tiocmset		= cp210x_tiocmset,
+	.attach			= cp210x_startup,
+	.shutdown		= cp210x_shutdown,
 };
 
 /* Config request types */
 #define REQTYPE_HOST_TO_DEVICE	0x41
 #define REQTYPE_DEVICE_TO_HOST	0xc1
 
-/* Config SET requests. To GET, add 1 to the request number */
-#define CP2101_UART 		0x00	/* Enable / Disable */
-#define CP2101_BAUDRATE		0x01	/* (BAUD_RATE_GEN_FREQ / baudrate) */
-#define CP2101_BITS		0x03	/* 0x(0)(databits)(parity)(stopbits) */
-#define CP2101_BREAK		0x05	/* On / Off */
-#define CP2101_CONTROL		0x07	/* Flow control line states */
-#define CP2101_MODEMCTL		0x13	/* Modem controls */
-#define CP2101_CONFIG_6		0x19	/* 6 bytes of config data ??? */
+/* Config request codes */
+#define CP210X_IFC_ENABLE	0x00
+#define CP210X_SET_BAUDDIV	0x01
+#define CP210X_GET_BAUDDIV	0x02
+#define CP210X_SET_LINE_CTL	0x03
+#define CP210X_GET_LINE_CTL	0x04
+#define CP210X_SET_BREAK	0x05
+#define CP210X_IMM_CHAR		0x06
+#define CP210X_SET_MHS		0x07
+#define CP210X_GET_MDMSTS	0x08
+#define CP210X_SET_XON		0x09
+#define CP210X_SET_XOFF		0x0A
+#define CP210X_SET_EVENTMASK	0x0B
+#define CP210X_GET_EVENTMASK	0x0C
+#define CP210X_SET_CHAR		0x0D
+#define CP210X_GET_CHARS	0x0E
+#define CP210X_GET_PROPS	0x0F
+#define CP210X_GET_COMM_STATUS	0x10
+#define CP210X_RESET		0x11
+#define CP210X_PURGE		0x12
+#define CP210X_SET_FLOW		0x13
+#define CP210X_GET_FLOW		0x14
+#define CP210X_EMBED_EVENTS	0x15
+#define CP210X_GET_EVENTSTATE	0x16
+#define CP210X_SET_CHARS	0x19
 
-/* CP2101_UART */
+/* CP210X_IFC_ENABLE */
 #define UART_ENABLE		0x0001
 #define UART_DISABLE		0x0000
 
-/* CP2101_BAUDRATE */
+/* CP210X_(SET|GET)_BAUDDIV */
 #define BAUD_RATE_GEN_FREQ	0x384000
 
-/* CP2101_BITS */
+/* CP210X_(SET|GET)_LINE_CTL */
 #define BITS_DATA_MASK		0X0f00
 #define BITS_DATA_5		0X0500
 #define BITS_DATA_6		0X0600
@@ -174,11 +197,11 @@
 #define BITS_STOP_1_5		0x0001
 #define BITS_STOP_2		0x0002
 
-/* CP2101_BREAK */
+/* CP210X_SET_BREAK */
 #define BREAK_ON		0x0000
 #define BREAK_OFF		0x0001
 
-/* CP2101_CONTROL */
+/* CP210X_(SET_MHS|GET_MDMSTS) */
 #define CONTROL_DTR		0x0001
 #define CONTROL_RTS		0x0002
 #define CONTROL_CTS		0x0010
@@ -189,13 +212,13 @@
 #define CONTROL_WRITE_RTS	0x0200
 
 /*
- * cp2101_get_config
- * Reads from the CP2101 configuration registers
+ * cp210x_get_config
+ * Reads from the CP210x configuration registers
  * 'size' is specified in bytes.
  * 'data' is a pointer to a pre-allocated array of integers large
  * enough to hold 'size' bytes (with 4 bytes to each integer)
  */
-static int cp2101_get_config(struct usb_serial_port *port, u8 request,
+static int cp210x_get_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -211,9 +234,6 @@
 		return -ENOMEM;
 	}
 
-	/* For get requests, the request number must be incremented */
-	request++;
-
 	/* Issue the request, attempting to read 'size' bytes */
 	result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
 				request, REQTYPE_DEVICE_TO_HOST, 0x0000,
@@ -236,12 +256,12 @@
 }
 
 /*
- * cp2101_set_config
- * Writes to the CP2101 configuration registers
+ * cp210x_set_config
+ * Writes to the CP210x configuration registers
  * Values less than 16 bits wide are sent directly
  * 'size' is specified in bytes.
  */
-static int cp2101_set_config(struct usb_serial_port *port, u8 request,
+static int cp210x_set_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -292,21 +312,21 @@
 }
 
 /*
- * cp2101_set_config_single
- * Convenience function for calling cp2101_set_config on single data values
+ * cp210x_set_config_single
+ * Convenience function for calling cp210x_set_config on single data values
  * without requiring an integer pointer
  */
-static inline int cp2101_set_config_single(struct usb_serial_port *port,
+static inline int cp210x_set_config_single(struct usb_serial_port *port,
 		u8 request, unsigned int data)
 {
-	return cp2101_set_config(port, request, &data, 2);
+	return cp210x_set_config(port, request, &data, 2);
 }
 
 /*
- * cp2101_quantise_baudrate
+ * cp210x_quantise_baudrate
  * Quantises the baud rate as per AN205 Table 1
  */
-static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
+static unsigned int cp210x_quantise_baudrate(unsigned int baud) {
 	if      (baud <= 56)       baud = 0;
 	else if (baud <= 300)      baud = 300;
 	else if (baud <= 600)      baud = 600;
@@ -343,7 +363,7 @@
 	return baud;
 }
 
-static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
+static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
 {
 	struct usb_serial *serial = port->serial;
@@ -351,7 +371,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) {
+	if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
 		dev_err(&port->dev, "%s - Unable to enable UART\n",
 				__func__);
 		return -EPROTO;
@@ -373,17 +393,17 @@
 	}
 
 	/* Configure the termios structure */
-	cp2101_get_termios(tty, port);
+	cp210x_get_termios(tty, port);
 
 	/* Set the DTR and RTS pins low */
-	cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
+	cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
 			: port,
 		NULL, TIOCM_DTR | TIOCM_RTS, 0);
 
 	return 0;
 }
 
-static void cp2101_cleanup(struct usb_serial_port *port)
+static void cp210x_cleanup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
@@ -398,8 +418,7 @@
 	}
 }
 
-static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp)
+static void cp210x_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
@@ -410,23 +429,23 @@
 
 	mutex_lock(&port->serial->disc_mutex);
 	if (!port->serial->disconnected)
-		cp2101_set_config_single(port, CP2101_UART, UART_DISABLE);
+		cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
 	mutex_unlock(&port->serial->disc_mutex);
 }
 
 /*
- * cp2101_get_termios
+ * cp210x_get_termios
  * Reads the baud rate, data bits, parity, stop bits and flow control mode
  * from the device, corrects any unsupported values, and configures the
  * termios structure to reflect the state of the device
  */
-static void cp2101_get_termios(struct tty_struct *tty,
+static void cp210x_get_termios(struct tty_struct *tty,
 	struct usb_serial_port *port)
 {
 	unsigned int baud;
 
 	if (tty) {
-		cp2101_get_termios_port(tty->driver_data,
+		cp210x_get_termios_port(tty->driver_data,
 			&tty->termios->c_cflag, &baud);
 		tty_encode_baud_rate(tty, baud, baud);
 	}
@@ -434,15 +453,15 @@
 	else {
 		unsigned int cflag;
 		cflag = 0;
-		cp2101_get_termios_port(port, &cflag, &baud);
+		cp210x_get_termios_port(port, &cflag, &baud);
 	}
 }
 
 /*
- * cp2101_get_termios_port
- * This is the heart of cp2101_get_termios which always uses a &usb_serial_port.
+ * cp210x_get_termios_port
+ * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
  */
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp)
 {
 	unsigned int cflag, modem_ctl[4];
@@ -451,17 +470,17 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2);
+	cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
 	/* Convert to baudrate */
 	if (baud)
-		baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
+		baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
 
 	dbg("%s - baud rate = %d", __func__, baud);
 	*baudp = baud;
 
 	cflag = *cflagp;
 
-	cp2101_get_config(port, CP2101_BITS, &bits, 2);
+	cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 	cflag &= ~CSIZE;
 	switch (bits & BITS_DATA_MASK) {
 	case BITS_DATA_5:
@@ -486,14 +505,14 @@
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown number of data bits, using 8", __func__);
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -516,20 +535,20 @@
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_PARITY_SPACE:
 		dbg("%s - parity = SPACE (not supported, disabling parity)",
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown parity mode, disabling parity", __func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -542,7 +561,7 @@
 		dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_STOP_2:
 		dbg("%s - stop bits = 2", __func__);
@@ -552,11 +571,11 @@
 		dbg("%s - Unknown number of stop bits, using 1 stop bit",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
-	cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+	cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 	if (modem_ctl[0] & 0x0008) {
 		dbg("%s - flow control = CRTSCTS", __func__);
 		cflag |= CRTSCTS;
@@ -568,7 +587,7 @@
 	*cflagp = cflag;
 }
 
-static void cp2101_set_termios(struct tty_struct *tty,
+static void cp210x_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
 	unsigned int cflag, old_cflag;
@@ -583,13 +602,13 @@
 	tty->termios->c_cflag &= ~CMSPAR;
 	cflag = tty->termios->c_cflag;
 	old_cflag = old_termios->c_cflag;
-	baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty));
+	baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty));
 
 	/* If the baud rate is to be updated*/
 	if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
 		dbg("%s - Setting baud rate to %d baud", __func__,
 				baud);
-		if (cp2101_set_config_single(port, CP2101_BAUDRATE,
+		if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV,
 					((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
 			dbg("Baud rate requested not supported by device\n");
 			baud = tty_termios_baud_rate(old_termios);
@@ -600,7 +619,7 @@
 
 	/* If the number of data bits is to be updated */
 	if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_DATA_MASK;
 		switch (cflag & CSIZE) {
 		case CS5:
@@ -624,19 +643,19 @@
 			dbg("%s - data bits = 9", __func__);
 			break;*/
 		default:
-			dbg("cp2101 driver does not "
+			dbg("cp210x driver does not "
 					"support the number of bits requested,"
 					" using 8 bit mode\n");
 				bits |= BITS_DATA_8;
 				break;
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of data bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_PARITY_MASK;
 		if (cflag & PARENB) {
 			if (cflag & PARODD) {
@@ -647,13 +666,13 @@
 				dbg("%s - parity = EVEN", __func__);
 			}
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Parity mode not supported "
 					"by device\n");
 	}
 
 	if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_STOP_MASK;
 		if (cflag & CSTOPB) {
 			bits |= BITS_STOP_2;
@@ -662,13 +681,13 @@
 			bits |= BITS_STOP_1;
 			dbg("%s - stop bits = 1", __func__);
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of stop bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
-		cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 		dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
@@ -688,19 +707,19 @@
 		dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
-		cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
 	}
 
 }
 
-static int cp2101_tiocmset (struct tty_struct *tty, struct file *file,
+static int cp210x_tiocmset (struct tty_struct *tty, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	struct usb_serial_port *port = tty->driver_data;
-	return cp2101_tiocmset_port(port, file, set, clear);
+	return cp210x_tiocmset_port(port, file, set, clear);
 }
 
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	unsigned int control = 0;
@@ -726,10 +745,10 @@
 
 	dbg("%s - control = 0x%.4x", __func__, control);
 
-	return cp2101_set_config(port, CP2101_CONTROL, &control, 2);
+	return cp210x_set_config(port, CP210X_SET_MHS, &control, 2);
 }
 
-static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
+static int cp210x_tiocmget (struct tty_struct *tty, struct file *file)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int control;
@@ -737,7 +756,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_CONTROL, &control, 1);
+	cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1);
 
 	result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
 		|((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -751,7 +770,7 @@
 	return result;
 }
 
-static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
+static void cp210x_break_ctl (struct tty_struct *tty, int break_state)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int state;
@@ -763,17 +782,17 @@
 		state = BREAK_ON;
 	dbg("%s - turning break %s", __func__,
 			state == BREAK_OFF ? "off" : "on");
-	cp2101_set_config(port, CP2101_BREAK, &state, 2);
+	cp210x_set_config(port, CP210X_SET_BREAK, &state, 2);
 }
 
-static int cp2101_startup(struct usb_serial *serial)
+static int cp210x_startup(struct usb_serial *serial)
 {
-	/* CP2101 buffers behave strangely unless device is reset */
+	/* cp210x buffers behave strangely unless device is reset */
 	usb_reset_device(serial->dev);
 	return 0;
 }
 
-static void cp2101_shutdown(struct usb_serial *serial)
+static void cp210x_shutdown(struct usb_serial *serial)
 {
 	int i;
 
@@ -781,21 +800,21 @@
 
 	/* Stop reads and writes on all ports */
 	for (i = 0; i < serial->num_ports; ++i)
-		cp2101_cleanup(serial->port[i]);
+		cp210x_cleanup(serial->port[i]);
 }
 
-static int __init cp2101_init(void)
+static int __init cp210x_init(void)
 {
 	int retval;
 
-	retval = usb_serial_register(&cp2101_device);
+	retval = usb_serial_register(&cp210x_device);
 	if (retval)
 		return retval; /* Failed to register */
 
-	retval = usb_register(&cp2101_driver);
+	retval = usb_register(&cp210x_driver);
 	if (retval) {
 		/* Failed to register */
-		usb_serial_deregister(&cp2101_device);
+		usb_serial_deregister(&cp210x_device);
 		return retval;
 	}
 
@@ -805,14 +824,14 @@
 	return 0;
 }
 
-static void __exit cp2101_exit(void)
+static void __exit cp210x_exit(void)
 {
-	usb_deregister(&cp2101_driver);
-	usb_serial_deregister(&cp2101_device);
+	usb_deregister(&cp210x_driver);
+	usb_serial_deregister(&cp210x_device);
 }
 
-module_init(cp2101_init);
-module_exit(cp2101_exit);
+module_init(cp210x_init);
+module_exit(cp210x_exit);
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index dd501bb..933ba91 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -61,8 +61,7 @@
 static void cyberjack_shutdown(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cyberjack_close(struct usb_serial_port *port);
 static int cyberjack_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@
 	return result;
 }
 
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cyberjack_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e568710..669f938 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -174,8 +174,8 @@
 static void cypress_shutdown(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cypress_close(struct usb_serial_port *port);
+static void cypress_dtr_rts(struct usb_serial_port *port, int on);
 static int  cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -244,6 +245,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -270,6 +272,7 @@
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -656,11 +659,7 @@
 	priv->rx_flags = 0;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* raise both lines and set termios */
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->line_control = CONTROL_DTR | CONTROL_RTS;
-	priv->cmd_ctrl = 1;
-	spin_unlock_irqrestore(&priv->lock, flags);
+	/* Set termios */
 	result = cypress_write(tty, port, NULL, 0);
 
 	if (result) {
@@ -694,76 +693,42 @@
 							__func__, result);
 		cypress_set_dead(port);
 	}
-
+	port->port.drain_delay = 256;
 	return result;
 } /* cypress_open */
 
-
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cypress_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct cypress_private *priv = usb_get_serial_port_data(port);
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	/* drop dtr and rts */
+	priv = usb_get_serial_port_data(port);
+	spin_lock_irq(&priv->lock);
+	if (on == 0)
+		priv->line_control = 0;
+	else 
+		priv->line_control = CONTROL_DTR | CONTROL_RTS;
+	priv->cmd_ctrl = 1;
+	spin_unlock_irq(&priv->lock);
+	cypress_write(NULL, port, NULL, 0);
+}
+
+static void cypress_close(struct usb_serial_port *port)
+{
+	struct cypress_private *priv = usb_get_serial_port_data(port);
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from buffer */
-	spin_lock_irq(&priv->lock);
-	timeout = CYPRESS_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (cypress_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		/* without mutex, allowed due to harmless failure mode */
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irq(&priv->lock);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irq(&priv->lock);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	/* clear out any remaining data in the buffer */
-	cypress_buf_clear(priv->buf);
-	spin_unlock_irq(&priv->lock);
-
 	/* writing is potentially harmful, lock must be taken */
 	mutex_lock(&port->serial->disc_mutex);
 	if (port->serial->disconnected) {
 		mutex_unlock(&port->serial->disc_mutex);
 		return;
 	}
-	/* wait for characters to drain from device */
-	if (tty) {
-		bps = tty_get_baud_rate(tty);
-		if (bps > 1200)
-			timeout = max((HZ * 2560) / bps, HZ / 10);
-		else
-			timeout = 2 * HZ;
-		schedule_timeout_interruptible(timeout);
-	}
-
+	cypress_buf_clear(priv->buf);
 	dbg("%s - stopping urbs", __func__);
 	usb_kill_urb(port->interrupt_in_urb);
 	usb_kill_urb(port->interrupt_out_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop dtr and rts */
-			priv = usb_get_serial_port_data(port);
-			spin_lock_irq(&priv->lock);
-			priv->line_control = 0;
-			priv->cmd_ctrl = 1;
-			spin_unlock_irq(&priv->lock);
-			cypress_write(tty, port, NULL, 0);
-		}
-	}
 
 	if (stats)
 		dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 38ba4ea..30f5140 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -422,7 +422,6 @@
 	int dp_throttled;
 	int dp_throttle_restart;
 	wait_queue_head_t dp_flush_wait;
-	int dp_in_close;			/* close in progress */
 	wait_queue_head_t dp_close_wait;	/* wait queue for close */
 	struct work_struct dp_wakeup_work;
 	struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@
 static int digi_chars_in_buffer(struct tty_struct *tty);
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	struct file *filp);
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-	struct file *filp);
+static void digi_close(struct usb_serial_port *port);
+static int digi_carrier_raised(struct usb_serial_port *port);
+static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
 static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@
 	.num_ports =			3,
 	.open =				digi_open,
 	.close =			digi_close,
+	.dtr_rts =			digi_dtr_rts,
+	.carrier_raised =		digi_carrier_raised,
 	.write =			digi_write,
 	.write_room =			digi_write_room,
 	.write_bulk_callback = 		digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@
 
 }
 
+static void digi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	/* Adjust DTR and RTS */
+	digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1);
+}
+
+static int digi_carrier_raised(struct usb_serial_port *port)
+{
+	struct digi_port *priv = usb_get_serial_port_data(port);
+	if (priv->dp_modem_signals & TIOCM_CD)
+		return 1;
+	return 0;
+}
 
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
@@ -1336,7 +1351,6 @@
 	unsigned char buf[32];
 	struct digi_port *priv = usb_get_serial_port_data(port);
 	struct ktermios not_termios;
-	unsigned long flags = 0;
 
 	dbg("digi_open: TOP: port=%d, open_count=%d",
 		priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@
 	if (digi_startup_device(port->serial) != 0)
 		return -ENXIO;
 
-	spin_lock_irqsave(&priv->dp_port_lock, flags);
-
-	/* don't wait on a close in progress for non-blocking opens */
-	if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
-		spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-		return -EAGAIN;
-	}
-
-	/* wait for a close in progress to finish */
-	while (priv->dp_in_close) {
-		cond_wait_interruptible_timeout_irqrestore(
-			&priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
-			&priv->dp_port_lock, flags);
-		if (signal_pending(current))
-			return -EINTR;
-		spin_lock_irqsave(&priv->dp_port_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-
 	/* read modem signals automatically whenever they change */
 	buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
 	buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@
 		not_termios.c_iflag = ~tty->termios->c_iflag;
 		digi_set_termios(tty, port, &not_termios);
 	}
-
-	/* set DTR and RTS */
-	digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
-
 	return 0;
 }
 
 
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void digi_close(struct usb_serial_port *port)
 {
 	DEFINE_WAIT(wait);
 	int ret;
@@ -1411,28 +1400,9 @@
 	if (port->serial->disconnected)
 		goto exit;
 
-	/* do cleanup only after final close on this port */
-	spin_lock_irq(&priv->dp_port_lock);
-	priv->dp_in_close = 1;
-	spin_unlock_irq(&priv->dp_port_lock);
-
-	/* tell line discipline to process only XON/XOFF */
-	tty->closing = 1;
-
-	/* wait for output to drain */
-	if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-		tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
-
-	/* flush driver and line discipline buffers */
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	if (port->serial->dev) {
-		/* wait for transmit idle */
-		if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-			digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
-		/* drop DTR and RTS */
-		digi_set_modem_signals(port, 0, 0);
+		/* FIXME: Transmit idle belongs in the wait_unti_sent path */
+		digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
 
 		/* disable input flow control */
 		buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@
 		/* shutdown any outstanding bulk writes */
 		usb_kill_urb(port->write_urb);
 	}
-	tty->closing = 0;
 exit:
 	spin_lock_irq(&priv->dp_port_lock);
 	priv->dp_write_urb_in_use = 0;
-	priv->dp_in_close = 0;
 	wake_up_interruptible(&priv->dp_close_wait);
 	spin_unlock_irq(&priv->dp_port_lock);
 	mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@
 		priv->dp_throttled = 0;
 		priv->dp_throttle_restart = 0;
 		init_waitqueue_head(&priv->dp_flush_wait);
-		priv->dp_in_close = 0;
 		init_waitqueue_head(&priv->dp_close_wait);
 		INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
 		priv->dp_port = serial->port[i];
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index c709ec4..2b141cc 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -81,8 +81,7 @@
 /* function prototypes for an empeg-car player */
 static int  empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 						struct file *filp);
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-						struct file *filp);
+static void empeg_close(struct usb_serial_port *port);
 static int  empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
 						const unsigned char *buf,
 						int count);
@@ -181,8 +180,7 @@
 }
 
 
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void empeg_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9fcdae..683304d 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -89,6 +89,7 @@
 	int force_rtscts;	/* if non-zero, force RTS-CTS to always
 				   be enabled */
 
+	unsigned int latency;		/* latency setting in use */
 	spinlock_t tx_lock;	/* spinlock for transmit state */
 	unsigned long tx_bytes;
 	unsigned long tx_outstanding_bytes;
@@ -719,8 +720,8 @@
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ftdi_close(struct usb_serial_port *port);
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static int  ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static int  ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +759,7 @@
 	.port_remove =		ftdi_sio_port_remove,
 	.open =			ftdi_open,
 	.close =		ftdi_close,
+	.dtr_rts =		ftdi_dtr_rts,
 	.throttle =		ftdi_throttle,
 	.unthrottle =		ftdi_unthrottle,
 	.write =		ftdi_write,
@@ -1037,7 +1039,54 @@
 	return rv;
 }
 
+static int write_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	char buf[1];
+	int rv = 0;
+	int l = priv->latency;
 
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		l = 1;
+
+	dbg("%s: setting latency timer = %i", __func__, l);
+
+	rv = usb_control_msg(udev,
+			     usb_sndctrlpipe(udev, 0),
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
+			     l, priv->interface,
+			     buf, 0, WDR_TIMEOUT);
+
+	if (rv < 0)
+		dev_err(&port->dev, "Unable to write latency timer: %i\n", rv);
+	return rv;
+}
+
+static int read_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	unsigned short latency = 0;
+	int rv = 0;
+
+
+	dbg("%s", __func__);
+
+	rv = usb_control_msg(udev,
+			     usb_rcvctrlpipe(udev, 0),
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
+			     0, priv->interface,
+			     (char *) &latency, 1, WDR_TIMEOUT);
+
+	if (rv < 0) {
+		dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
+		return -EIO;
+	}
+	return latency;
+}
 
 static int get_serial_info(struct usb_serial_port *port,
 				struct serial_struct __user *retinfo)
@@ -1097,6 +1146,7 @@
 	priv->custom_divisor = new_serial.custom_divisor;
 
 	tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	write_latency_timer(port);
 
 check_and_exit:
 	if ((old_priv.flags & ASYNC_SPD_MASK) !=
@@ -1192,27 +1242,13 @@
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	unsigned short latency = 0;
-	int rv = 0;
-
-
-	dbg("%s", __func__);
-
-	rv = usb_control_msg(udev,
-			     usb_rcvctrlpipe(udev, 0),
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
-			     0, priv->interface,
-			     (char *) &latency, 1, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to read latency timer: %i\n", rv);
-		return -EIO;
-	}
-	return sprintf(buf, "%i\n", latency);
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		return sprintf(buf, "1\n");
+	else
+		return sprintf(buf, "%i\n", priv->latency);
 }
 
+
 /* Write a new value of the latency timer, in units of milliseconds. */
 static ssize_t store_latency_timer(struct device *dev,
 			struct device_attribute *attr, const char *valbuf,
@@ -1220,25 +1256,13 @@
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	char buf[1];
 	int v = simple_strtoul(valbuf, NULL, 10);
 	int rv = 0;
 
-	dbg("%s: setting latency timer = %i", __func__, v);
-
-	rv = usb_control_msg(udev,
-			     usb_sndctrlpipe(udev, 0),
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
-			     v, priv->interface,
-			     buf, 0, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to write latency timer: %i\n", rv);
+	priv->latency = v;
+	rv = write_latency_timer(port);
+	if (rv < 0)
 		return -EIO;
-	}
-
 	return count;
 }
 
@@ -1392,6 +1416,7 @@
 	usb_set_serial_port_data(port, priv);
 
 	ftdi_determine_type(port);
+	read_latency_timer(port);
 	create_sysfs_attrs(port);
 	return 0;
 }
@@ -1514,6 +1539,8 @@
 	if (tty)
 		tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
+	write_latency_timer(port);
+
 	/* No error checking for this (will get errors later anyway) */
 	/* See ftdi_sio.h for description of what is reset */
 	usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
@@ -1529,11 +1556,6 @@
 	if (tty)
 		ftdi_set_termios(tty, port, tty->termios);
 
-	/* FIXME: Flow control might be enabled, so it should be checked -
-	   we have no control of defaults! */
-	/* Turn on RTS and DTR since we are not flow controlling by default */
-	set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-
 	/* Not throttled */
 	spin_lock_irqsave(&priv->rx_lock, flags);
 	priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
@@ -1558,6 +1580,30 @@
 } /* ftdi_open */
 
 
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	char buf[1];
+
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* Disable flow control */
+		if (!on && usb_control_msg(port->serial->dev,
+			    usb_sndctrlpipe(port->serial->dev, 0),
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+			    0, priv->interface, buf, 0,
+			    WDR_TIMEOUT) < 0) {
+			    dev_err(&port->dev, "error from flowcontrol urb\n");
+		}
+		/* drop RTS and DTR */
+		if (on)
+			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+		else
+			clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
 /*
  * usbserial:__serial_close  only calls ftdi_close if the point is open
@@ -1567,31 +1613,12 @@
  *
  */
 
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ftdi_close(struct usb_serial_port *port)
 { /* ftdi_close */
-	unsigned int c_cflag = tty->termios->c_cflag;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	char buf[1];
 
 	dbg("%s", __func__);
 
-	mutex_lock(&port->serial->disc_mutex);
-	if (c_cflag & HUPCL && !port->serial->disconnected) {
-		/* Disable flow control */
-		if (usb_control_msg(port->serial->dev,
-				    usb_sndctrlpipe(port->serial->dev, 0),
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-				    0, priv->interface, buf, 0,
-				    WDR_TIMEOUT) < 0) {
-			dev_err(&port->dev, "error from flowcontrol urb\n");
-		}
-
-		/* drop RTS and DTR */
-		clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-	} /* Note change no line if hupcl is off */
-	mutex_unlock(&port->serial->disc_mutex);
 
 	/* cancel any scheduled reading */
 	cancel_delayed_work_sync(&priv->rx_work);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 586d30f..ee25a3f 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -993,8 +993,7 @@
 }
 
 
-static void garmin_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void garmin_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 4cec990..be82ea9 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -184,8 +184,7 @@
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
 
-void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+void usb_serial_generic_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	generic_cleanup(port);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index fb4a73d..53ef599 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -207,8 +207,7 @@
 /* function prototypes for the usbserial callbacks */
 static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void edge_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void edge_close(struct usb_serial_port *port);
 static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@
 
 	if (!edge_port->txfifo.fifo) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -975,7 +974,7 @@
 
 	if (!edge_port->write_urb) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -1099,8 +1098,7 @@
  * edge_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 513b25e..eabf20e 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2009,8 +2009,7 @@
 	return status;
 }
 
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index cd62825..c610a99 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -76,8 +76,7 @@
 /* Function prototypes for an ipaq */
 static int  ipaq_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
 static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@
 }
 
 
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipaq_close(struct usb_serial_port *port)
 {
 	struct ipaq_private	*priv = usb_get_serial_port_data(port);
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index da2a2b4..29ad038 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -302,23 +302,17 @@
 	return 0;
 }
 
-static void ipw_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipw_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct usb_device *dev = port->serial->dev;
 	int result;
 
-	if (tty_hung_up_p(filp)) {
-		dbg("%s: tty_hung_up_p ...", __func__);
-		return;
-	}
-
 	/*--1: drop the dtr */
 	dbg("%s:dropping dtr", __func__);
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN,
 			 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRDTR,
+			 on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
 			 0,
 			 NULL,
 			 0,
@@ -332,7 +326,7 @@
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
 			 		USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRRTS,
+			 on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
 			 0,
 			 NULL,
 			 0,
@@ -340,7 +334,12 @@
 	if (result < 0)
 		dev_err(&port->dev,
 				"dropping rts failed (error = %d)\n", result);
+}
 
+static void ipw_close(struct usb_serial_port *port)
+{
+	struct usb_device *dev = port->serial->dev;
+	int result;
 
 	/*--3: purge */
 	dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@
 	.num_ports =		1,
 	.open =			ipw_open,
 	.close =		ipw_close,
+	.dtr_rts =		ipw_dtr_rts,
 	.port_probe = 		ipw_probe,
 	.port_remove =		ipw_disconnect,
 	.write =		ipw_write,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 4e2cda9..66009b6 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -88,8 +88,7 @@
 static int  ir_startup (struct usb_serial *serial);
 static int  ir_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filep);
-static void ir_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filep);
+static void ir_close(struct usb_serial_port *port);
 static int  ir_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@
 	return result;
 }
 
-static void ir_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file * filp)
+static void ir_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 4473d44..76a3cc3 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -40,7 +40,7 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.5"
+#define DRIVER_VERSION "v0.10"
 #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
 
 static struct usb_device_id id_table[] = {
@@ -70,7 +70,6 @@
 struct iuu_private {
 	spinlock_t lock;	/* store irq state */
 	wait_queue_head_t delta_msr_wait;
-	u8 line_control;
 	u8 line_status;
 	u8 termios_initialized;
 	int tiostatus;		/* store IUART SIGNAL for tiocmget call */
@@ -651,32 +650,33 @@
 	unsigned long flags;
 	int result;
 	int i;
+	int buf_len;
 	char *buf_ptr = port->write_urb->transfer_buffer;
 	dbg("%s - enter", __func__);
 
+	spin_lock_irqsave(&priv->lock, flags);
 	*buf_ptr++ = IUU_UART_ESC;
 	*buf_ptr++ = IUU_UART_TX;
 	*buf_ptr++ = priv->writelen;
 
-	memcpy(buf_ptr, priv->writebuf,
-	       priv->writelen);
+	memcpy(buf_ptr, priv->writebuf, priv->writelen);
+	buf_len = priv->writelen;
+	priv->writelen = 0;
+	spin_unlock_irqrestore(&priv->lock, flags);
 	if (debug == 1) {
-		for (i = 0; i < priv->writelen; i++)
+		for (i = 0; i < buf_len; i++)
 			sprintf(priv->dbgbuf + i*2 ,
 				"%02X", priv->writebuf[i]);
-		priv->dbgbuf[priv->writelen+i*2] = 0;
+		priv->dbgbuf[buf_len+i*2] = 0;
 		dbg("%s - writing %i chars : %s", __func__,
-		    priv->writelen, priv->dbgbuf);
+		    buf_len, priv->dbgbuf);
 	}
 	usb_fill_bulk_urb(port->write_urb, port->serial->dev,
 			  usb_sndbulkpipe(port->serial->dev,
 					  port->bulk_out_endpointAddress),
-			  port->write_urb->transfer_buffer, priv->writelen + 3,
+			  port->write_urb->transfer_buffer, buf_len + 3,
 			  iuu_rxcmd, port);
 	result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->writelen = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
 	usb_serial_port_softint(port);
 	return result;
 }
@@ -770,14 +770,10 @@
 		return -ENOMEM;
 
 	spin_lock_irqsave(&priv->lock, flags);
-	if (priv->writelen > 0) {
-		/* buffer already filled but not commited */
-		spin_unlock_irqrestore(&priv->lock, flags);
-		return 0;
-	}
+
 	/* fill the buffer */
-	memcpy(priv->writebuf, buf, count);
-	priv->writelen = count;
+	memcpy(priv->writebuf + priv->writelen, buf, count);
+	priv->writelen += count;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return count;
@@ -819,7 +815,7 @@
 	buf[0] = IUU_UART_ENABLE;
 	buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF);
 	buf[2] = (u8) (0x00FF & IUU_BAUD_9600);
-	buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN);
+	buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN);
 
 	status = bulk_immediate(port, buf, 4);
 	if (status != IUU_OPERATION_OK) {
@@ -946,19 +942,59 @@
 	return status;
 }
 
-static int set_control_lines(struct usb_device *dev, u8 value)
+static void iuu_set_termios(struct tty_struct *tty,
+		struct usb_serial_port *port, struct ktermios *old_termios)
 {
-	return 0;
+	const u32 supported_mask = CMSPAR|PARENB|PARODD;
+
+	unsigned int cflag = tty->termios->c_cflag;
+	int status;
+	u32 actual;
+	u32 parity;
+	int csize = CS7;
+	int baud = 9600;	/* Fixed for the moment */
+	u32 newval = cflag & supported_mask;
+
+	/* compute the parity parameter */
+	parity = 0;
+	if (cflag & CMSPAR) {	/* Using mark space */
+		if (cflag & PARODD)
+			parity |= IUU_PARITY_SPACE;
+		else
+			parity |= IUU_PARITY_MARK;
+	} else if (!(cflag & PARENB)) {
+		parity |= IUU_PARITY_NONE;
+		csize = CS8;
+	} else if (cflag & PARODD)
+		parity |= IUU_PARITY_ODD;
+	else
+		parity |= IUU_PARITY_EVEN;
+
+	parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT);
+
+	/* set it */
+	status = iuu_uart_baud(port,
+			(clockmode == 2) ? 16457 : 9600 * boost / 100,
+			&actual, parity);
+
+	/* set the termios value to the real one, so the user now what has
+	 * changed. We support few fields so its easies to copy the old hw
+	 * settings back over and then adjust them
+	 */
+ 	if (old_termios)
+ 		tty_termios_copy_hw(tty->termios, old_termios);
+	if (status != 0)	/* Set failed - return old bits */
+		return;
+	/* Re-encode speed, parity and csize */
+	tty_encode_baud_rate(tty, baud, baud);
+	tty->termios->c_cflag &= ~(supported_mask|CSIZE);
+	tty->termios->c_cflag |= newval | csize;
 }
 
-static void iuu_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void iuu_close(struct usb_serial_port *port)
 {
 	/* iuu_led (port,255,0,0,0); */
 	struct usb_serial *serial;
-	struct iuu_private *priv = usb_get_serial_port_data(port);
-	unsigned long flags;
-	unsigned int c_cflag;
 
 	serial = port->serial;
 	if (!serial)
@@ -968,17 +1004,6 @@
 
 	iuu_uart_off(port);
 	if (serial->dev) {
-		if (tty) {
-			c_cflag = tty->termios->c_cflag;
-			if (c_cflag & HUPCL) {
-				/* drop DTR and RTS */
-				priv = usb_get_serial_port_data(port);
-				spin_lock_irqsave(&priv->lock, flags);
-				priv->line_control = 0;
-				spin_unlock_irqrestore(&priv->lock, flags);
-				set_control_lines(port->serial->dev, 0);
-			}
-		}
 		/* free writebuf */
 		/* shutdown our urbs */
 		dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1179,7 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		iuu_close(tty, port, NULL);
+		iuu_close(port);
 		return -EPROTO;
 	} else {
 		dbg("%s - rxcmd OK", __func__);
@@ -1175,6 +1200,7 @@
 	.read_bulk_callback = iuu_uart_read_callback,
 	.tiocmget = iuu_tiocmget,
 	.tiocmset = iuu_tiocmset,
+	.set_termios = iuu_set_termios,
 	.attach = iuu_startup,
 	.shutdown = iuu_shutdown,
 };
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 00daa8f..f1195a9 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1298,8 +1298,16 @@
 		usb_kill_urb(urb);
 }
 
-static void keyspan_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
+
+	p_priv->rts_state = on;
+	p_priv->dtr_state = on;
+	keyspan_send_setup(port, 0);
+}
+
+static void keyspan_close(struct usb_serial_port *port)
 {
 	int			i;
 	struct usb_serial	*serial = port->serial;
@@ -1336,7 +1344,6 @@
 			stop_urb(p_priv->out_urbs[i]);
 		}
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* download the firmware to a pre-renumeration device */
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 38b4582..0d4569b 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -38,9 +38,8 @@
 static int  keyspan_open		(struct tty_struct *tty,
 					 struct usb_serial_port *port,
 					 struct file *filp);
-static void keyspan_close		(struct tty_struct *tty,
-					 struct usb_serial_port *port,
-					 struct file *filp);
+static void keyspan_close		(struct usb_serial_port *port);
+static void keyspan_dtr_rts		(struct usb_serial_port *port, int on);
 static int  keyspan_startup		(struct usb_serial *serial);
 static void keyspan_shutdown		(struct usb_serial *serial);
 static int  keyspan_write_room		(struct tty_struct *tty);
@@ -562,6 +561,7 @@
 	.num_ports		= 1,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -582,6 +582,7 @@
 	.num_ports		= 2,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -602,6 +603,7 @@
 	.num_ports		= 4,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index bf1ae24..ab769db 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -651,6 +651,35 @@
 }
 
 
+static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+
+	if (serial->dev) {
+		if (on)
+			keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
+		else
+			keyspan_pda_set_modem_info(serial, 0);
+	}
+}
+
+static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
+{
+	struct usb_serial *serial = port->serial;
+	unsigned char modembits;
+
+	/* If we can read the modem status and the DCD is low then
+	   carrier is not raised yet */
+	if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
+		if (!(modembits & (1>>6)))
+			return 0;
+	}
+	/* Carrier raised, or we failed (eg disconnected) so
+	   progress accordingly */
+	return 1;
+}
+
+
 static int keyspan_pda_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
@@ -682,13 +711,6 @@
 	priv->tx_room = room;
 	priv->tx_throttled = room ? 0 : 1;
 
-	/* the normal serial device seems to always turn on DTR and RTS here,
-	   so do the same */
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
-	else
-		keyspan_pda_set_modem_info(serial, 0);
-
 	/*Start reading from the device*/
 	port->interrupt_in_urb->dev = serial->dev;
 	rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@
 error:
 	return rc;
 }
-
-
-static void keyspan_pda_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_pda_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
 	if (serial->dev) {
-		/* the normal serial device seems to always shut
-		   off DTR and RTS now */
-		if (tty->termios->c_cflag & HUPCL)
-			keyspan_pda_set_modem_info(serial, 0);
-
 		/* shutdown our bulk reads and writes */
 		usb_kill_urb(port->write_urb);
 		usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@
 	.usb_driver = 		&keyspan_pda_driver,
 	.id_table =		id_table_std,
 	.num_ports =		1,
+	.dtr_rts =		keyspan_pda_dtr_rts,
+	.carrier_raised	=	keyspan_pda_carrier_raised,
 	.open =			keyspan_pda_open,
 	.close =		keyspan_pda_close,
 	.write =		keyspan_pda_write,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fcd9082..fa817c6 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -76,8 +76,7 @@
 static void klsi_105_shutdown(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void klsi_105_close(struct usb_serial_port *port);
 static int  klsi_105_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@
 } /* klsi_105_open */
 
 
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void klsi_105_close(struct usb_serial_port *port)
 {
 	struct klsi_105_private *priv = usb_get_serial_port_data(port);
 	int rc;
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index c148544..6b57049 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -72,8 +72,7 @@
 static void kobil_shutdown(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port,
-			struct file *filp);
+static void kobil_close(struct usb_serial_port *port);
 static int  kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
 			 const unsigned char *buf, int count);
 static int  kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@
 
 	for (i = 0; i < serial->num_ports; ++i) {
 		while (serial->port[i]->port.count > 0)
-			kobil_close(NULL, serial->port[i], NULL);
+			kobil_close(serial->port[i]);
 		kfree(usb_get_serial_port_data(serial->port[i]));
 		usb_set_serial_port_data(serial->port[i], NULL);
 	}
@@ -346,11 +345,11 @@
 }
 
 
-static void kobil_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void kobil_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
+	/* FIXME: Add rts/dtr methods */
 	if (port->write_urb) {
 		usb_kill_urb(port->write_urb);
 		usb_free_urb(port->write_urb);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 82930a7..8737955 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -95,8 +95,8 @@
 static void mct_u232_shutdown(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void mct_u232_close(struct usb_serial_port *port);
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
 static void mct_u232_read_int_callback(struct urb *urb);
 static void mct_u232_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@
 	.num_ports =	     1,
 	.open =		     mct_u232_open,
 	.close =	     mct_u232_close,
+	.dtr_rts =	     mct_u232_dtr_rts,
 	.throttle =	     mct_u232_throttle,
 	.unthrottle =	     mct_u232_unthrottle,
 	.read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@
 	return retval;
 } /* mct_u232_open */
 
-
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
 {
-	unsigned int c_cflag;
 	unsigned int control_state;
 	struct mct_u232_private *priv = usb_get_serial_port_data(port);
-	dbg("%s port %d", __func__, port->number);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		mutex_lock(&port->serial->disc_mutex);
-		if (c_cflag & HUPCL && !port->serial->disconnected) {
-			/* drop DTR and RTS */
-			spin_lock_irq(&priv->lock);
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* drop DTR and RTS */
+		spin_lock_irq(&priv->lock);
+		if (on)
+			priv->control_state |= TIOCM_DTR | TIOCM_RTS;
+		else
 			priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
-			control_state = priv->control_state;
-			spin_unlock_irq(&priv->lock);
-			mct_u232_set_modem_ctrl(port->serial, control_state);
-		}
-		mutex_unlock(&port->serial->disc_mutex);
+		control_state = priv->control_state;
+		spin_unlock_irq(&priv->lock);
+		mct_u232_set_modem_ctrl(port->serial, control_state);
 	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
+static void mct_u232_close(struct usb_serial_port *port)
+{
+	dbg("%s port %d", __func__, port->number);
 
 	if (port->serial->dev) {
 		/* shutdown our urbs */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 24e3b5d..9e1a013 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -533,8 +533,7 @@
 	return chars;
 }
 
-static void mos7720_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7720_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7720_port;
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 84fb1dc..10b78a3 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1135,54 +1135,12 @@
 
 }
 
-/************************************************************************
- *
- * mos7840_block_until_tx_empty
- *
- *	This function will block the close until one of the following:
- *		1. TX count are 0
- *		2. The mos7840 has stopped
- *		3. A timeout of 3 seconds without activity has expired
- *
- ************************************************************************/
-static void mos7840_block_until_tx_empty(struct tty_struct *tty,
-				struct moschip_port *mos7840_port)
-{
-	int timeout = HZ / 10;
-	int wait = 30;
-	int count;
-
-	while (1) {
-
-		count = mos7840_chars_in_buffer(tty);
-
-		/* Check for Buffer status */
-		if (count <= 0)
-			return;
-
-		/* Block the thread for a while */
-		interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
-					       timeout);
-
-		/* No activity.. count down section */
-		wait--;
-		if (wait == 0) {
-			dbg("%s - TIMEOUT", __func__);
-			return;
-		} else {
-			/* Reset timeout value back to seconds */
-			wait = 30;
-		}
-	}
-}
-
 /*****************************************************************************
  * mos7840_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
 
-static void mos7840_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7840_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@
 		}
 	}
 
-	if (serial->dev)
-		/* flush and block until tx is empty */
-		mos7840_block_until_tx_empty(tty, mos7840_port);
-
 	/* While closing port, shutdown all bulk read, write  *
 	 * and interrupt read if they exists                  */
 	if (serial->dev) {
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index bcdcbb8..f5f3751a8 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -98,8 +98,7 @@
 	return result;
 }
 
-static void navman_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void navman_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index df65397..1104617 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -66,8 +66,7 @@
 /* function prototypes */
 static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void omninet_close(struct usb_serial_port *port);
 static void omninet_read_bulk_callback(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@
 	return result;
 }
 
-static void omninet_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void omninet_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	usb_kill_urb(port->read_urb);
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index b500ad1..c20480a 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -173,8 +173,7 @@
 	return result;
 }
 
-static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void opticon_close(struct usb_serial_port *port)
 {
 	struct opticon_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7817b82..a16d69f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -45,8 +45,9 @@
 /* Function prototypes */
 static int  option_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void option_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void option_close(struct usb_serial_port *port);
+static void option_dtr_rts(struct usb_serial_port *port, int on);
+
 static int  option_startup(struct usb_serial *serial);
 static void option_shutdown(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@
 static int  option_tiocmget(struct tty_struct *tty, struct file *file);
 static int  option_tiocmset(struct tty_struct *tty, struct file *file,
 				unsigned int set, unsigned int clear);
-static int  option_send_setup(struct tty_struct *tty, struct usb_serial_port *port);
+static int  option_send_setup(struct usb_serial_port *port);
 static int  option_suspend(struct usb_serial *serial, pm_message_t message);
 static int  option_resume(struct usb_serial *serial);
 
@@ -551,6 +552,7 @@
 	.num_ports         = 1,
 	.open              = option_open,
 	.close             = option_close,
+	.dtr_rts	   = option_dtr_rts,
 	.write             = option_write,
 	.write_room        = option_write_room,
 	.chars_in_buffer   = option_chars_in_buffer,
@@ -630,7 +632,7 @@
 	dbg("%s", __func__);
 	/* Doesn't support option setting */
 	tty_termios_copy_hw(tty->termios, old_termios);
-	option_send_setup(tty, port);
+	option_send_setup(port);
 }
 
 static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return option_send_setup(tty, port);
+	return option_send_setup(port);
 }
 
 /* Write */
@@ -897,10 +899,6 @@
 
 	dbg("%s", __func__);
 
-	/* Set some sane defaults */
-	portdata->rts_state = 1;
-	portdata->dtr_state = 1;
-
 	/* Reset low level data toggle and start reading from endpoints */
 	for (i = 0; i < N_IN_URB; i++) {
 		urb = portdata->in_urbs[i];
@@ -936,13 +934,28 @@
 				usb_pipeout(urb->pipe), 0); */
 	}
 
-	option_send_setup(tty, port);
+	option_send_setup(port);
 
 	return 0;
 }
 
-static void option_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void option_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+	struct option_port_private *portdata;
+
+	dbg("%s", __func__);
+	portdata = usb_get_serial_port_data(port);
+	mutex_lock(&serial->disc_mutex);
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
+	if (serial->dev)
+		option_send_setup(port);
+	mutex_unlock(&serial->disc_mutex);
+}
+
+
+static void option_close(struct usb_serial_port *port)
 {
 	int i;
 	struct usb_serial *serial = port->serial;
@@ -951,22 +964,13 @@
 	dbg("%s", __func__);
 	portdata = usb_get_serial_port_data(port);
 
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
-
 	if (serial->dev) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			option_send_setup(tty, port);
-		mutex_unlock(&serial->disc_mutex);
-
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 		for (i = 0; i < N_OUT_URB; i++)
 			usb_kill_urb(portdata->out_urbs[i]);
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@
  * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
  * CDC.
 */
-static int option_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int option_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 	int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
+	int val = 0;
 	dbg("%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		return usb_control_msg(serial->dev,
-			usb_rcvctrlpipe(serial->dev, 0),
-			0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	}
-	return 0;
+	return usb_control_msg(serial->dev,
+		usb_rcvctrlpipe(serial->dev, 0),
+		0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
 }
 
 static int option_startup(struct usb_serial *serial)
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index ba551f0..7de5478 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -143,8 +143,7 @@
 /* function prototypes */
 static int oti6858_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void oti6858_close(struct usb_serial_port *port);
 static void oti6858_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@
 	if (result != 0) {
 		dev_err(&port->dev, "%s(): usb_submit_urb() failed"
 			       " with error %d\n", __func__, result);
-		oti6858_close(tty, port, NULL);
+		oti6858_close(port);
 		return -EPROTO;
 	}
 
 	/* setup termios */
 	if (tty)
 		oti6858_set_termios(tty, port, &tmp_termios);
-
+	port->port.drain_delay = 256;	/* FIXME: check the FIFO length */
 	return 0;
 }
 
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void oti6858_close(struct usb_serial_port *port)
 {
 	struct oti6858_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s(port = %d)", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = 30 * HZ;	/* PL2303_CLOSING_WAIT */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): entering wait loop", __func__);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (oti6858_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): after wait loop", __func__);
-
 	/* clear out any remaining data in the buffer */
 	oti6858_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	/* FIXME
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps,HZ/10);
-	else
-	*/
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-	dbg("%s(): after schedule_timeout_interruptible()", __func__);
+	dbg("%s(): after buf_clear()", __func__);
 
 	/* cancel scheduled setup */
 	cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	/*
-	if (tty && (tty->termios->c_cflag) & HUPCL) {
-		// drop DTR and RTS
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->pending_setup.control &= ~CONTROL_MASK;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-	*/
 }
 
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 751a533..e02dc3d 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -652,69 +652,41 @@
 	kfree(buf);
 }
 
-static void pl2303_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	/* Change DTR and RTS */
+	if (on)
+		priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
+	else
+		priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	set_control_lines(port->serial->dev, control);
+}
+
+static void pl2303_close(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = PL2303_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (pl2303_buf_data_avail(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current) ||
-		    port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
 	/* clear out any remaining data in the buffer */
 	pl2303_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			set_control_lines(port->serial->dev, 0);
-		}
-	}
 }
 
 static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
 
@@ -758,9 +730,10 @@
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -821,6 +794,14 @@
 	return result;
 }
 
+static int pl2303_carrier_raised(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & UART_DCD)
+		return 1;
+	return 0;
+}
+
 static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@
 	.num_ports =		1,
 	.open =			pl2303_open,
 	.close =		pl2303_close,
+	.dtr_rts = 		pl2303_dtr_rts,
+	.carrier_raised =	pl2303_carrier_raised,
 	.write =		pl2303_write,
 	.ioctl =		pl2303_ioctl,
 	.break_ctl =		pl2303_break_ctl,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 913225c..17ac34f 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -26,12 +26,10 @@
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
-#include <linux/usb/ch9.h>
 
 #define SWIMS_USB_REQUEST_SetPower	0x00
 #define SWIMS_USB_REQUEST_SetNmea	0x07
 
-/* per port private data */
 #define N_IN_URB	4
 #define N_OUT_URB	4
 #define IN_BUFLEN	4096
@@ -39,6 +37,12 @@
 static int debug;
 static int nmea;
 
+/* Used in interface blacklisting */
+struct sierra_iface_info {
+	const u32 infolen;	/* number of interface numbers on blacklist */
+	const u8  *ifaceinfo;	/* pointer to the array holding the numbers */
+};
+
 static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
 {
 	int result;
@@ -85,6 +89,23 @@
 	return result;
 }
 
+static int is_blacklisted(const u8 ifnum,
+				const struct sierra_iface_info *blacklist)
+{
+	const u8  *info;
+	int i;
+
+	if (blacklist) {
+		info = blacklist->ifaceinfo;
+
+		for (i = 0; i < blacklist->infolen; i++) {
+			if (info[i] == ifnum)
+				return 1;
+		}
+	}
+	return 0;
+}
+
 static int sierra_calc_interface(struct usb_serial *serial)
 {
 	int interface;
@@ -153,9 +174,25 @@
 	 */
 	usb_set_serial_data(serial, (void *)num_ports);
 
+	/* ifnum could have changed - by calling usb_set_interface */
+	ifnum = sierra_calc_interface(serial);
+
+	if (is_blacklisted(ifnum,
+				(struct sierra_iface_info *)id->driver_info)) {
+		dev_dbg(&serial->dev->dev,
+			"Ignoring blacklisted interface #%d\n", ifnum);
+		return -ENODEV;
+	}
+
 	return result;
 }
 
+static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 };
+static const struct sierra_iface_info direct_ip_interface_blacklist = {
+	.infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces),
+	.ifaceinfo = direct_ip_non_serial_ifaces,
+};
+
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x0017) },	/* Sierra Wireless EM5625 */
 	{ USB_DEVICE(0x1199, 0x0018) },	/* Sierra Wireless MC5720 */
@@ -188,9 +225,11 @@
 	{ USB_DEVICE(0x1199, 0x6833) },	/* Sierra Wireless MC8781 */
 	{ USB_DEVICE(0x1199, 0x683A) },	/* Sierra Wireless MC8785 */
 	{ USB_DEVICE(0x1199, 0x683B) },	/* Sierra Wireless MC8785 Composite */
-	{ USB_DEVICE(0x1199, 0x683C) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683E) },	/* Sierra Wireless MC8790 */
+	/* Sierra Wireless MC8790, MC8791, MC8792 Composite */
+	{ USB_DEVICE(0x1199, 0x683C) },
+	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8791 Composite */
+	/* Sierra Wireless MC8790, MC8791, MC8792 */
+	{ USB_DEVICE(0x1199, 0x683E) },
 	{ USB_DEVICE(0x1199, 0x6850) },	/* Sierra Wireless AirCard 880 */
 	{ USB_DEVICE(0x1199, 0x6851) },	/* Sierra Wireless AirCard 881 */
 	{ USB_DEVICE(0x1199, 0x6852) },	/* Sierra Wireless AirCard 880 E */
@@ -211,6 +250,10 @@
 	{ USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
 	{ USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */
 
+	{ USB_DEVICE(0x1199, 0x68A3), 	/* Sierra Wireless Direct IP modems */
+	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+	},
+
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, id_table);
@@ -229,7 +272,6 @@
 
 	/* Input endpoints and buffers for this port */
 	struct urb *in_urbs[N_IN_URB];
-	char *in_buffer[N_IN_URB];
 
 	/* Settings for the port */
 	int rts_state;	/* Handshaking pins (outputs) */
@@ -240,57 +282,50 @@
 	int ri_state;
 };
 
-static int sierra_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int sierra_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 	__u16 interface = 0;
+	int val = 0;
 
 	dev_dbg(&port->dev, "%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		/* If composite device then properly report interface */
-		if (serial->num_ports == 1) {
-			interface = sierra_calc_interface(serial);
-
-			/* Control message is sent only to interfaces with
-			 * interrupt_in endpoints
-			 */
-			if (port->interrupt_in_urb) {
-				/* send control message */
-				return usb_control_msg(serial->dev,
-					usb_rcvctrlpipe(serial->dev, 0),
-					0x22, 0x21, val, interface,
-					NULL, 0, USB_CTRL_SET_TIMEOUT);
-			}
-		}
-
-		/* Otherwise the need to do non-composite mapping */
-		else {
-			if (port->bulk_out_endpointAddress == 2)
-				interface = 0;
-			else if (port->bulk_out_endpointAddress == 4)
-				interface = 1;
-			else if (port->bulk_out_endpointAddress == 5)
-				interface = 2;
-
+	/* If composite device then properly report interface */
+	if (serial->num_ports == 1) {
+		interface = sierra_calc_interface(serial);
+		/* Control message is sent only to interfaces with
+		 * interrupt_in endpoints
+		 */
+		if (port->interrupt_in_urb) {
+			/* send control message */
 			return usb_control_msg(serial->dev,
 				usb_rcvctrlpipe(serial->dev, 0),
 				0x22, 0x21, val, interface,
 				NULL, 0, USB_CTRL_SET_TIMEOUT);
-
 		}
 	}
 
+	/* Otherwise the need to do non-composite mapping */
+	else {
+		if (port->bulk_out_endpointAddress == 2)
+			interface = 0;
+		else if (port->bulk_out_endpointAddress == 4)
+			interface = 1;
+		else if (port->bulk_out_endpointAddress == 5)
+			interface = 2;
+		return usb_control_msg(serial->dev,
+			usb_rcvctrlpipe(serial->dev, 0),
+			0x22, 0x21, val, interface,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	}
 	return 0;
 }
 
@@ -299,7 +334,7 @@
 {
 	dev_dbg(&port->dev, "%s", __func__);
 	tty_termios_copy_hw(tty->termios, old_termios);
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 }
 
 static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +373,18 @@
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return sierra_send_setup(tty, port);
+	return sierra_send_setup(port);
+}
+
+static void sierra_release_urb(struct urb *urb)
+{
+	struct usb_serial_port *port;
+	if (urb) {
+		port =  urb->context;
+		dev_dbg(&port->dev, "%s: %p\n", __func__, urb);
+		kfree(urb->transfer_buffer);
+		usb_free_urb(urb);
+	}
 }
 
 static void sierra_outdat_callback(struct urb *urb)
@@ -465,7 +511,7 @@
 				" received", __func__);
 
 		/* Resubmit urb so we continue receiving */
-		if (port->port.count && status != -ESHUTDOWN) {
+		if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
 			err = usb_submit_urb(urb, GFP_ATOMIC);
 			if (err)
 				dev_err(&port->dev, "resubmit read urb failed."
@@ -557,14 +603,129 @@
 	return 2048;
 }
 
+static void sierra_stop_rx_urbs(struct usb_serial_port *port)
+{
+	int i;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++)
+		usb_kill_urb(portdata->in_urbs[i]);
+
+	usb_kill_urb(port->interrupt_in_urb);
+}
+
+static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags)
+{
+	int ok_cnt;
+	int err = -EINVAL;
+	int i;
+	struct urb *urb;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+
+	ok_cnt = 0;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+		urb = portdata->in_urbs[i];
+		if (!urb)
+			continue;
+		err = usb_submit_urb(urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit urb failed: %d\n",
+				__func__, err);
+		} else {
+			ok_cnt++;
+		}
+	}
+
+	if (ok_cnt && port->interrupt_in_urb) {
+		err = usb_submit_urb(port->interrupt_in_urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit intr urb failed: %d\n",
+				__func__, err);
+		}
+	}
+
+	if (ok_cnt > 0) /* at least one rx urb submitted */
+		return 0;
+	else
+		return err;
+}
+
+static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint,
+					int dir, void *ctx, int len,
+					gfp_t mem_flags,
+					usb_complete_t callback)
+{
+	struct urb	*urb;
+	u8		*buf;
+
+	if (endpoint == -1)
+		return NULL;
+
+	urb = usb_alloc_urb(0, mem_flags);
+	if (urb == NULL) {
+		dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n",
+			__func__, endpoint);
+		return NULL;
+	}
+
+	buf = kmalloc(len, mem_flags);
+	if (buf) {
+		/* Fill URB using supplied data */
+		usb_fill_bulk_urb(urb, serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | dir,
+			buf, len, callback, ctx);
+
+		/* debug */
+		dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+	} else {
+		dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+
+		sierra_release_urb(urb);
+		urb = NULL;
+	}
+
+	return urb;
+}
+
+static void sierra_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct sierra_port_private *portdata;
+
+	dev_dbg(&port->dev, "%s\n", __func__);
+	portdata = usb_get_serial_port_data(port);
+
+	portdata->rts_state = 0;
+	portdata->dtr_state = 0;
+
+	if (serial->dev) {
+		mutex_lock(&serial->disc_mutex);
+		if (!serial->disconnected)
+			sierra_send_setup(port);
+		mutex_unlock(&serial->disc_mutex);
+
+		/* Stop reading urbs */
+		sierra_stop_rx_urbs(port);
+		/* .. and release them */
+		for (i = 0; i < N_IN_URB; i++) {
+			sierra_release_urb(portdata->in_urbs[i]);
+			portdata->in_urbs[i] = NULL;
+		}
+	}
+}
+
 static int sierra_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
 	struct sierra_port_private *portdata;
 	struct usb_serial *serial = port->serial;
 	int i;
+	int err;
+	int endpoint;
 	struct urb *urb;
-	int result;
 
 	portdata = usb_get_serial_port_data(port);
 
@@ -574,77 +735,52 @@
 	portdata->rts_state = 1;
 	portdata->dtr_state = 1;
 
-	/* Reset low level data toggle and start reading from endpoints */
-	for (i = 0; i < N_IN_URB; i++) {
-		urb = portdata->in_urbs[i];
-		if (!urb)
-			continue;
-		if (urb->dev != serial->dev) {
-			dev_dbg(&port->dev, "%s: dev %p != %p",
-				 __func__, urb->dev, serial->dev);
-			continue;
-		}
 
-		/*
-		 * make sure endpoint data toggle is synchronized with the
-		 * device
-		 */
-		usb_clear_halt(urb->dev, urb->pipe);
-
-		result = usb_submit_urb(urb, GFP_KERNEL);
-		if (result) {
-			dev_err(&port->dev, "submit urb %d failed (%d) %d\n",
-				i, result, urb->transfer_buffer_length);
-		}
+	endpoint = port->bulk_in_endpointAddress;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+		urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port,
+					IN_BUFLEN, GFP_KERNEL,
+					sierra_indat_callback);
+		portdata->in_urbs[i] = urb;
 	}
+	/* clear halt condition */
+	usb_clear_halt(serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN);
 
-	sierra_send_setup(tty, port);
-
-	/* start up the interrupt endpoint if we have one */
-	if (port->interrupt_in_urb) {
-		result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-		if (result)
-			dev_err(&port->dev, "submit irq_in urb failed %d\n",
-				result);
+	err = sierra_submit_rx_urbs(port, GFP_KERNEL);
+	if (err) {
+		/* get rid of everything as in close */
+		sierra_close(port);
+		return err;
 	}
+	sierra_send_setup(port);
+
 	return 0;
 }
 
-static void sierra_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
-	dev_dbg(&port->dev, "%s", __func__);
 	portdata = usb_get_serial_port_data(port);
-
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
 
 	if (serial->dev) {
 		mutex_lock(&serial->disc_mutex);
 		if (!serial->disconnected)
-			sierra_send_setup(tty, port);
+			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
-
-		/* Stop reading/writing urbs */
-		for (i = 0; i < N_IN_URB; i++)
-			usb_kill_urb(portdata->in_urbs[i]);
 	}
-
-	usb_kill_urb(port->interrupt_in_urb);
-	tty_port_tty_set(&port->port, NULL);
 }
 
 static int sierra_startup(struct usb_serial *serial)
 {
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
-	struct urb *urb;
 	int i;
-	int j;
 
 	dev_dbg(&serial->dev->dev, "%s", __func__);
 
@@ -666,34 +802,8 @@
 			return -ENOMEM;
 		}
 		spin_lock_init(&portdata->lock);
-		for (j = 0; j < N_IN_URB; j++) {
-			portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL);
-			if (!portdata->in_buffer[j]) {
-				for (--j; j >= 0; j--)
-					kfree(portdata->in_buffer[j]);
-				kfree(portdata);
-				return -ENOMEM;
-			}
-		}
-
+		/* Set the port private data pointer */
 		usb_set_serial_port_data(port, portdata);
-
-		/* initialize the in urbs */
-		for (j = 0; j < N_IN_URB; ++j) {
-			urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (urb == NULL) {
-				dev_dbg(&port->dev, "%s: alloc for in "
-					"port failed.", __func__);
-				continue;
-			}
-			/* Fill URB using supplied data. */
-			usb_fill_bulk_urb(urb, serial->dev,
-					  usb_rcvbulkpipe(serial->dev,
-						port->bulk_in_endpointAddress),
-					  portdata->in_buffer[j], IN_BUFLEN,
-					  sierra_indat_callback, port);
-			portdata->in_urbs[j] = urb;
-		}
 	}
 
 	return 0;
@@ -701,7 +811,7 @@
 
 static void sierra_shutdown(struct usb_serial *serial)
 {
-	int i, j;
+	int i;
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
 
@@ -714,12 +824,6 @@
 		portdata = usb_get_serial_port_data(port);
 		if (!portdata)
 			continue;
-
-		for (j = 0; j < N_IN_URB; j++) {
-			usb_kill_urb(portdata->in_urbs[j]);
-			usb_free_urb(portdata->in_urbs[j]);
-			kfree(portdata->in_buffer[j]);
-		}
 		kfree(portdata);
 		usb_set_serial_port_data(port, NULL);
 	}
@@ -737,6 +841,7 @@
 	.probe		   = sierra_probe,
 	.open              = sierra_open,
 	.close             = sierra_close,
+	.dtr_rts	   = sierra_dtr_rts,
 	.write             = sierra_write,
 	.write_room        = sierra_write_room,
 	.set_termios       = sierra_set_termios,
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 5e7528c..8f7ed8f 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -446,66 +446,47 @@
 			"RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
 }
 
-/* close the serial port. We should wait for data sending to device 1st and
- * then kill all urb. */
-static void spcp8x5_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static int spcp8x5_carrier_raised(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & MSR_STATUS_LINE_DCD)
+		return 1;
+	return 0;
+}
+
+static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control = MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS;
+	else
+		priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
+}
+
+/* close the serial port. We should wait for data sending to device 1st and
+ * then kill all urb. */
+static void spcp8x5_close(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
 	int result;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = SPCP8x5_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (ringbuf_avail_data(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current))
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-
 	/* clear out any remaining data in the buffer */
 	clear_ringbuf(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device (this is long enough
-	 * for the entire all byte spcp8x5 hardware buffer to drain with no
-	 * flow control for data rates of 1200 bps or more, for lower rates we
-	 * should really know how much data is in the buffer to compute a delay
-	 * that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560) / bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	set_current_state(TASK_INTERRUPTIBLE);
-	schedule_timeout(timeout);
-
-	/* clear control lines */
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
-		}
-	}
-
 	/* kill urb */
 	if (port->write_urb != NULL) {
 		result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		priv->line_control = MCR_DTR | MCR_RTS;
-	else
-		priv->line_control = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
 
 	/* Setup termios */
@@ -691,9 +665,10 @@
 	port->read_urb->dev = serial->dev;
 	ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
 	if (ret) {
-		spcp8x5_close(tty, port, NULL);
+		spcp8x5_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -1033,6 +1008,8 @@
 	.num_ports		= 1,
 	.open 			= spcp8x5_open,
 	.close 			= spcp8x5_close,
+	.dtr_rts		= spcp8x5_dtr_rts,
+	.carrier_raised		= spcp8x5_carrier_raised,
 	.write 			= spcp8x5_write,
 	.set_termios 		= spcp8x5_set_termios,
 	.ioctl 			= spcp8x5_ioctl,
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 69879e4..8b07ebc 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -152,8 +152,7 @@
 	return result;
 }
 
-static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void symbol_close(struct usb_serial_port *port)
 {
 	struct symbol_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 0a64bac..42cb04c 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -100,8 +100,7 @@
 static void ti_shutdown(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
 		struct file *file);
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-		struct file *file);
+static void ti_close(struct usb_serial_port *port);
 static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
 		const unsigned char *data, int count);
 static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@
 }
 
 
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *file)
+static void ti_close(struct usb_serial_port *port)
 {
 	struct ti_device *tdev;
 	struct ti_port *tport;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f331e2b..1967a7e 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -238,9 +238,11 @@
 			goto bailout_interface_put;
 		mutex_unlock(&serial->disc_mutex);
 	}
-
 	mutex_unlock(&port->mutex);
-	return 0;
+	/* Now do the correct tty layer semantics */
+	retval = tty_port_block_til_ready(&port->port, tty, filp);
+	if (retval == 0)
+		return 0;
 
 bailout_interface_put:
 	usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@
 	return retval;
 }
 
-static void serial_close(struct tty_struct *tty, struct file *filp)
+/**
+ *	serial_do_down		-	shut down hardware
+ *	@port: port to shut down
+ *
+ *	Shut down a USB port unless it is the console. We never shut down the
+ *	console hardware as it will always be in use.
+ *
+ *	Don't free any resources at this point
+ */
+static void serial_do_down(struct usb_serial_port *port)
 {
-	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial_driver *drv = port->serial->type;
 	struct usb_serial *serial;
 	struct module *owner;
-	int count;
 
-	if (!port)
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
 		return;
 
-	dbg("%s - port %d", __func__, port->number);
-
 	mutex_lock(&port->mutex);
 	serial = port->serial;
 	owner = serial->type->driver.owner;
 
-	if (port->port.count == 0) {
-		mutex_unlock(&port->mutex);
-		return;
-	}
+	if (drv->close)
+		drv->close(port);
 
-	if (port->port.count == 1)
-		/* only call the device specific close if this
-		 * port is being closed by the last owner. Ensure we do
-		 * this before we drop the port count. The call is protected
-		 * by the port mutex
-		 */
-		serial->type->close(tty, port, filp);
-
-	if (port->port.count == (port->console ? 2 : 1)) {
-		struct tty_struct *tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			/* We must do this before we drop the port count to
-			   zero. */
-			if (tty->driver_data)
-				tty->driver_data = NULL;
-			tty_port_tty_set(&port->port, NULL);
-			tty_kref_put(tty);
-		}
-	}
-
-	--port->port.count;
-	count = port->port.count;
 	mutex_unlock(&port->mutex);
+}
+
+/**
+ *	serial_do_free		-	free resources post close/hangup
+ *	@port: port to free up
+ *
+ *	Do the resource freeing and refcount dropping for the port. We must
+ *	be careful about ordering and we must avoid freeing up the console.
+ */
+
+static void serial_do_free(struct usb_serial_port *port)
+{
+	struct usb_serial *serial;
+	struct module *owner;
+
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
+		return;
+
+	serial = port->serial;
+	owner = serial->type->driver.owner;
 	put_device(&port->dev);
-
 	/* Mustn't dereference port any more */
-	if (count == 0) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			usb_autopm_put_interface(serial->interface);
-		mutex_unlock(&serial->disc_mutex);
-	}
+	mutex_lock(&serial->disc_mutex);
+	if (!serial->disconnected)
+		usb_autopm_put_interface(serial->interface);
+	mutex_unlock(&serial->disc_mutex);
 	usb_serial_put(serial);
-
 	/* Mustn't dereference serial any more */
-	if (count == 0)
-		module_put(owner);
+	module_put(owner);
+}
+
+static void serial_close(struct tty_struct *tty, struct file *filp)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	dbg("%s - port %d", __func__, port->number);
+
+
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+
+	serial_do_down(port);		
+	tty_port_close_end(&port->port, tty);
+	tty_port_tty_set(&port->port, NULL);
+	serial_do_free(port);
+}
+
+static void serial_hangup(struct tty_struct *tty)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	serial_do_down(port);
+	tty_port_hangup(&port->port);
+	serial_do_free(port);
 }
 
 static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@
 	return NULL;
 }
 
+static int serial_carrier_raised(struct tty_port *port)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->carrier_raised)
+		return drv->carrier_raised(p);
+	/* No carrier control - don't block */
+	return 1;	
+}
+
+static void serial_dtr_rts(struct tty_port *port, int on)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->dtr_rts)
+		drv->dtr_rts(p, on);
+}
+
+static const struct tty_port_operations serial_port_ops = {
+	.carrier_raised = serial_carrier_raised,
+	.dtr_rts = serial_dtr_rts,
+};
+
 int usb_serial_probe(struct usb_interface *interface,
 			       const struct usb_device_id *id)
 {
@@ -841,6 +891,7 @@
 		if (!port)
 			goto probe_error;
 		tty_port_init(&port->port);
+		port->port.ops = &serial_port_ops;
 		port->serial = serial;
 		spin_lock_init(&port->lock);
 		mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@
 		if (port) {
 			struct tty_struct *tty = tty_port_tty_get(&port->port);
 			if (tty) {
+				/* The hangup will occur asynchronously but
+				   the object refcounts will sort out all the
+				   cleanup */
 				tty_hangup(tty);
 				tty_kref_put(tty);
 			}
@@ -1135,6 +1189,7 @@
 	.open =			serial_open,
 	.close =		serial_close,
 	.write =		serial_write,
+	.hangup = 		serial_hangup,
 	.write_room =		serial_write_room,
 	.ioctl =		serial_ioctl,
 	.set_termios =		serial_set_termios,
@@ -1147,6 +1202,7 @@
 	.proc_fops =		&serial_proc_fops,
 };
 
+
 struct tty_driver *usb_serial_tty_driver;
 
 static int __init usb_serial_init(void)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 5ac414b..b15f1c0 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -38,8 +38,7 @@
 /* function prototypes for a handspring visor */
 static int  visor_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void visor_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void visor_close(struct usb_serial_port *port);
 static int  visor_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int  visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@
 }
 
 
-static void visor_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void visor_close(struct usb_serial_port *port)
 {
 	struct visor_private *priv = usb_get_serial_port_data(port);
 	unsigned char *transfer_buffer;
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5335d32..7c7295d 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -147,8 +147,7 @@
 static void whiteheat_shutdown(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void whiteheat_close(struct usb_serial_port *port);
 static int  whiteheat_write(struct tty_struct *tty,
 			struct usb_serial_port *port,
 			const unsigned char *buf, int count);
@@ -712,8 +711,7 @@
 }
 
 
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void whiteheat_close(struct usb_serial_port *port)
 {
 	struct whiteheat_private *info = usb_get_serial_port_data(port);
 	struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@
 
 	dbg("%s - port %d", __func__, port->number);
 
-	mutex_lock(&port->serial->disc_mutex);
-	/* filp is NULL when called from usb_serial_disconnect */
-	if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
-		mutex_unlock(&port->serial->disc_mutex);
-		return;
-	}
-	mutex_unlock(&port->serial->disc_mutex);
-
-	tty->closing = 1;
-
-/*
- * Not currently in use; tty_wait_until_sent() calls
- * serial_chars_in_buffer() which deadlocks on the second semaphore
- * acquisition. This should be fixed at some point. Greg's been
- * notified.
-	if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
-		tty_wait_until_sent(tty, CLOSING_DELAY);
-	}
-*/
-
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	firm_report_tx_done(port);
-
 	firm_close(port);
 
 	/* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@
 	}
 	spin_unlock_irq(&info->lock);
 	mutex_unlock(&info->deathwarrant);
-
 	stop_command_port(port->serial);
-
-	tty->closing = 0;
 }
 
 
diff --git a/fs/bio.c b/fs/bio.c
index 9871164..740699c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
diff --git a/fs/buffer.c b/fs/buffer.c
index 4910612..1864d0b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2935,6 +2935,8 @@
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
+	BUG_ON(buffer_delay(bh));
+	BUG_ON(buffer_unwritten(bh));
 
 	/*
 	 * Mask in barrier bit for a write (could be either a WRITE or a
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c68edb9..9b1d285 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -557,8 +557,10 @@
 	int err = register_filesystem(&devpts_fs_type);
 	if (!err) {
 		devpts_mnt = kern_mount(&devpts_fs_type);
-		if (IS_ERR(devpts_mnt))
+		if (IS_ERR(devpts_mnt)) {
 			err = PTR_ERR(devpts_mnt);
+			unregister_filesystem(&devpts_fs_type);
+		}
 	}
 	return err;
 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5c4afe6..e3c748f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1093,6 +1093,7 @@
 	brelse(bh);
 failed_sbi:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	return ret;
 }
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe..d8b73d4a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2021,6 +2021,7 @@
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003..8a34710 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		   ext4_jbd2.o migrate.o mballoc.o
+		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad..e2126d7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
 #include <linux/buffer_head.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "group.h"
 #include "mballoc.h"
 
 /*
@@ -88,6 +87,7 @@
 		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
 	int bit, bit_max;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned free_blocks, group_blocks;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -123,7 +123,7 @@
 		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == sbi->s_groups_count - 1) {
+	if (block_group == ngroups - 1) {
 		/*
 		 * Even though mke2fs always initialize first and last group
 		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@
 		 */
 		group_blocks = ext4_blocks_count(sbi->s_es) -
 			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
 	} else {
 		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
 	}
@@ -205,18 +205,18 @@
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (block_group >= sbi->s_groups_count) {
+	if (block_group >= ngroups) {
 		ext4_error(sb, "ext4_get_group_desc",
 			   "block_group >= groups_count - "
 			   "block_group = %u, groups_count = %u",
-			   block_group, sbi->s_groups_count);
+			   block_group, ngroups);
 
 		return NULL;
 	}
-	smp_rmb();
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@
 	down_write(&grp->alloc_sem);
 	for (i = 0, blocks_freed = 0; i < count; i++) {
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 						bit + i, bitmap_bh->b_data)) {
 			ext4_error(sb, __func__,
 				   "bit already cleared for block %llu",
@@ -461,11 +461,11 @@
 			blocks_freed++;
 		}
 	}
-	spin_lock(sb_bgl_lock(sbi, block_group));
+	ext4_lock_group(sb, block_group);
 	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
 	ext4_free_blks_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
 
 	if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
 	ext4_group_t i;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@
 	bitmap_count = 0;
 	gdp = NULL;
 
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
@@ -700,7 +699,6 @@
 	return bitmap_count;
 #else
 	desc_count = 0;
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 0000000..50784ef
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+
+struct ext4_system_zone {
+	struct rb_node	node;
+	ext4_fsblk_t	start_blk;
+	unsigned int	count;
+};
+
+static struct kmem_cache *ext4_system_zone_cachep;
+
+int __init init_ext4_system_zone(void)
+{
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+					     SLAB_RECLAIM_ACCOUNT);
+	if (ext4_system_zone_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void exit_ext4_system_zone(void)
+{
+	kmem_cache_destroy(ext4_system_zone_cachep);
+}
+
+static inline int can_merge(struct ext4_system_zone *entry1,
+		     struct ext4_system_zone *entry2)
+{
+	if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+		return 1;
+	return 0;
+}
+
+/*
+ * Mark a range of blocks as belonging to the "system zone" --- that
+ * is, filesystem metadata blocks which should never be used by
+ * inodes.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+			   ext4_fsblk_t start_blk,
+			   unsigned int count)
+{
+	struct ext4_system_zone *new_entry = NULL, *entry;
+	struct rb_node **n = &sbi->system_blks.rb_node, *node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct ext4_system_zone, node);
+		if (start_blk < entry->start_blk)
+			n = &(*n)->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = &(*n)->rb_right;
+		else {
+			if (start_blk + count > (entry->start_blk + 
+						 entry->count))
+				entry->count = (start_blk + count - 
+						entry->start_blk);
+			new_node = *n;
+			new_entry = rb_entry(new_node, struct ext4_system_zone,
+					     node);
+			break;
+		}
+	}
+
+	if (!new_entry) {
+		new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+					     GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		new_entry->start_blk = start_blk;
+		new_entry->count = count;
+		new_node = &new_entry->node;
+
+		rb_link_node(new_node, parent, n);
+		rb_insert_color(new_node, &sbi->system_blks);
+	}
+
+	/* Can we merge to the left? */
+	node = rb_prev(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(entry, new_entry)) {
+			new_entry->start_blk = entry->start_blk;
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+
+	/* Can we merge to the right? */
+	node = rb_next(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(new_entry, entry)) {
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+	return 0;
+}
+
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+	struct rb_node *node;
+	struct ext4_system_zone *entry;
+	int first = 1;
+
+	printk(KERN_INFO "System zones: ");
+	node = rb_first(&sbi->system_blks);
+	while (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		printk("%s%llu-%llu", first ? "" : ", ",
+		       entry->start_blk, entry->start_blk + entry->count - 1);
+		first = 0;
+		node = rb_next(node);
+	}
+	printk("\n");
+}
+
+int ext4_setup_system_zone(struct super_block *sb)
+{
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp;
+	ext4_group_t i;
+	int flex_size = ext4_flex_bg_size(sbi);
+	int ret;
+
+	if (!test_opt(sb, BLOCK_VALIDITY)) {
+		if (EXT4_SB(sb)->system_blks.rb_node)
+			ext4_release_system_zone(sb);
+		return 0;
+	}
+	if (EXT4_SB(sb)->system_blks.rb_node)
+		return 0;
+
+	for (i=0; i < ngroups; i++) {
+		if (ext4_bg_has_super(sb, i) &&
+		    ((i < 5) || ((i % flex_size) == 0)))
+			add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+					sbi->s_gdb_count + 1);
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+				sbi->s_itb_per_group);
+		if (ret)
+			return ret;
+	}
+
+	if (test_opt(sb, DEBUG))
+		debug_print_tree(EXT4_SB(sb));
+	return 0;
+}
+
+/* Called when the filesystem is unmounted */
+void ext4_release_system_zone(struct super_block *sb)
+{
+	struct rb_node	*n = EXT4_SB(sb)->system_blks.rb_node;
+	struct rb_node	*parent;
+	struct ext4_system_zone	*entry;
+
+	while (n) {
+		/* Do the node's children first */
+		if (n->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
+		 */
+		parent = rb_parent(n);
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		kmem_cache_free(ext4_system_zone_cachep, entry);
+		if (!parent)
+			EXT4_SB(sb)->system_blks.rb_node = NULL;
+		else if (parent->rb_left == n)
+			parent->rb_left = NULL;
+		else if (parent->rb_right == n)
+			parent->rb_right = NULL;
+		n = parent;
+	}
+	EXT4_SB(sb)->system_blks.rb_node = NULL;
+}
+
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with filesystem metadata blocks.
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+			  unsigned int count)
+{
+	struct ext4_system_zone *entry;
+	struct rb_node *n = sbi->system_blks.rb_node;
+
+	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (start_blk + count > ext4_blocks_count(sbi->s_es)))
+		return 0;
+	while (n) {
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		if (start_blk + count - 1 < entry->start_blk)
+			n = n->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = n->rb_right;
+		else
+			return 0;
+	}
+	return 1;
+}
+
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b647899..9dc93168 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
-						0, 0, 0);
+		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef..cc7d5ed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
 #include <linux/magic.h>
 #include <linux/jbd2.h>
 #include <linux/quota.h>
-#include "ext4_i.h"
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
 
 /*
  * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+
+/* data type for block group number */
+typedef unsigned int ext4_group_t;
+
+
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE		1
 /* blocks already reserved */
@@ -179,9 +199,6 @@
 #define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */
 
-#ifdef __KERNEL__
-#include "ext4_sb.h"
-#endif
 /*
  * Macro-instructions used to manage group descriptors
  */
@@ -297,10 +314,23 @@
 };
 
 /*
- * Following is used by preallocation code to tell get_blocks() that we
- * want uninitialzed extents.
+ * Flags used by ext4_get_blocks()
  */
-#define EXT4_CREATE_UNINITIALIZED_EXT		2
+	/* Allocate any needed blocks and/or convert an unitialized
+	   extent to be an initialized ext4 */
+#define EXT4_GET_BLOCKS_CREATE			0x0001
+	/* Request the creation of an unitialized extent */
+#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+						 EXT4_GET_BLOCKS_CREATE)
+	/* Caller is from the delayed allocation writeout path,
+	   so set the magic i_delalloc_reserve_flag after taking the 
+	   inode allocation semaphore for */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
+	/* Call ext4_da_update_reserve_space() after successfully 
+	   allocating the blocks */
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
+
 
 /*
  * ioctl commands
@@ -516,6 +546,110 @@
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
 /*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+	ext4_fsblk_t	ec_start;
+	ext4_lblk_t	ec_block;
+	__u32		ec_len; /* must be 32bit to return holes */
+	__u32		ec_type;
+};
+
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+	__le32	i_data[15];	/* unconverted */
+	__u32	i_flags;
+	ext4_fsblk_t	i_file_acl;
+	__u32	i_dtime;
+
+	/*
+	 * i_block_group is the number of the block group which contains
+	 * this file's inode.  Constant across the lifetime of the inode,
+	 * it is ued for making block allocation decisions - we try to
+	 * place a file's data blocks near its inode block, and new inodes
+	 * near to their parent directory's inode.
+	 */
+	ext4_group_t	i_block_group;
+	__u32	i_state;		/* Dynamic state flags for ext4 */
+
+	ext4_lblk_t		i_dir_start_lookup;
+#ifdef CONFIG_EXT4_FS_XATTR
+	/*
+	 * Extended attributes can be read independently of the main file
+	 * data. Taking i_mutex even when reading would cause contention
+	 * between readers of EAs and writers of regular file data, so
+	 * instead we synchronize on xattr_sem when reading or changing
+	 * EAs.
+	 */
+	struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
+	struct list_head i_orphan;	/* unlinked but open inodes */
+
+	/*
+	 * i_disksize keeps track of what the inode size is ON DISK, not
+	 * in memory.  During truncate, i_size is set to the new size by
+	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
+	 * set i_disksize to 0 until the truncate is actually under way.
+	 *
+	 * The intent is that i_disksize always represents the blocks which
+	 * are used by this file.  This allows recovery to restart truncate
+	 * on orphans if we crash during truncate.  We actually write i_disksize
+	 * into the on-disk inode when writing inodes out, instead of i_size.
+	 *
+	 * The only time when i_disksize and i_size may be different is when
+	 * a truncate is in progress.  The only things which change i_disksize
+	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+	 */
+	loff_t	i_disksize;
+
+	/*
+	 * i_data_sem is for serialising ext4_truncate() against
+	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
+	 * data tree are chopped off during truncate. We can't do that in
+	 * ext4 because whenever we perform intermediate commits during
+	 * truncate, the inode and all the metadata blocks *must* be in a
+	 * consistent state which allows truncation of the orphans to restart
+	 * during recovery.  Hence we must fix the get_block-vs-truncate race
+	 * by other means, so we have i_data_sem.
+	 */
+	struct rw_semaphore i_data_sem;
+	struct inode vfs_inode;
+	struct jbd2_inode jinode;
+
+	struct ext4_ext_cache i_cached_extent;
+	/*
+	 * File creation time. Its function is same as that of
+	 * struct timespec i_{a,c,m}time in the generic inode.
+	 */
+	struct timespec i_crtime;
+
+	/* mballoc */
+	struct list_head i_prealloc_list;
+	spinlock_t i_prealloc_lock;
+
+	/* ialloc */
+	ext4_group_t	i_last_alloc_group;
+
+	/* allocation reservation info for delalloc */
+	unsigned int i_reserved_data_blocks;
+	unsigned int i_reserved_meta_blocks;
+	unsigned int i_allocated_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
+
+	/* on-disk additional length */
+	__u16 i_extra_isize;
+
+	spinlock_t i_block_reservation_lock;
+};
+
+/*
  * File system states
  */
 #define	EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
@@ -560,6 +694,7 @@
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
+#define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@
 };
 
 #ifdef __KERNEL__
+/*
+ * fourth extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
+	unsigned long s_inodes_per_block;/* Number of inodes per block */
+	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_inodes_per_group;/* Number of inodes in a group */
+	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
+	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
+	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
+	ext4_group_t s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
+	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
+	struct buffer_head * s_sbh;	/* Buffer containing the super block */
+	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
+	struct buffer_head **s_group_desc;
+	unsigned long  s_mount_opt;
+	ext4_fsblk_t s_sb_block;
+	uid_t s_resuid;
+	gid_t s_resgid;
+	unsigned short s_mount_state;
+	unsigned short s_pad;
+	int s_addr_per_block_bits;
+	int s_desc_per_block_bits;
+	int s_inode_size;
+	int s_first_ino;
+	unsigned int s_inode_readahead_blks;
+	spinlock_t s_next_gen_lock;
+	u32 s_next_generation;
+	u32 s_hash_seed[4];
+	int s_def_hash_version;
+	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
+	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeinodes_counter;
+	struct percpu_counter s_dirs_counter;
+	struct percpu_counter s_dirtyblocks_counter;
+	struct blockgroup_lock *s_blockgroup_lock;
+	struct proc_dir_entry *s_proc;
+	struct kobject s_kobj;
+	struct completion s_kobj_unregister;
+
+	/* Journaling */
+	struct inode *s_journal_inode;
+	struct journal_s *s_journal;
+	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
+	unsigned long s_commit_interval;
+	u32 s_max_batch_time;
+	u32 s_min_batch_time;
+	struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
+	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+	int s_jquota_fmt;			/* Format of quota to use */
+#endif
+	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+	struct rb_root system_blks;
+
+#ifdef EXTENTS_STATS
+	/* ext4 extents stats */
+	unsigned long s_ext_min;
+	unsigned long s_ext_max;
+	unsigned long s_depth_max;
+	spinlock_t s_ext_stats_lock;
+	unsigned long s_ext_blocks;
+	unsigned long s_ext_extents;
+#endif
+
+	/* for buddy allocator */
+	struct ext4_group_info ***s_group_info;
+	struct inode *s_buddy_cache;
+	long s_blocks_reserved;
+	spinlock_t s_reserve_lock;
+	spinlock_t s_md_lock;
+	tid_t s_last_transaction;
+	unsigned short *s_mb_offsets;
+	unsigned int *s_mb_maxs;
+
+	/* tunables */
+	unsigned long s_stripe;
+	unsigned int s_mb_stream_request;
+	unsigned int s_mb_max_to_scan;
+	unsigned int s_mb_min_to_scan;
+	unsigned int s_mb_stats;
+	unsigned int s_mb_order2_reqs;
+	unsigned int s_mb_group_prealloc;
+	/* where last allocation was done - for stream allocation */
+	unsigned long s_mb_last_group;
+	unsigned long s_mb_last_start;
+
+	/* history to debug policy */
+	struct ext4_mb_history *s_mb_history;
+	int s_mb_history_cur;
+	int s_mb_history_max;
+	int s_mb_history_num;
+	spinlock_t s_mb_history_lock;
+	int s_mb_history_filter;
+
+	/* stats for buddy allocator */
+	spinlock_t s_mb_pa_lock;
+	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
+	atomic_t s_bal_success;	/* we found long enough chunks */
+	atomic_t s_bal_allocated;	/* in blocks */
+	atomic_t s_bal_ex_scanned;	/* total extents scanned */
+	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_breaks;	/* too long searches */
+	atomic_t s_bal_2orders;	/* 2^order hits */
+	spinlock_t s_bal_lock;
+	unsigned long s_mb_buddies_generated;
+	unsigned long long s_mb_generation_time;
+	atomic_t s_mb_lost_chunks;
+	atomic_t s_mb_preallocated;
+	atomic_t s_mb_discarded;
+
+	/* locality groups */
+	struct ext4_locality_group *s_locality_groups;
+
+	/* for write statistics */
+	unsigned long s_sectors_written_start;
+	u64 s_kbytes_written;
+
+	unsigned int s_log_groups_per_flex;
+	struct flex_groups *s_flex_groups;
+};
+
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -704,7 +970,6 @@
 		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
 }
 
-
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@
 						    ext4_group_t block_group,
 						    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+				      ext4_group_t block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)			\
+		ext4_init_block_bitmap(sb, NULL, group, desc)
 
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_warning(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
 				const char *, const char *, ...)
 	__attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@
 				struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
 				   struct ext4_group_desc *bg, __u32 count);
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+				   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+				       struct ext4_group_desc *gdp);
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
@@ -1228,6 +1512,18 @@
 	 return grp_info[indexv][indexh];
 }
 
+/*
+ * Reading s_groups_count requires using smp_rmb() afterwards.  See
+ * the locking protocol documented in the comments of ext4_group_add()
+ * in resize.c
+ */
+static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
+{
+	ext4_group_t	ngroups = EXT4_SB(sb)->s_groups_count;
+
+	smp_rmb();
+	return ngroups;
+}
 
 static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
 					     ext4_group_t block_group)
@@ -1283,33 +1579,25 @@
 };
 
 #define EXT4_GROUP_INFO_NEED_INIT_BIT	0
-#define EXT4_GROUP_INFO_LOCKED_BIT	1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
+static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
+					      ext4_group_t group)
+{
+	return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
+}
+
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+	spin_lock(ext4_group_lock_ptr(sb, group));
 }
 
 static inline void ext4_unlock_group(struct super_block *sb,
 					ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline int ext4_is_group_locked(struct super_block *sb,
-					ext4_group_t group)
-{
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-						&(grinfo->bb_state));
+	spin_unlock(ext4_group_lock_ptr(sb, group));
 }
 
 /*
@@ -1326,11 +1614,21 @@
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
+extern struct dentry *ext4_get_parent(struct dentry *child);
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
+/* block_validity */
+extern void ext4_release_system_zone(struct super_block *sb);
+extern int ext4_setup_system_zone(struct super_block *sb);
+extern int __init init_ext4_system_zone(void);
+extern void exit_ext4_system_zone(void);
+extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
+				 ext4_fsblk_t start_blk,
+				 unsigned int count);
+
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@
 				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int max_blocks,
-			       struct buffer_head *bh_result,
-			       int create, int extend_disksize);
+			       struct buffer_head *bh_result, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-			sector_t block, unsigned int max_blocks,
-			struct buffer_head *bh, int create,
-			int extend_disksize, int flag);
+extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
+			   sector_t block, unsigned int max_blocks,
+			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187..0000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  ext4_i.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_i.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_I
-#define _EXT4_I
-
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-
-/*
- * storage for cached extent
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
-};
-
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
-	__le32	i_data[15];	/* unconverted */
-	__u32	i_flags;
-	ext4_fsblk_t	i_file_acl;
-	__u32	i_dtime;
-
-	/*
-	 * i_block_group is the number of the block group which contains
-	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
-	 * place a file's data blocks near its inode block, and new inodes
-	 * near to their parent directory's inode.
-	 */
-	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
-
-	ext4_lblk_t		i_dir_start_lookup;
-#ifdef CONFIG_EXT4_FS_XATTR
-	/*
-	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_mutex even when reading would cause contention
-	 * between readers of EAs and writers of regular file data, so
-	 * instead we synchronize on xattr_sem when reading or changing
-	 * EAs.
-	 */
-	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
-	struct list_head i_orphan;	/* unlinked but open inodes */
-
-	/*
-	 * i_disksize keeps track of what the inode size is ON DISK, not
-	 * in memory.  During truncate, i_size is set to the new size by
-	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
-	 * set i_disksize to 0 until the truncate is actually under way.
-	 *
-	 * The intent is that i_disksize always represents the blocks which
-	 * are used by this file.  This allows recovery to restart truncate
-	 * on orphans if we crash during truncate.  We actually write i_disksize
-	 * into the on-disk inode when writing inodes out, instead of i_size.
-	 *
-	 * The only time when i_disksize and i_size may be different is when
-	 * a truncate is in progress.  The only things which change i_disksize
-	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
-	 */
-	loff_t	i_disksize;
-
-	/*
-	 * i_data_sem is for serialising ext4_truncate() against
-	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
-	 * data tree are chopped off during truncate. We can't do that in
-	 * ext4 because whenever we perform intermediate commits during
-	 * truncate, the inode and all the metadata blocks *must* be in a
-	 * consistent state which allows truncation of the orphans to restart
-	 * during recovery.  Hence we must fix the get_block-vs-truncate race
-	 * by other means, so we have i_data_sem.
-	 */
-	struct rw_semaphore i_data_sem;
-	struct inode vfs_inode;
-	struct jbd2_inode jinode;
-
-	struct ext4_ext_cache i_cached_extent;
-	/*
-	 * File creation time. Its function is same as that of
-	 * struct timespec i_{a,c,m}time in the generic inode.
-	 */
-	struct timespec i_crtime;
-
-	/* mballoc */
-	struct list_head i_prealloc_list;
-	spinlock_t i_prealloc_lock;
-
-	/* ialloc */
-	ext4_group_t	i_last_alloc_group;
-
-	/* allocation reservation info for delalloc */
-	unsigned int i_reserved_data_blocks;
-	unsigned int i_reserved_meta_blocks;
-	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-
-	/* on-disk additional length */
-	__u16 i_extra_isize;
-
-	spinlock_t i_block_reservation_lock;
-};
-
-#endif	/* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fe..0000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  ext4_sb.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_sb.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_SB
-#define _EXT4_SB
-
-#ifdef __KERNEL__
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#endif
-#include <linux/rbtree.h>
-
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
-	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
-	unsigned long s_inodes_per_block;/* Number of inodes per block */
-	unsigned long s_blocks_per_group;/* Number of blocks in a group */
-	unsigned long s_inodes_per_group;/* Number of inodes in a group */
-	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
-	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
-	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
-	ext4_group_t s_groups_count;	/* Number of groups in the fs */
-	unsigned long s_overhead_last;  /* Last calculated overhead */
-	unsigned long s_blocks_last;    /* Last seen block count */
-	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
-	struct buffer_head * s_sbh;	/* Buffer containing the super block */
-	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
-	struct buffer_head **s_group_desc;
-	unsigned long  s_mount_opt;
-	ext4_fsblk_t s_sb_block;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned short s_mount_state;
-	unsigned short s_pad;
-	int s_addr_per_block_bits;
-	int s_desc_per_block_bits;
-	int s_inode_size;
-	int s_first_ino;
-	unsigned int s_inode_readahead_blks;
-	spinlock_t s_next_gen_lock;
-	u32 s_next_generation;
-	u32 s_hash_seed[4];
-	int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
-	struct percpu_counter s_freeinodes_counter;
-	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
-	struct blockgroup_lock *s_blockgroup_lock;
-	struct proc_dir_entry *s_proc;
-	struct kobject s_kobj;
-	struct completion s_kobj_unregister;
-
-	/* Journaling */
-	struct inode *s_journal_inode;
-	struct journal_s *s_journal;
-	struct list_head s_orphan;
-	unsigned long s_commit_interval;
-	u32 s_max_batch_time;
-	u32 s_min_batch_time;
-	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
-#ifdef CONFIG_QUOTA
-	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
-	int s_jquota_fmt;			/* Format of quota to use */
-#endif
-	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-
-#ifdef EXTENTS_STATS
-	/* ext4 extents stats */
-	unsigned long s_ext_min;
-	unsigned long s_ext_max;
-	unsigned long s_depth_max;
-	spinlock_t s_ext_stats_lock;
-	unsigned long s_ext_blocks;
-	unsigned long s_ext_extents;
-#endif
-
-	/* for buddy allocator */
-	struct ext4_group_info ***s_group_info;
-	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
-	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
-	unsigned short *s_mb_offsets;
-	unsigned int *s_mb_maxs;
-
-	/* tunables */
-	unsigned long s_stripe;
-	unsigned int s_mb_stream_request;
-	unsigned int s_mb_max_to_scan;
-	unsigned int s_mb_min_to_scan;
-	unsigned int s_mb_stats;
-	unsigned int s_mb_order2_reqs;
-	unsigned int s_mb_group_prealloc;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
-
-	/* history to debug policy */
-	struct ext4_mb_history *s_mb_history;
-	int s_mb_history_cur;
-	int s_mb_history_max;
-	int s_mb_history_num;
-	spinlock_t s_mb_history_lock;
-	int s_mb_history_filter;
-
-	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
-	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
-	atomic_t s_bal_success;	/* we found long enough chunks */
-	atomic_t s_bal_allocated;	/* in blocks */
-	atomic_t s_bal_ex_scanned;	/* total extents scanned */
-	atomic_t s_bal_goals;	/* goal hits */
-	atomic_t s_bal_breaks;	/* too long searches */
-	atomic_t s_bal_2orders;	/* 2^order hits */
-	spinlock_t s_bal_lock;
-	unsigned long s_mb_buddies_generated;
-	unsigned long long s_mb_generation_time;
-	atomic_t s_mb_lost_chunks;
-	atomic_t s_mb_preallocated;
-	atomic_t s_mb_discarded;
-
-	/* locality groups */
-	struct ext4_locality_group *s_locality_groups;
-
-	/* for write statistics */
-	unsigned long s_sectors_written_start;
-	u64 s_kbytes_written;
-
-	unsigned int s_log_groups_per_flex;
-	struct flex_groups *s_flex_groups;
-};
-
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
-
-#endif	/* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb..2593f74 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@
 
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-	ext4_fsblk_t block = ext_pblock(ext), valid_block;
+	ext4_fsblk_t block = ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     ((block + len) > ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
 
 static int ext4_valid_extent_idx(struct inode *inode,
 				struct ext4_extent_idx *ext_idx)
 {
-	ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	ext4_fsblk_t block = idx_pblock(ext_idx);
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     (block >= ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
 }
 
 static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	if (ext4_ext_is_uninitialized(ex))
-		uninitialized = 1;
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
+
+		if (ext4_ext_is_uninitialized(ex))
+			uninitialized = 1;
+		else
+			uninitialized = 0;
+
 		ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
 		path[depth].p_ext = ex;
 
@@ -2784,7 +2774,7 @@
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
 			unsigned int max_blocks, struct buffer_head *bh_result,
-			int create, int extend_disksize)
+			int flags)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
-	loff_t disksize;
 
 	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@
 	cache_type = ext4_ext_in_cache(inode, iblock, &newex);
 	if (cache_type) {
 		if (cache_type == EXT4_EXT_CACHE_GAP) {
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
 				 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@
 							EXT4_EXT_CACHE_EXTENT);
 				goto out;
 			}
-			if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+			if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
 				goto out;
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+				if (allocated > max_blocks)
+					allocated = max_blocks;
 				/*
 				 * We have blocks reserved already.  We
 				 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@
 				 * the buffer head will be unmapped so that
 				 * a read from the block returns 0s.
 				 */
-				if (allocated > max_blocks)
-					allocated = max_blocks;
 				set_buffer_unwritten(bh_result);
 				bh_result->b_bdev = inode->i_sb->s_bdev;
 				bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
 	 */
-	if (!create) {
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 		/*
 		 * put just found gap into cache to speed up
 		 * subsequent requests
@@ -2932,10 +2921,10 @@
 	 * EXT_UNINIT_MAX_LEN.
 	 */
 	if (max_blocks > EXT_INIT_MAX_LEN &&
-	    create != EXT4_CREATE_UNINITIALIZED_EXT)
+	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_INIT_MAX_LEN;
 	else if (max_blocks > EXT_UNINIT_MAX_LEN &&
-		 create == EXT4_CREATE_UNINITIALIZED_EXT)
+		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(ar.len);
-	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
 		ext4_ext_mark_uninitialized(&newex);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err) {
@@ -2983,18 +2972,10 @@
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	if (extend_disksize) {
-		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > EXT4_I(inode)->i_disksize)
-			EXT4_I(inode)->i_disksize = disksize;
-	}
-
 	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
-	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
 		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
 						EXT4_EXT_CACHE_EXTENT);
 out:
@@ -3150,9 +3131,10 @@
 			ret = PTR_ERR(handle);
 			break;
 		}
-		ret = ext4_get_blocks_wrap(handle, inode, block,
-					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+		map_bh.b_state = 0;
+		ret = ext4_get_blocks(handle, inode, block,
+				      max_blocks, &map_bh,
+				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@
 		       void *data)
 {
 	struct fiemap_extent_info *fieinfo = data;
-	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
@@ -3242,9 +3224,16 @@
 	 *
 	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
 	 */
-	if (logical + length - 1 == EXT_MAX_BLOCK ||
-	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
+	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+		loff_t size = i_size_read(inode);
+		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+
 		flags |= FIEMAP_EXTENT_LAST;
+		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+		    logical+length > size)
+			length = (size - logical + bs - 1) & ~(bs-1);
+	}
 
 	error = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
@@ -3318,10 +3307,10 @@
 		 * Walk the extent tree gathering extent information.
 		 * ext4_ext_fiemap_cb will push extents back to user.
 		 */
-		down_write(&EXT4_I(inode)->i_data_sem);
+		down_read(&EXT4_I(inode)->i_data_sem);
 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
 					  ext4_ext_fiemap_cb, fieinfo);
-		up_write(&EXT4_I(inode)->i_data_sem);
+		up_read(&EXT4_I(inode)->i_data_sem);
 	}
 
 	return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d..0000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  linux/fs/ext4/group.h
- *
- * Copyright (C) 2007 Cluster File Systems, Inc
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#ifndef _LINUX_EXT4_GROUP_H
-#define _LINUX_EXT4_GROUP_H
-
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-				   struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
-				       struct ext4_group_desc *gdp);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a0..3743bd8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	spin_lock(sb_bgl_lock(sbi, block_group));
-	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
+					bit, bitmap_bh->b_data);
 	if (!cleared)
 		ext4_error(sb, "ext4_free_inode",
 			   "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@
 		if (fatal) goto error_return;
 
 		if (gdp) {
-			spin_lock(sb_bgl_lock(sbi, block_group));
+			ext4_lock_group(sb, block_group);
 			count = ext4_free_inodes_count(sb, gdp) + 1;
 			ext4_free_inodes_set(sb, gdp, count);
 			if (is_directory) {
@@ -277,7 +275,7 @@
 			}
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
-			spin_unlock(sb_bgl_lock(sbi, block_group));
+			ext4_unlock_group(sb, block_group);
 			percpu_counter_inc(&sbi->s_freeinodes_counter);
 			if (is_directory)
 				percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@
 static int find_group_dir(struct super_block *sb, struct inode *parent,
 				ext4_group_t *best_group)
 {
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned int freei, avefreei;
 	struct ext4_group_desc *desc, *best_desc = NULL;
 	ext4_group_t group;
@@ -349,11 +347,10 @@
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *desc;
-	struct buffer_head *bh;
 	struct flex_groups *flex_group = sbi->s_flex_groups;
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	int flex_size = ext4_flex_bg_size(sbi);
 	ext4_group_t best_flex = parent_fbg_group;
 	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@
 	ext4_group_t n_fbg_groups;
 	ext4_group_t i;
 
-	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+	n_fbg_groups = (ngroups + flex_size - 1) >>
 		sbi->s_log_groups_per_flex;
 
 find_close_to_parent:
@@ -404,7 +401,7 @@
 found_flexbg:
 	for (i = best_flex * flex_size; i < ngroups &&
 		     i < (best_flex + 1) * flex_size; i++) {
-		desc = ext4_get_group_desc(sb, i, &bh);
+		desc = ext4_get_group_desc(sb, i, NULL);
 		if (ext4_free_inodes_count(sb, desc)) {
 			*best_group = i;
 			goto out;
@@ -478,20 +475,21 @@
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
 	ext4_fsblk_t freeb, avefreeb;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
 	ext4_grpblk_t min_blocks;
-	ext4_group_t i, grp, g;
+	ext4_group_t i, grp, g, ngroups;
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
 
+	ngroups = real_ngroups;
 	if (flex_size > 1) {
-		ngroups = (ngroups + flex_size - 1) >>
+		ngroups = (real_ngroups + flex_size - 1) >>
 			sbi->s_log_groups_per_flex;
 		parent_group >>= sbi->s_log_groups_per_flex;
 	}
@@ -543,7 +541,7 @@
 		 */
 		grp *= flex_size;
 		for (i = 0; i < flex_size; i++) {
-			if (grp+i >= sbi->s_groups_count)
+			if (grp+i >= real_ngroups)
 				break;
 			desc = ext4_get_group_desc(sb, grp+i, NULL);
 			if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@
 	}
 
 fallback:
-	ngroups = sbi->s_groups_count;
+	ngroups = real_ngroups;
 	avefreei = freei / ngroups;
 fallback_retry:
 	parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@
 			    ext4_group_t *group, int mode)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
-	ext4_group_t i, last;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
 
 	/*
@@ -708,10 +705,10 @@
 
 /*
  * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
+ * is uninit we need to take the groups's ext4_group_lock
  * and clear the uninit flag. The inode bitmap update
  * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * after holding ext4_group_lock so that ext4_read_inode_bitmap
  * doesn't race with the ext4_claim_inode
  */
 static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
-	spin_lock(sb_bgl_lock(sbi, group));
+	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
 		retval = 1;
@@ -731,7 +728,7 @@
 	ino++;
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			   "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
-	spin_unlock(sb_bgl_lock(sbi, group));
+	ext4_unlock_group(sb, group);
 	return retval;
 }
 
@@ -799,11 +796,10 @@
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
 	struct buffer_head *group_desc_bh;
-	ext4_group_t group = 0;
+	ext4_group_t ngroups, group = 0;
 	unsigned long ino = 0;
 	struct inode *inode;
 	struct ext4_group_desc *gdp = NULL;
-	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
 	int ret2, err = 0;
@@ -818,15 +814,14 @@
 		return ERR_PTR(-EPERM);
 
 	sb = dir->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
 		   dir->i_ino, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-
 	sbi = EXT4_SB(sb);
-	es = sbi->s_es;
 
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@
 		 * group descriptor metadata has not yet been updated.
 		 * So we just go onto the next blockgroup.
 		 */
-		if (++group == sbi->s_groups_count)
+		if (++group == ngroups)
 			group = 0;
 	}
 	err = -ENOSPC;
@@ -938,7 +933,7 @@
 		}
 
 		free = 0;
-		spin_lock(sb_bgl_lock(sbi, group));
+		ext4_lock_group(sb, group);
 		/* recheck and clear flag under lock if we still need to */
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 
 		/* Don't need to dirty bitmap block if we didn't change it */
 		if (free) {
@@ -1158,7 +1153,7 @@
 {
 	unsigned long desc_count;
 	struct ext4_group_desc *gdp;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1190,7 +1185,7 @@
 	return desc_count;
 #else
 	desc_count = 0;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1205,9 +1200,9 @@
 unsigned long ext4_count_dirs(struct super_block * sb)
 {
 	unsigned long count = 0;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd5..875db94 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@
 }
 
 static int __ext4_check_blockref(const char *function, struct inode *inode,
-				 __le32 *p, unsigned int max) {
-
-	unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
+				 __le32 *p, unsigned int max)
+{
 	__le32 *bref = p;
+	unsigned int blk;
+
 	while (bref < p+max) {
-		if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
+		blk = le32_to_cpu(*bref++);
+		if (blk && 
+		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 
+						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
-				   "block reference %u >= max (%u) "
-				   "in inode #%lu, offset=%d",
-				   le32_to_cpu(*bref), maxblocks,
-				   inode->i_ino, (int)(bref-p));
+				   "invalid block reference %u "
+				   "in inode #%lu", blk, inode->i_ino);
  			return -EIO;
  		}
-		bref++;
  	}
  	return 0;
 }
@@ -892,6 +893,10 @@
 }
 
 /*
+ * The ext4_ind_get_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_get_blocks().
+ *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
  * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@
  * return = 0, if plain lookup failed.
  * return < 0, error case.
  *
- *
- * Need to be called with
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ * The ext4_ind_get_blocks() function should be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
  */
-static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 				  ext4_lblk_t iblock, unsigned int maxblocks,
 				  struct buffer_head *bh_result,
-				  int create, int extend_disksize)
+				  int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
-	loff_t disksize;
-
 
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
-	J_ASSERT(handle != NULL || create == 0);
+	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 					&blocks_to_boundary);
 
@@ -963,7 +966,7 @@
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
 		goto cleanup;
 
 	/*
@@ -997,19 +1000,7 @@
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
-	/*
-	 * i_disksize growing is protected by i_data_sem.  Don't forget to
-	 * protect it if you're about to implement concurrent
-	 * ext4_get_block() -bzzz
-	*/
-	if (!err && extend_disksize) {
-		disksize = ((loff_t) iblock + count) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > ei->i_disksize)
-			ei->i_disksize = disksize;
-	}
-	if (err)
+	else 
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@
 		ext4_discard_preallocations(inode);
 }
 
+static int check_block_validity(struct inode *inode, sector_t logical,
+				sector_t phys, int len)
+{
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
+		ext4_error(inode->i_sb, "check_block_validity",
+			   "inode #%lu logical block %llu mapped to %llu "
+			   "(size %d)", inode->i_ino,
+			   (unsigned long long) logical,
+			   (unsigned long long) phys, len);
+		WARN_ON(1);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
- * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * The ext4_get_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@
  * mapped.
  *
  * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
  * based files
  *
  * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@
  *
  * It returns the error in case of allocation failure.
  */
-int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-			unsigned int max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize, int flag)
+int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
+		    unsigned int max_blocks, struct buffer_head *bh,
+		    int flags)
 {
 	int retval;
 
@@ -1152,21 +1158,28 @@
 	clear_buffer_unwritten(bh);
 
 	/*
-	 * Try to see if we can get  the block without requesting
-	 * for new file system block.
+	 * Try to see if we can get the block without requesting a new
+	 * file system block.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, 0, 0);
+				bh, 0);
 	} else {
-		retval = ext4_get_blocks_handle(handle,
-				inode, block, max_blocks, bh, 0, 0);
+		retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
+					     bh, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
+
 	/* If it is only a block(s) look up */
-	if (!create)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
 		return retval;
 
 	/*
@@ -1205,7 +1218,7 @@
 	 * let the underlying get_block() function know to
 	 * avoid double accounting
 	 */
-	if (flag)
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@
 	 */
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, create, extend_disksize);
+					      bh, flags);
 	} else {
-		retval = ext4_get_blocks_handle(handle, inode, block,
-				max_blocks, bh, create, extend_disksize);
+		retval = ext4_ind_get_blocks(handle, inode, block,
+					     max_blocks, bh, flags);
 
 		if (retval > 0 && buffer_new(bh)) {
 			/*
@@ -1229,18 +1242,23 @@
 		}
 	}
 
-	if (flag) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
-		/*
-		 * Update reserved blocks/metadata blocks
-		 * after successful block allocation
-		 * which were deferred till now
-		 */
-		if ((retval > 0) && buffer_delay(bh))
-			ext4_da_update_reserve_space(inode, retval);
-	}
+
+	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
+		ext4_da_update_reserve_space(inode, retval);
 
 	up_write((&EXT4_I(inode)->i_data_sem));
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
 	return retval;
 }
 
@@ -1268,8 +1286,8 @@
 		started = 1;
 	}
 
-	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0, 0);
+	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1288,17 +1306,19 @@
 {
 	struct buffer_head dummy;
 	int fatal = 0, err;
+	int flags = 0;
 
 	J_ASSERT(handle != NULL || create == 0);
 
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1, 0);
+	if (create)
+		flags |= EXT4_GET_BLOCKS_CREATE;
+	err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
 	/*
-	 * ext4_get_blocks_handle() returns number of blocks
-	 * mapped. 0 in case of a HOLE.
+	 * ext4_get_blocks() returns number of blocks mapped. 0 in
+	 * case of a HOLE.
 	 */
 	if (err > 0) {
 		if (err > 1)
@@ -1439,7 +1459,7 @@
 				struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
-	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
@@ -1450,6 +1470,11 @@
 		   "dev %s ino %lu pos %llu len %u flags %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, flags);
+	/*
+	 * Reserve one block more for addition to orphan list in case
+	 * we allocate blocks but write fails for some reason
+	 */
+	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
  	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
@@ -1483,15 +1508,30 @@
 
 	if (ret) {
 		unlock_page(page);
-		ext4_journal_stop(handle);
 		page_cache_release(page);
 		/*
 		 * block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
+		 *
+		 * Add inode to orphan list in case we crash before
+		 * truncate finishes
 		 */
 		if (pos + len > inode->i_size)
+			ext4_orphan_add(handle, inode);
+
+		ext4_journal_stop(handle);
+		if (pos + len > inode->i_size) {
 			vmtruncate(inode, inode->i_size);
+			/* 
+			 * If vmtruncate failed early the inode might
+			 * still be on the orphan list; we need to
+			 * make sure the inode is removed from the
+			 * orphan list in that case.
+			 */
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+		}
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@
 	return ext4_handle_dirty_metadata(handle, NULL, bh);
 }
 
+static int ext4_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	int i_size_changed = 0;
+	struct inode *inode = mapping->host;
+	handle_t *handle = ext4_journal_current_handle();
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = 1;
+	}
+
+	if (pos + copied >  EXT4_I(inode)->i_disksize) {
+		/* We need to mark inode dirty even if
+		 * new_i_size is less that inode->i_size
+		 * bu greater than i_disksize.(hint delalloc)
+		 */
+		ext4_update_i_disksize(inode, (pos + copied));
+		i_size_changed = 1;
+	}
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		ext4_mark_inode_dirty(handle, inode);
+
+	return copied;
+}
+
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
-		loff_t new_i_size;
-
-		new_i_size = pos + copied;
-		if (new_i_size > EXT4_I(inode)->i_disksize) {
-			ext4_update_i_disksize(inode, new_i_size);
-			/* We need to mark inode dirty even if
-			 * new_i_size is less that inode->i_size
-			 * bu greater than i_disksize.(hint delalloc)
-			 */
-			ext4_mark_inode_dirty(handle, inode);
-		}
-
-		ret2 = generic_write_end(file, mapping, pos, len, copied,
+		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
+		if (pos + len > inode->i_size)
+			/* if we have allocated more blocks and copied
+			 * less. We will have blocks allocated outside
+			 * inode->i_size. So truncate them
+			 */
+			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
 	}
@@ -1554,6 +1634,18 @@
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+
 	return ret ? ret : copied;
 }
 
@@ -1565,25 +1657,21 @@
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
-	loff_t new_i_size;
 
 	trace_mark(ext4_writeback_write_end,
 		   "dev %s ino %lu pos %llu len %u copied %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, copied);
-	new_i_size = pos + copied;
-	if (new_i_size > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, new_i_size);
-		/* We need to mark inode dirty even if
-		 * new_i_size is less that inode->i_size
-		 * bu greater than i_disksize.(hint delalloc)
-		 */
-		ext4_mark_inode_dirty(handle, inode);
-	}
-
-	ret2 = generic_write_end(file, mapping, pos, len, copied,
+	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	if (ret2 < 0)
 		ret = ret2;
 
@@ -1591,6 +1679,17 @@
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
 	return ret ? ret : copied;
 }
 
@@ -1635,10 +1734,27 @@
 	}
 
 	unlock_page(page);
+	page_cache_release(page);
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	page_cache_release(page);
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
 
 	return ret ? ret : copied;
 }
@@ -1852,7 +1968,7 @@
  * @logical - first logical block to start assignment with
  *
  * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay
+ * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
 static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 				 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@
 			do {
 				if (cur_logical >= logical + blocks)
 					break;
-				if (buffer_delay(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_delay(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
-				} else if (buffer_unwritten(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_unwritten(bh);
-					set_buffer_mapped(bh);
-					set_buffer_new(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
+
+				if (buffer_delay(bh) ||
+						buffer_unwritten(bh)) {
+
+					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
+
+					if (buffer_delay(bh)) {
+						clear_buffer_delay(bh);
+						bh->b_blocknr = pblock;
+					} else {
+						/*
+						 * unwritten already should have
+						 * blocknr assigned. Verify that
+						 */
+						clear_buffer_unwritten(bh);
+						BUG_ON(bh->b_blocknr != pblock);
+					}
+
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
@@ -1990,51 +2114,6 @@
 	return;
 }
 
-#define		EXT4_DELALLOC_RSVED	1
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
-{
-	int ret;
-	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	loff_t disksize = EXT4_I(inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
-	if (ret <= 0)
-		return ret;
-
-	bh_result->b_size = (ret << inode->i_blkbits);
-
-	if (ext4_should_order_data(inode)) {
-		int retval;
-		retval = ext4_jbd2_file_inode(handle, inode);
-		if (retval)
-			/*
-			 * Failed to add inode for ordered mode. Don't
-			 * update file size
-			 */
-			return retval;
-	}
-
-	/*
-	 * Update on-disk size along with block allocation we don't
-	 * use 'extend_disksize' as size may change within already
-	 * allocated block -bzzz
-	 */
-	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-	if (disksize > i_size_read(inode))
-		disksize = i_size_read(inode);
-	if (disksize > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, disksize);
-		ret = ext4_mark_inode_dirty(handle, inode);
-		return ret;
-	}
-	return 0;
-}
-
 /*
  * mpage_da_map_blocks - go through given space
  *
@@ -2045,29 +2124,57 @@
  */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
-	int err = 0;
+	int err, blks, get_blocks_flags;
 	struct buffer_head new;
-	sector_t next;
+	sector_t next = mpd->b_blocknr;
+	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
+	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
+	handle_t *handle = NULL;
 
 	/*
 	 * We consider only non-mapped and non-allocated blocks
 	 */
 	if ((mpd->b_state  & (1 << BH_Mapped)) &&
-	    !(mpd->b_state & (1 << BH_Delay)))
-		return 0;
-	new.b_state = mpd->b_state;
-	new.b_blocknr = 0;
-	new.b_size = mpd->b_size;
-	next = mpd->b_blocknr;
-	/*
-	 * If we didn't accumulate anything
-	 * to write simply return
-	 */
-	if (!new.b_size)
+		!(mpd->b_state & (1 << BH_Delay)) &&
+		!(mpd->b_state & (1 << BH_Unwritten)))
 		return 0;
 
-	err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
-	if (err) {
+	/*
+	 * If we didn't accumulate anything to write simply return
+	 */
+	if (!mpd->b_size)
+		return 0;
+
+	handle = ext4_journal_current_handle();
+	BUG_ON(!handle);
+
+	/*
+	 * Call ext4_get_blocks() to allocate any delayed allocation
+	 * blocks, or to convert an uninitialized extent to be
+	 * initialized (in the case where we have written into
+	 * one or more preallocated blocks).
+	 *
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
+	 * indicate that we are on the delayed allocation path.  This
+	 * affects functions in many different parts of the allocation
+	 * call path.  This flag exists primarily because we don't
+	 * want to change *many* call functions, so ext4_get_blocks()
+	 * will set the magic i_delalloc_reserved_flag once the
+	 * inode's allocation semaphore is taken.
+	 *
+	 * If the blocks in questions were delalloc blocks, set
+	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
+	 * variables are updated after the blocks have been allocated.
+	 */
+	new.b_state = 0;
+	get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
+			    EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	if (mpd->b_state & (1 << BH_Delay))
+		get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
+			       &new, get_blocks_flags);
+	if (blks < 0) {
+		err = blks;
 		/*
 		 * If get block returns with error we simply
 		 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@
 		if (err == -ENOSPC) {
 			ext4_print_free_blocks(mpd->inode);
 		}
-		/* invlaidate all the pages */
+		/* invalidate all the pages */
 		ext4_da_block_invalidatepages(mpd, next,
 				mpd->b_size >> mpd->inode->i_blkbits);
 		return err;
 	}
-	BUG_ON(new.b_size == 0);
+	BUG_ON(blks == 0);
+
+	new.b_size = (blks << mpd->inode->i_blkbits);
 
 	if (buffer_new(&new))
 		__unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@
 	    (mpd->b_state & (1 << BH_Unwritten)))
 		mpage_put_bnr_to_bhs(mpd, next, &new);
 
+	if (ext4_should_order_data(mpd->inode)) {
+		err = ext4_jbd2_file_inode(handle, mpd->inode);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Update on-disk size along with block allocation.
+	 */
+	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
+	if (disksize > i_size_read(mpd->inode))
+		disksize = i_size_read(mpd->inode);
+	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
+		ext4_update_i_disksize(mpd->inode, disksize);
+		return ext4_mark_inode_dirty(handle, mpd->inode);
+	}
+
 	return 0;
 }
 
@@ -2192,6 +2318,17 @@
 	return;
 }
 
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+	/*
+	 * unmapped buffer is possible for holes.
+	 * delay buffer is possible with delayed allocation.
+	 * We also need to consider unwritten buffer as unmapped.
+	 */
+	return (!buffer_mapped(bh) || buffer_delay(bh) ||
+				buffer_unwritten(bh)) && buffer_dirty(bh);
+}
+
 /*
  * __mpage_da_writepage - finds extent of pages and blocks
  *
@@ -2276,8 +2413,7 @@
 			 * Otherwise we won't make progress
 			 * with the page in ext4_da_writepage
 			 */
-			if (buffer_dirty(bh) &&
-			    (!buffer_mapped(bh) || buffer_delay(bh))) {
+			if (ext4_bh_unmapped_or_delay(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
 						       bh->b_size,
 						       bh->b_state);
@@ -2303,8 +2439,16 @@
 }
 
 /*
- * this is a special callback for ->write_begin() only
- * it's intention is to return mapped block or reserve space
+ * This is a special get_blocks_t callback which is used by
+ * ext4_da_write_begin().  It will either return mapped block or
+ * reserve space for a single block.
+ *
+ * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
+ * We also have b_blocknr = -1 and b_bdev initialized properly
+ *
+ * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
+ * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
+ * initialized properly.
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 				  struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
 	if ((ret == 0) && !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -2340,40 +2484,53 @@
 		set_buffer_delay(bh_result);
 	} else if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
-		/*
-		 * With sub-block writes into unwritten extents
-		 * we also need to mark the buffer as new so that
-		 * the unwritten parts of the buffer gets correctly zeroed.
-		 */
-		if (buffer_unwritten(bh_result))
+		if (buffer_unwritten(bh_result)) {
+			/* A delayed write to unwritten bh should
+			 * be marked new and mapped.  Mapped ensures
+			 * that we don't do get_block multiple times
+			 * when we write to the same offset and new
+			 * ensures that we do proper zero out for
+			 * partial write.
+			 */
 			set_buffer_new(bh_result);
+			set_buffer_mapped(bh_result);
+		}
 		ret = 0;
 	}
 
 	return ret;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
-{
-	/*
-	 * unmapped buffer is possible for holes.
-	 * delay buffer is possible with delayed allocation
-	 */
-	return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
-}
-
-static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+/*
+ * This function is used as a standard get_block_t calback function
+ * when there is no desire to allocate any blocks.  It is used as a
+ * callback function for block_prepare_write(), nobh_writepage(), and
+ * block_write_full_page().  These functions should only try to map a
+ * single block at a time.
+ *
+ * Since this function doesn't do block allocations even if the caller
+ * requests it by passing in create=1, it is critically important that
+ * any caller checks to make sure that any buffer heads are returned
+ * by this function are either all already mapped or marked for
+ * delayed allocation before calling nobh_writepage() or
+ * block_write_full_page().  Otherwise, b_blocknr could be left
+ * unitialized, and the page write functions will be taken by
+ * surprise.
+ */
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	int ret = 0;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
+	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
 	/*
 	 * we don't want to do block allocation in writepage
 	 * so call get_block_wrap with create = 0
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
-				   bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
+	BUG_ON(create && ret == 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -2382,10 +2539,11 @@
 }
 
 /*
- * get called vi ext4_da_writepages after taking page lock (have journal handle)
- * get called via journal_submit_inode_data_buffers (no journal handle)
- * get called via shrink_page_list via pdflush (no journal handle)
- * or grab_page_cache when doing write_begin (have journal handle)
+ * This function can get called via...
+ *   - ext4_da_writepages after taking page lock (have journal handle)
+ *   - journal_submit_inode_data_buffers (no journal handle)
+ *   - shrink_page_list via pdflush (no journal handle)
+ *   - grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
 				struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@
 		 * do block allocation here.
 		 */
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-						ext4_normal_get_block_write);
+					  noalloc_get_block_write);
 		if (!ret) {
 			page_bufs = page_buffers(page);
 			/* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		ret = block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		ret = block_write_full_page(page, noalloc_get_block_write,
+					    wbc);
 
 	return ret;
 }
@@ -2777,7 +2934,7 @@
 	*pagep = page;
 
 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-							ext4_da_get_block_prep);
+				ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@
 	for (i = 0; i < idx; i++)
 		bh = bh->b_this_page;
 
-	if (!buffer_mapped(bh) || (buffer_delay(bh)))
+	if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
 		return 0;
 	return 1;
 }
@@ -3085,12 +3242,10 @@
 	struct inode *inode = page->mapping->host;
 
 	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page,
-					ext4_normal_get_block_write, wbc);
+		return nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 }
 
 static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@
 	int err;
 
 	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-					ext4_normal_get_block_write);
+				  noalloc_get_block_write);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -3227,9 +3382,8 @@
 		 * really know unless we go poke around in the buffer_heads.
 		 * But block_write_full_page will do the right thing.
 		 */
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 	}
 no_write:
 	redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@
 	if (!ext4_can_truncate(inode))
 		return;
 
-	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+	if (ei->i_disksize && inode->i_size == 0 &&
+	    !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
 
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@
 	return ext4_force_commit(inode->i_sb);
 }
 
-int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
-{
-	int err = 0;
-
-	mark_buffer_dirty(bh);
-	if (inode && inode_needs_sync(inode)) {
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh)) {
-			ext4_error(inode->i_sb, __func__,
-				   "IO error syncing inode, "
-				   "inode=%lu, block=%llu",
-				   inode->i_ino,
-				   (unsigned long long)bh->b_blocknr);
-			err = -EIO;
-		}
-	}
-	return err;
-}
-
 /*
  * ext4_setattr()
  *
@@ -4930,7 +5066,8 @@
  */
 int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-	int groups, gdpblocks;
+	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
+	int gdpblocks;
 	int idxblocks;
 	int ret = 0;
 
@@ -4957,8 +5094,8 @@
 		groups += nrblocks;
 
 	gdpblocks = groups;
-	if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
-		groups = EXT4_SB(inode->i_sb)->s_groups_count;
+	if (groups > ngroups)
+		groups = ngroups;
 	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
 		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
 
@@ -4998,7 +5135,7 @@
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677..ed8482e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_set_bit_atomic(lock, bit, addr);
-}
-
 static inline void mb_clear_bit(int bit, void *addr)
 {
 	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_clear_bit_atomic(lock, bit, addr);
-}
-
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
 		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
 {
+	ext4_group_t ngroups;
 	int blocksize;
 	int blocks_per_page;
 	int groups_per_page;
@@ -757,6 +746,7 @@
 
 	inode = page->mapping->host;
 	sb = inode->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	blocksize = 1 << inode->i_blkbits;
 	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
 
@@ -780,7 +770,7 @@
 	for (i = 0; i < groups_per_page; i++) {
 		struct ext4_group_desc *desc;
 
-		if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+		if (first_group + i >= ngroups)
 			break;
 
 		err = -EIO;
@@ -801,17 +791,17 @@
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_lock_group(sb, first_group + i);
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
 			set_bitmap_uptodate(bh[i]);
 			set_buffer_uptodate(bh[i]);
-			spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+			ext4_unlock_group(sb, first_group + i);
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_unlock_group(sb, first_group + i);
 		if (buffer_uptodate(bh[i])) {
 			/*
 			 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@
 		struct ext4_group_info *grinfo;
 
 		group = (first_block + i) >> 1;
-		if (group >= EXT4_SB(sb)->s_groups_count)
+		if (group >= ngroups)
 			break;
 
 		/*
@@ -1078,7 +1068,7 @@
 	return 0;
 }
 
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1091,15 +1081,12 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_clear_bit_atomic(lock, cur, bm);
-		else
-			mb_clear_bit(cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1112,10 +1099,7 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_set_bit_atomic(lock, cur, bm);
-		else
-			mb_set_bit(cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
 }
@@ -1131,7 +1115,7 @@
 	struct super_block *sb = e4b->bd_sb;
 
 	BUG_ON(first + count > (sb->s_blocksize << 3));
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
@@ -1212,7 +1196,7 @@
 	int ord;
 	void *buddy;
 
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	BUG_ON(ex == NULL);
 
 	buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@
 
 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
 	BUG_ON(e4b->bd_group != ex->fe_group);
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_mark_used_double(e4b, start, len);
 
@@ -1330,8 +1314,7 @@
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
-			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -1726,7 +1709,6 @@
 	unsigned free, fragments;
 	unsigned i, bits;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-	struct ext4_group_desc *desc;
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
 	BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@
 	switch (cr) {
 	case 0:
 		BUG_ON(ac->ac_2order == 0);
-		/* If this group is uninitialized, skip it initially */
-		desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
-		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-			return 0;
 
 		/* Avoid using the first bg of a flexgroup for data files */
 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@
 	int block, pnum;
 	int blocks_per_page;
 	int groups_per_page;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t first_group;
 	struct ext4_group_info *grp;
 
@@ -1807,7 +1786,7 @@
 	/* read all groups the page covers into the cache */
 	for (i = 0; i < groups_per_page; i++) {
 
-		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+		if ((first_group + i) >= ngroups)
 			break;
 		grp = ext4_get_group_info(sb, first_group + i);
 		/* take all groups write allocation
@@ -1945,8 +1924,7 @@
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-	ext4_group_t group;
-	ext4_group_t i;
+	ext4_group_t ngroups, group, i;
 	int cr;
 	int err = 0;
 	int bsbits;
@@ -1957,6 +1935,7 @@
 
 	sb = ac->ac_sb;
 	sbi = EXT4_SB(sb);
+	ngroups = ext4_get_groups_count(sb);
 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
 
 	/* first, try the goal */
@@ -2017,11 +1996,11 @@
 		 */
 		group = ac->ac_g_ex.fe_group;
 
-		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+		for (i = 0; i < ngroups; group++, i++) {
 			struct ext4_group_info *grp;
 			struct ext4_group_desc *desc;
 
-			if (group == EXT4_SB(sb)->s_groups_count)
+			if (group == ngroups)
 				group = 0;
 
 			/* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@
 
 			ac->ac_groups_scanned++;
 			desc = ext4_get_group_desc(sb, group, NULL);
-			if (cr == 0 || (desc->bg_flags &
-					cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
-					ac->ac_2order != 0))
+			if (cr == 0)
 				ext4_mb_simple_scan_group(ac, &e4b);
 			else if (cr == 1 &&
 					ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
-
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
 }
@@ -2328,11 +2303,10 @@
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
 	++*pos;
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@
 
 	if (sbi->s_proc != NULL) {
 		remove_proc_entry("mb_groups", sbi->s_proc);
-		remove_proc_entry("mb_history", sbi->s_proc);
+		if (sbi->s_mb_history_max)
+			remove_proc_entry("mb_history", sbi->s_proc);
 	}
 	kfree(sbi->s_mb_history);
 }
@@ -2431,17 +2406,17 @@
 	int i;
 
 	if (sbi->s_proc != NULL) {
-		proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
-				 &ext4_mb_seq_history_fops, sb);
+		if (sbi->s_mb_history_max)
+			proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+					 &ext4_mb_seq_history_fops, sb);
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 				 &ext4_mb_seq_groups_fops, sb);
 	}
 
-	sbi->s_mb_history_max = 1000;
 	sbi->s_mb_history_cur = 0;
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-	sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+	sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
 	/* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2451,7 +2426,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
 
-	if (unlikely(sbi->s_mb_history == NULL))
+	if (sbi->s_mb_history == NULL)
 		return;
 
 	if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@
 
 static int ext4_mb_init_backend(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int metalen;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@
 	struct ext4_group_desc *desc;
 
 	/* This is the number of blocks used by GDT */
-	num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
 				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
 
 	/*
@@ -2644,7 +2620,7 @@
 	for (i = 0; i < num_meta_group_infos; i++) {
 		if ((i + 1) == num_meta_group_infos)
 			metalen = sizeof(*meta_group_info) *
-				(sbi->s_groups_count -
+				(ngroups -
 					(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
 		meta_group_info = kmalloc(metalen, GFP_KERNEL);
 		if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@
 		sbi->s_group_info[i] = meta_group_info;
 	}
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
 			printk(KERN_ERR
@@ -2761,7 +2737,7 @@
 	return 0;
 }
 
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* need to called with the ext4 group lock held */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@
 
 int ext4_mb_release(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int num_meta_group_infos;
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_group_info) {
-		for (i = 0; i < sbi->s_groups_count; i++) {
+		for (i = 0; i < ngroups; i++) {
 			grinfo = ext4_get_group_info(sb, i);
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@
 			ext4_unlock_group(sb, i);
 			kfree(grinfo);
 		}
-		num_meta_group_infos = (sbi->s_groups_count +
+		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@
 		+ le32_to_cpu(es->s_first_data_block);
 
 	len = ac->ac_b_ex.fe_len;
-	if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
-	    in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
-	    in_range(block, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(block + len - 1, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group)) {
+	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, __func__,
-			   "Allocating block %llu in system zone of %d group\n",
-			   block, ac->ac_b_ex.fe_group);
+			   "Allocating blocks %llu-%llu which overlap "
+			   "fs metadata\n", block, block+len);
 		/* File system mounted not to panic on error
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
 		 */
-		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
-				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-				ac->ac_b_ex.fe_len);
+		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			    ac->ac_b_ex.fe_len);
+		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
 			err = -EAGAIN;
 		goto out_err;
 	}
+
+	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -3014,9 +2989,7 @@
 		}
 	}
 #endif
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	mb_set_bits(NULL, bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@
 	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
 	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+
+	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@
  * the function goes through all block freed in the group
  * but not yet committed and marks them used in in-core bitmap.
  * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with the ext4 group lock held
  */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group)
@@ -3473,9 +3447,7 @@
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-				bitmap, entry->start_blk,
-				entry->count);
+		mb_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3484,7 +3456,7 @@
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock held
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3516,8 +3488,7 @@
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -4121,7 +4092,7 @@
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
-	ext4_group_t i;
+	ext4_group_t ngroups, i;
 
 	printk(KERN_ERR "EXT4-fs: Can't allocate:"
 			" Allocation context details:\n");
@@ -4145,7 +4116,8 @@
 	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
 		ac->ac_found);
 	printk(KERN_ERR "EXT4-fs: groups: \n");
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	ngroups = ext4_get_groups_count(sb);
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@
 
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	int ret;
 	int freed = 0;
 
 	trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
 		   sb->s_id, needed);
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+	for (i = 0; i < ngroups && needed > 0; i++) {
 		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
 		freed += ret;
 		needed -= ret;
@@ -4859,29 +4831,25 @@
 		new_entry->group  = block_group;
 		new_entry->count = count;
 		new_entry->t_tid = handle->h_transaction->t_tid;
+
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
-		ext4_unlock_group(sb, block_group);
 	} else {
-		ext4_lock_group(sb, block_group);
 		/* need to update group_info->bb_free and bitmap
 		 * with group lock held. generate_buddy look at
 		 * them with group lock_held
 		 */
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		ext4_lock_group(sb, block_group);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		ext4_unlock_group(sb, block_group);
 	}
 
-	spin_lock(sb_bgl_lock(sbi, block_group));
 	ret = ext4_free_blks_count(sb, gdp) + count;
 	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd..75e34f6 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
-#include "group.h"
 
 /*
  * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1..07eb664 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -750,7 +749,7 @@
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->offs = ((char *) de - base)>>2;
 			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
@@ -1148,7 +1147,8 @@
 	unsigned rec_len = 0;
 
 	while (count--) {
-		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+						(from + (map->offs<<2));
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -2006,9 +2006,13 @@
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-	 * here (on lock_super()), so race with ext4_link() which might bump
+	 * here (on s_orphan_lock), so race with ext4_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
 	ext4_std_error(inode->i_sb, err);
 	return err;
 }
@@ -2066,11 +2070,9 @@
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@
 out_err:
 	ext4_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
@@ -2533,6 +2535,7 @@
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext4_permission,
+	.fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff..0000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext4/namei.h
- *
- * Copyright (C) 2005 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
-*/
-
-extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd..27eb289 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 
 #include "ext4_jbd2.h"
-#include "group.h"
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -302,7 +301,7 @@
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -643,11 +642,12 @@
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -809,7 +809,7 @@
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@
         /*
          * OK, now we've set up the new group.  Time to make it active.
          *
-         * Current kernels don't lock all allocations via lock_super(),
+         * We do not lock all allocations via s_resize_lock
          * so we have to be safe wrt. concurrent accesses the group
          * data.  So we need to be careful to set all of the relevant
          * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -948,7 +948,7 @@
 	sb->s_dirt = 1;
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -986,7 +986,7 @@
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = ext4_blocks_count(es);
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
@@ -1056,11 +1056,11 @@
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
 	if (o_blocks_count != ext4_blocks_count(es)) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1070,14 +1070,14 @@
 						 EXT4_SB(sb)->s_sbh))) {
 		ext4_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e..c191d0f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/vmalloc.h>
 #include <linux/jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -45,16 +46,20 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "namei.h"
-#include "group.h"
+
+static int default_mb_history_length = 1000;
+
+module_param_named(default_mb_history_length, default_mb_history_length,
+		   int, 0644);
+MODULE_PARM_DESC(default_mb_history_length,
+		 "Default number of entries saved for mb_history");
 
 struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync);
+static int ext4_commit_super(struct super_block *sb, int sync);
 static void ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
 static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@
 {
 	return le32_to_cpu(bg->bg_block_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@
 {
 	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@
 {
 	return le32_to_cpu(bg->bg_inode_table_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
 __u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@
 {
 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 }
 
 __u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@
 {
 	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 }
 
 __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@
 {
 	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 }
 
 __u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@
 {
 	return le16_to_cpu(bg->bg_itable_unused_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 }
 
 void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@
 	journal = EXT4_SB(sb)->s_journal;
 	if (journal) {
 		if (is_journal_aborted(journal)) {
-			ext4_abort(sb, __func__,
-				   "Detected aborted journal");
+			ext4_abort(sb, __func__, "Detected aborted journal");
 			return ERR_PTR(-EROFS);
 		}
 		return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@
 			jbd2_journal_abort(journal, -EIO);
 	}
 	if (test_opt(sb, ERRORS_RO)) {
-		printk(KERN_CRIT "Remounting filesystem read-only\n");
+		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 		sb->s_flags |= MS_RDONLY;
 	}
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, ERRORS_PANIC))
 		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
@@ -395,8 +399,6 @@
 {
 	va_list args;
 
-	printk(KERN_CRIT "ext4_abort called.\n");
-
 	va_start(args, fmt);
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 	vprintk(fmt, args);
@@ -409,7 +411,7 @@
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	printk(KERN_CRIT "Remounting filesystem read-only\n");
+	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@
 		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
+void ext4_msg (struct super_block * sb, const char *prefix,
+		   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+}
+
 void ext4_warning(struct super_block *sb, const char *function,
 		  const char *fmt, ...)
 {
@@ -431,7 +445,7 @@
 }
 
 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
-				const char *function, const char *fmt, ...)
+			   const char *function, const char *fmt, ...)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -447,7 +461,7 @@
 	if (test_opt(sb, ERRORS_CONT)) {
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 0);
+		ext4_commit_super(sb, 0);
 		return;
 	}
 	ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@
 	return;
 }
 
-
 void ext4_update_dynamic_rev(struct super_block *sb)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@
 /*
  * Open the external journal device
  */
-static struct block_device *ext4_blkdev_get(dev_t dev)
+static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 {
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@
 	return bdev;
 
 fail:
-	printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
+	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 			__bdevname(dev, b), PTR_ERR(bdev));
 	return NULL;
 }
@@ -543,8 +556,8 @@
 {
 	struct list_head *l;
 
-	printk(KERN_ERR "sb orphan head is %d\n",
-	       le32_to_cpu(sbi->s_es->s_last_orphan));
+	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
+		 le32_to_cpu(sbi->s_es->s_last_orphan));
 
 	printk(KERN_ERR "sb_info orphan list:\n");
 	list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,7 @@
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
 	ext4_xattr_put_super(sb);
@@ -576,7 +590,7 @@
 	if (!(sb->s_flags & MS_RDONLY)) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 	if (sbi->s_proc) {
 		remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +600,10 @@
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	kfree(sbi->s_flex_groups);
+	if (is_vmalloc_addr(sbi->s_flex_groups))
+		vfree(sbi->s_flex_groups);
+	else
+		kfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -629,7 +646,6 @@
 	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
 }
 
 static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +660,7 @@
 	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
+
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	ei->i_acl = EXT4_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +681,16 @@
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
+
 	return &ei->vfs_inode;
 }
 
 static void ext4_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
-		printk("EXT4 Inode %p: orphan list check failed!\n",
-			EXT4_I(inode));
+		ext4_msg(inode->i_sb, KERN_ERR,
+			 "Inode %lu (%p): orphan list check failed!",
+			 inode->i_ino, EXT4_I(inode));
 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 				EXT4_I(inode), sizeof(struct ext4_inode_info),
 				true);
@@ -870,12 +889,12 @@
 		seq_puts(seq, ",noauto_da_alloc");
 
 	ext4_show_quota_options(seq, sb);
+
 	return 0;
 }
 
-
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
-		u64 ino, u32 generation)
+					u64 ino, u32 generation)
 {
 	struct inode *inode;
 
@@ -904,14 +923,14 @@
 }
 
 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
 }
 
 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
@@ -923,7 +942,8 @@
  * which would prevent try_to_free_buffers() from freeing them, we must use
  * jbd2 layer's try_to_free_buffers() function to release them.
  */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+				 gfp_t wait)
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
@@ -992,7 +1012,6 @@
 	.dirty_inode	= ext4_dirty_inode,
 	.delete_inode	= ext4_delete_inode,
 	.put_super	= ext4_put_super,
-	.write_super	= ext4_write_super,
 	.sync_fs	= ext4_sync_fs,
 	.freeze_fs	= ext4_freeze,
 	.unfreeze_fs	= ext4_unfreeze,
@@ -1007,6 +1026,25 @@
 	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
 
+static const struct super_operations ext4_nojournal_sops = {
+	.alloc_inode	= ext4_alloc_inode,
+	.destroy_inode	= ext4_destroy_inode,
+	.write_inode	= ext4_write_inode,
+	.dirty_inode	= ext4_dirty_inode,
+	.delete_inode	= ext4_delete_inode,
+	.write_super	= ext4_write_super,
+	.put_super	= ext4_put_super,
+	.statfs		= ext4_statfs,
+	.remount_fs	= ext4_remount,
+	.clear_inode	= ext4_clear_inode,
+	.show_options	= ext4_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= ext4_quota_read,
+	.quota_write	= ext4_quota_write,
+#endif
+	.bdev_try_to_free_page = bdev_try_to_free_page,
+};
+
 static const struct export_operations ext4_export_ops = {
 	.fh_to_dentry = ext4_fh_to_dentry,
 	.fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1061,13 @@
 	Opt_journal_update, Opt_journal_dev,
 	Opt_journal_checksum, Opt_journal_async_commit,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-	Opt_data_err_abort, Opt_data_err_ignore,
+	Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
 	Opt_usrquota, Opt_grpquota, Opt_i_version,
 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+	Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio
 };
 
@@ -1069,6 +1108,7 @@
 	{Opt_data_writeback, "data=writeback"},
 	{Opt_data_err_abort, "data_err=abort"},
 	{Opt_data_err_ignore, "data_err=ignore"},
+	{Opt_mb_history_length, "mb_history_length=%u"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1127,8 @@
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_block_validity, "block_validity"},
+	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
 	{Opt_journal_ioprio, "journal_ioprio=%u"},
 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1144,9 @@
 
 	if (!options || strncmp(options, "sb=", 3) != 0)
 		return 1;	/* Default location */
+
 	options += 3;
-	/*todo: use simple_strtoll with >32bit ext4 */
+	/* TODO: use simple_strtoll with >32bit ext4 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
 		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1156,7 @@
 	if (*options == ',')
 		options++;
 	*data = (void *) options;
+
 	return sb_block;
 }
 
@@ -1206,8 +1250,7 @@
 #else
 		case Opt_user_xattr:
 		case Opt_nouser_xattr:
-			printk(KERN_ERR "EXT4 (no)user_xattr options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
 			break;
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1263,7 @@
 #else
 		case Opt_acl:
 		case Opt_noacl:
-			printk(KERN_ERR "EXT4 (no)acl options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
 			break;
 #endif
 		case Opt_journal_update:
@@ -1231,16 +1273,16 @@
 			   user to specify an existing inode to be the
 			   journal file. */
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					 "Cannot specify journal on remount");
 				return 0;
 			}
 			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"Cannot specify journal on remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option))
@@ -1294,9 +1336,8 @@
 			if (is_remount) {
 				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
 						!= data_opt) {
-					printk(KERN_ERR
-						"EXT4-fs: cannot change data "
-						"mode on remount\n");
+					ext4_msg(sb, KERN_ERR,
+						"Cannot change data mode on remount");
 					return 0;
 				}
 			} else {
@@ -1310,6 +1351,13 @@
 		case Opt_data_err_ignore:
 			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
 			break;
+		case Opt_mb_history_length:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			sbi->s_mb_history_max = option;
+			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
 			qtype = USRQUOTA;
@@ -1319,31 +1367,31 @@
 set_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR
-				       "EXT4-fs: Cannot change journaled "
-				       "quota options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR,
+				       "Cannot change journaled "
+				       "quota options when quota turned on");
 				return 0;
 			}
 			qname = match_strdup(&args[0]);
 			if (!qname) {
-				printk(KERN_ERR
-					"EXT4-fs: not enough memory for "
-					"storing quotafile name.\n");
+				ext4_msg(sb, KERN_ERR,
+					"Not enough memory for "
+					"storing quotafile name");
 				return 0;
 			}
 			if (sbi->s_qf_names[qtype] &&
 			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				printk(KERN_ERR
-					"EXT4-fs: %s quota file already "
-					"specified.\n", QTYPE2NAME(qtype));
+				ext4_msg(sb, KERN_ERR,
+					"%s quota file already "
+					"specified", QTYPE2NAME(qtype));
 				kfree(qname);
 				return 0;
 			}
 			sbi->s_qf_names[qtype] = qname;
 			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				printk(KERN_ERR
-					"EXT4-fs: quotafile must be on "
-					"filesystem root.\n");
+				ext4_msg(sb, KERN_ERR,
+					"quotafile must be on "
+					"filesystem root");
 				kfree(sbi->s_qf_names[qtype]);
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
@@ -1358,9 +1406,9 @@
 clear_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			/*
@@ -1377,9 +1425,9 @@
 set_qf_format:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1443,8 @@
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_loaded(sb)) {
-				printk(KERN_ERR "EXT4-fs: Cannot change quota "
-					"options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR, "Cannot change quota "
+					"options when quota turned on");
 				return 0;
 			}
 			clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1455,8 @@
 		case Opt_quota:
 		case Opt_usrquota:
 		case Opt_grpquota:
-			printk(KERN_ERR
-				"EXT4-fs: quota options not supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"quota options not supported");
 			break;
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -1416,9 +1464,8 @@
 		case Opt_offgrpjquota:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
-			printk(KERN_ERR
-				"EXT4-fs: journaled quota options not "
-				"supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"journaled quota options not supported");
 			break;
 		case Opt_noquota:
 			break;
@@ -1443,8 +1490,9 @@
 			break;
 		case Opt_resize:
 			if (!is_remount) {
-				printk("EXT4-fs: resize option only available "
-					"for remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"resize option only available "
+					"for remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1522,21 @@
 		case Opt_delalloc:
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_block_validity:
+			set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
+		case Opt_noblock_validity:
+			clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
 		case Opt_inode_readahead_blks:
 			if (match_int(&args[0], &option))
 				return 0;
 			if (option < 0 || option > (1 << 30))
 				return 0;
-			if (option & (option - 1)) {
-				printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
-				       " must be a power of 2\n");
+			if (!is_power_of_2(option)) {
+				ext4_msg(sb, KERN_ERR,
+					 "EXT4-fs: inode_readahead_blks"
+					 " must be a power of 2");
 				return 0;
 			}
 			sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1563,9 @@
 				set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 			break;
 		default:
-			printk(KERN_ERR
-			       "EXT4-fs: Unrecognized mount option \"%s\" "
-			       "or missing value\n", p);
+			ext4_msg(sb, KERN_ERR,
+			       "Unrecognized mount option \"%s\" "
+			       "or missing value", p);
 			return 0;
 		}
 	}
@@ -1528,21 +1583,21 @@
 				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
 		    (sbi->s_qf_names[GRPQUOTA] &&
 				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
-			printk(KERN_ERR "EXT4-fs: old and new quota "
-					"format mixing.\n");
+			ext4_msg(sb, KERN_ERR, "old and new quota "
+					"format mixing");
 			return 0;
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
-					"not specified.\n");
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
+					"not specified");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
 					"specified with no journaling "
-					"enabled.\n");
+					"enabled");
 			return 0;
 		}
 	}
@@ -1557,32 +1612,32 @@
 	int res = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
-		       "forcing read-only mode\n");
+		ext4_msg(sb, KERN_ERR, "revision level too high, "
+			 "forcing read-only mode");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
-		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
+			 "running e2fsck is recommended");
 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: mounting fs with errors, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: mounting fs with errors, "
+			 "running e2fsck is recommended");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: maximal mount count reached, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: maximal mount count reached, "
+			 "running e2fsck is recommended");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: checktime reached, "
-		       "running e2fsck is recommended\n");
-	if (!sbi->s_journal) 
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: checktime reached, "
+			 "running e2fsck is recommended");
+	if (!sbi->s_journal)
 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1647,7 @@
 	if (sbi->s_journal)
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1658,11 @@
 			sbi->s_mount_opt);
 
 	if (EXT4_SB(sb)->s_journal) {
-		printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
-		       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+		ext4_msg(sb, KERN_INFO, "%s journal on %s",
+		       EXT4_SB(sb)->s_journal->j_inode ? "internal" :
 		       "external", EXT4_SB(sb)->s_journal->j_devname);
 	} else {
-		printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+		ext4_msg(sb, KERN_INFO, "no journal");
 	}
 	return res;
 }
@@ -1616,10 +1671,10 @@
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = NULL;
-	struct buffer_head *bh;
 	ext4_group_t flex_group_count;
 	ext4_group_t flex_group;
 	int groups_per_flex = 0;
+	size_t size;
 	int i;
 
 	if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1689,21 @@
 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-	sbi->s_flex_groups = kzalloc(flex_group_count *
-				     sizeof(struct flex_groups), GFP_KERNEL);
+	size = flex_group_count * sizeof(struct flex_groups);
+	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory for "
-				"%u flex groups\n", flex_group_count);
+		sbi->s_flex_groups = vmalloc(size);
+		if (sbi->s_flex_groups)
+			memset(sbi->s_flex_groups, 0, size);
+	}
+	if (sbi->s_flex_groups == NULL) {
+		ext4_msg(sb, KERN_ERR, "not enough memory for "
+				"%u flex groups", flex_group_count);
 		goto failed;
 	}
 
 	for (i = 0; i < sbi->s_groups_count; i++) {
-		gdp = ext4_get_group_desc(sb, i, &bh);
+		gdp = ext4_get_group_desc(sb, i, NULL);
 
 		flex_group = ext4_flex_group(sbi, i);
 		atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1784,44 @@
 
 		block_bitmap = ext4_block_bitmap(sb, gdp);
 		if (block_bitmap < first_block || block_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
-			       "(block %llu)!\n", i, block_bitmap);
+			       "(block %llu)!", i, block_bitmap);
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
-			       "(block %llu)!\n", i, inode_bitmap);
+			       "(block %llu)!", i, inode_bitmap);
 			return 0;
 		}
 		inode_table = ext4_inode_table(sb, gdp);
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode table for group %u not in group "
-			       "(block %llu)!\n", i, inode_table);
+			       "(block %llu)!", i, inode_table);
 			return 0;
 		}
-		spin_lock(sb_bgl_lock(sbi, i));
+		ext4_lock_group(sb, i);
 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-			       "Checksum for group %u failed (%u!=%u)\n",
-			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
-			       gdp)), le16_to_cpu(gdp->bg_checksum));
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Checksum for group %u failed (%u!=%u)",
+				 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+				     gdp)), le16_to_cpu(gdp->bg_checksum));
 			if (!(sb->s_flags & MS_RDONLY)) {
-				spin_unlock(sb_bgl_lock(sbi, i));
+				ext4_unlock_group(sb, i);
 				return 0;
 			}
 		}
-		spin_unlock(sb_bgl_lock(sbi, i));
+		ext4_unlock_group(sb, i);
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 	}
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
 
@@ -1796,8 +1856,8 @@
 	}
 
 	if (bdev_read_only(sb->s_bdev)) {
-		printk(KERN_ERR "EXT4-fs: write access "
-			"unavailable, skipping orphan cleanup.\n");
+		ext4_msg(sb, KERN_ERR, "write access "
+			"unavailable, skipping orphan cleanup");
 		return;
 	}
 
@@ -1811,8 +1871,7 @@
 	}
 
 	if (s_flags & MS_RDONLY) {
-		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
 		sb->s_flags &= ~MS_RDONLY;
 	}
 #ifdef CONFIG_QUOTA
@@ -1823,9 +1882,9 @@
 		if (EXT4_SB(sb)->s_qf_names[i]) {
 			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
-				printk(KERN_ERR
-					"EXT4-fs: Cannot turn on journaled "
-					"quota: error %d\n", ret);
+				ext4_msg(sb, KERN_ERR,
+					"Cannot turn on journaled "
+					"quota: error %d", ret);
 		}
 	}
 #endif
@@ -1842,16 +1901,16 @@
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 		vfs_dq_init(inode);
 		if (inode->i_nlink) {
-			printk(KERN_DEBUG
-				"%s: truncating inode %lu to %lld bytes\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: truncating inode %lu to %lld bytes",
 				__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
 			ext4_truncate(inode);
 			nr_truncates++;
 		} else {
-			printk(KERN_DEBUG
-				"%s: deleting unreferenced inode %lu\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: deleting unreferenced inode %lu",
 				__func__, inode->i_ino);
 			jbd_debug(2, "deleting unreferenced inode %lu\n",
 				  inode->i_ino);
@@ -1863,11 +1922,11 @@
 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
 
 	if (nr_orphans)
-		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
-		       sb->s_id, PLURAL(nr_orphans));
+		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+		       PLURAL(nr_orphans));
 	if (nr_truncates)
-		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
-		       sb->s_id, PLURAL(nr_truncates));
+		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+		       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
 	for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1936,7 @@
 #endif
 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 }
+
 /*
  * Maximal extent format file size.
  * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1987,19 @@
 	loff_t res = EXT4_NDIR_BLOCKS;
 	int meta_blocks;
 	loff_t upper_limit;
-	/* This is calculated to be the largest file size for a
-	 * dense, bitmapped file such that the total number of
-	 * sectors in the file, including data and all indirect blocks,
-	 * does not exceed 2^48 -1
-	 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
-	 * total number of  512 bytes blocks of the file
+	/* This is calculated to be the largest file size for a dense, block
+	 * mapped file such that the file's total number of 512-byte sectors,
+	 * including data and all indirect blocks, does not exceed (2^48 - 1).
+	 *
+	 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
+	 * number of 512-byte sectors of the file.
 	 */
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LBD is not enabled
-		 * implies the inode i_block represent total blocks in
-		 * 512 bytes 32 == size of vfs inode i_blocks * 8
+		 * !has_huge_files or CONFIG_LBD not enabled implies that
+		 * the inode i_block field represents total file blocks in
+		 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
 		 */
 		upper_limit = (1LL << 32) - 1;
 
@@ -1981,7 +2041,7 @@
 }
 
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-				ext4_fsblk_t logical_sb_block, int nr)
+				   ext4_fsblk_t logical_sb_block, int nr)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2055,7 @@
 	bg = sbi->s_desc_per_block * nr;
 	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;
+
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
@@ -2091,8 +2152,7 @@
 	if (parse_strtoul(buf, 0x40000000, &t))
 		return -EINVAL;
 
-	/* inode_readahead_blks must be a power of 2 */
-	if (t & (t-1))
+	if (!is_power_of_2(t))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2160,7 @@
 }
 
 static ssize_t sbi_ui_show(struct ext4_attr *a,
-				struct ext4_sb_info *sbi, char *buf)
+			   struct ext4_sb_info *sbi, char *buf)
 {
 	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
 
@@ -2205,7 +2265,6 @@
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				__releases(kernel_lock)
 				__acquires(kernel_lock)
-
 {
 	struct buffer_head *bh;
 	struct ext4_super_block *es = NULL;
@@ -2256,7 +2315,7 @@
 
 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
 	if (!blocksize) {
-		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
+		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
 		goto out_fail;
 	}
 
@@ -2272,7 +2331,7 @@
 	}
 
 	if (!(bh = sb_bread(sb, logical_sb_block))) {
-		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
+		ext4_msg(sb, KERN_ERR, "unable to read superblock");
 		goto out_fail;
 	}
 	/*
@@ -2321,6 +2380,7 @@
 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+	sbi->s_mb_history_max = default_mb_history_length;
 
 	set_opt(sbi->s_mount_opt, BARRIER);
 
@@ -2330,7 +2390,6 @@
 	 */
 	set_opt(sbi->s_mount_opt, DELALLOC);
 
-
 	if (!parse_options((char *) data, sb, &journal_devnum,
 			   &journal_ioprio, NULL, 0))
 		goto failed_mount;
@@ -2342,9 +2401,9 @@
 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: feature flags set on rev 0 fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+		       "feature flags set on rev 0 fs, "
+		       "running e2fsck is recommended");
 
 	/*
 	 * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2412,18 @@
 	 */
 	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
 	if (features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
 			~EXT4_FEATURE_INCOMPAT_SUPP));
 		goto failed_mount;
 	}
 	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
 	if (!(sb->s_flags & MS_RDONLY) && features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount RDWR because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
 			~EXT4_FEATURE_RO_COMPAT_SUPP));
 		goto failed_mount;
@@ -2376,9 +2437,9 @@
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
-			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
+			ext4_msg(sb, KERN_ERR, "Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LBD.\n", sb->s_id);
+					"without CONFIG_LBD");
 			goto failed_mount;
 		}
 	}
@@ -2386,17 +2447,15 @@
 
 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
-		printk(KERN_ERR
-		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
-		       blocksize, sb->s_id);
+		ext4_msg(sb, KERN_ERR,
+		       "Unsupported filesystem blocksize %d", blocksize);
 		goto failed_mount;
 	}
 
 	if (sb->s_blocksize != blocksize) {
-
 		/* Validate the filesystem blocksize */
 		if (!sb_set_blocksize(sb, blocksize)) {
-			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
+			ext4_msg(sb, KERN_ERR, "bad block size %d",
 					blocksize);
 			goto failed_mount;
 		}
@@ -2406,15 +2465,15 @@
 		offset = do_div(logical_sb_block, blocksize);
 		bh = sb_bread(sb, logical_sb_block);
 		if (!bh) {
-			printk(KERN_ERR
-			       "EXT4-fs: Can't read superblock on 2nd try.\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Can't read superblock on 2nd try");
 			goto failed_mount;
 		}
 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-			printk(KERN_ERR
-			       "EXT4-fs: Magic mismatch, very weird !\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Magic mismatch, very weird!");
 			goto failed_mount;
 		}
 	}
@@ -2432,30 +2491,33 @@
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported inode size: %d\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported inode size: %d",
 			       sbi->s_inode_size);
 			goto failed_mount;
 		}
 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
 	}
+
 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
 		    !is_power_of_2(sbi->s_desc_size)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported descriptor size %lu\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported descriptor size %lu",
 			       sbi->s_desc_size);
 			goto failed_mount;
 		}
 	} else
 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
+
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
 	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
 		goto cantfind_ext4;
+
 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
 	if (sbi->s_inodes_per_block == 0)
 		goto cantfind_ext4;
@@ -2466,6 +2528,7 @@
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
 	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2546,24 @@
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #blocks per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#blocks per group too big: %lu",
 		       sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #inodes per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#inodes per group too big: %lu",
 		       sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
 	if (ext4_blocks_count(es) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
-			" too large to mount safely\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "filesystem"
+			" too large to mount safely");
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
-					"enabled\n");
+			ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
 		goto failed_mount;
 	}
 
@@ -2511,21 +2573,21 @@
 	/* check blocks count against device size */
 	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-		printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
-		       "exceeds size of device (%llu blocks)\n",
+		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
+		       "exceeds size of device (%llu blocks)",
 		       ext4_blocks_count(es), blocks_count);
 		goto failed_mount;
 	}
 
-        /*
-         * It makes no sense for the first data block to be beyond the end
-         * of the filesystem.
-         */
-        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-                printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
-		       "block %u is beyond end of filesystem (%llu)\n",
-		       le32_to_cpu(es->s_first_data_block),
-		       ext4_blocks_count(es));
+	/*
+	 * It makes no sense for the first data block to be beyond the end
+	 * of the filesystem.
+	 */
+	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+                ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
+			 "block %u is beyond end of filesystem (%llu)",
+			 le32_to_cpu(es->s_first_data_block),
+			 ext4_blocks_count(es));
 		goto failed_mount;
 	}
 	blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2595,9 @@
 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
-		printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+		ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
 		       "(block count %llu, first data block %u, "
-		       "blocks per group %lu)\n", sbi->s_groups_count,
+		       "blocks per group %lu)", sbi->s_groups_count,
 		       ext4_blocks_count(es),
 		       le32_to_cpu(es->s_first_data_block),
 		       EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2609,7 @@
 	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		ext4_msg(sb, KERN_ERR, "not enough memory");
 		goto failed_mount;
 	}
 
@@ -2562,21 +2624,21 @@
 		block = descriptor_loc(sb, logical_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk(KERN_ERR "EXT4-fs: "
-			       "can't read group descriptor %d\n", i);
+			ext4_msg(sb, KERN_ERR,
+			       "can't read group descriptor %d", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
 	if (!ext4_check_descriptors(sb)) {
-		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
+		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		goto failed_mount2;
 	}
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
 		if (!ext4_fill_flex_info(sb)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unable to initialize "
-			       "flex_bg meta info!\n");
+			ext4_msg(sb, KERN_ERR,
+			       "unable to initialize "
+			       "flex_bg meta info!");
 			goto failed_mount2;
 		}
 
@@ -2598,7 +2660,7 @@
 		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
 	}
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount3;
 	}
 
@@ -2607,7 +2669,11 @@
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	sb->s_op = &ext4_sops;
+	if (!test_opt(sb, NOLOAD) &&
+	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+		sb->s_op = &ext4_sops;
+	else
+		sb->s_op = &ext4_nojournal_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -2615,6 +2681,8 @@
 	sb->dq_op = &ext4_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
@@ -2632,13 +2700,13 @@
 			goto failed_mount3;
 		if (!(sb->s_flags & MS_RDONLY) &&
 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
-			printk(KERN_CRIT "EXT4-fs error (device %s): "
+			ext4_msg(sb, KERN_CRIT, "error: "
 			       "ext4_fill_super: Journal transaction "
-			       "%u is corrupt\n", sb->s_id,
+			       "%u is corrupt",
 			       EXT4_SB(sb)->s_journal->j_failed_commit);
 			if (test_opt(sb, ERRORS_RO)) {
-				printk(KERN_CRIT
-				       "Mounting filesystem read-only\n");
+				ext4_msg(sb, KERN_CRIT,
+				       "Mounting filesystem read-only");
 				sb->s_flags |= MS_RDONLY;
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2714,14 @@
 			if (test_opt(sb, ERRORS_PANIC)) {
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-				ext4_commit_super(sb, es, 1);
+				ext4_commit_super(sb, 1);
 				goto failed_mount4;
 			}
 		}
 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
-		printk(KERN_ERR "EXT4-fs: required journal recovery "
-		       "suppressed and not mounted read-only\n");
+		ext4_msg(sb, KERN_ERR, "required journal recovery "
+		       "suppressed and not mounted read-only");
 		goto failed_mount4;
 	} else {
 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2734,7 @@
 	if (ext4_blocks_count(es) > 0xffffffffULL &&
 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
-		printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
+		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
 		goto failed_mount4;
 	}
 
@@ -2704,8 +2772,8 @@
 	case EXT4_MOUNT_WRITEBACK_DATA:
 		if (!jbd2_journal_check_available_features
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
-			printk(KERN_ERR "EXT4-fs: Journal does not support "
-			       "requested data journaling mode\n");
+			ext4_msg(sb, KERN_ERR, "Journal does not support "
+			       "requested data journaling mode");
 			goto failed_mount4;
 		}
 	default:
@@ -2717,8 +2785,8 @@
 
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
-			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
-				"its supported only with writeback mode\n");
+			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
+				"its supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
 	}
@@ -2729,18 +2797,18 @@
 
 	root = ext4_iget(sb, EXT4_ROOT_INO);
 	if (IS_ERR(root)) {
-		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
+		ext4_msg(sb, KERN_ERR, "get root inode failed");
 		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		iput(root);
-		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
+		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
-		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
+		ext4_msg(sb, KERN_ERR, "get root dentry failed");
 		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
@@ -2769,22 +2837,29 @@
 							sbi->s_inode_size) {
 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
 						       EXT4_GOOD_OLD_INODE_SIZE;
-		printk(KERN_INFO "EXT4-fs: required extra inode space not"
-			"available.\n");
+		ext4_msg(sb, KERN_INFO, "required extra inode space not"
+			 "available");
 	}
 
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-				"requested data journaling mode\n");
+		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
+			 "requested data journaling mode");
 		clear_opt(sbi->s_mount_opt, DELALLOC);
 	} else if (test_opt(sb, DELALLOC))
-		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+		ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
+
+	err = ext4_setup_system_zone(sb);
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "failed to initialize system "
+			 "zone (%d)\n", err);
+		goto failed_mount4;
+	}
 
 	ext4_ext_init(sb);
 	err = ext4_mb_init(sb, needs_recovery);
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-		       err);
+		ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
+			 err);
 		goto failed_mount4;
 	}
 
@@ -2798,19 +2873,11 @@
 		goto failed_mount4;
 	};
 
-	/*
-	 * akpm: core read_super() calls in here with the superblock locked.
-	 * That deadlocks, because orphan cleanup needs to lock the superblock
-	 * in numerous places.  Here we just pop the lock - it's relatively
-	 * harmless, because we are now ready to accept write_super() requests,
-	 * and aviro says that's the only reason for hanging onto the
-	 * superblock lock.
-	 */
 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery) {
-		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
+		ext4_msg(sb, KERN_INFO, "recovery complete");
 		ext4_mark_recovery_complete(sb, es);
 	}
 	if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2890,30 @@
 	} else
 		descr = "out journal";
 
-	printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
-	       sb->s_id, descr);
+	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
 
 	lock_kernel();
 	return 0;
 
 cantfind_ext4:
 	if (!silent)
-		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
 failed_mount4:
-	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+	ext4_msg(sb, KERN_ERR, "mount failed");
+	ext4_release_system_zone(sb);
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	if (sbi->s_flex_groups) {
+		if (is_vmalloc_addr(sbi->s_flex_groups))
+			vfree(sbi->s_flex_groups);
+		else
+			kfree(sbi->s_flex_groups);
+	}
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2934,7 @@
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
@@ -2906,27 +2979,27 @@
 
 	journal_inode = ext4_iget(sb, journal_inum);
 	if (IS_ERR(journal_inode)) {
-		printk(KERN_ERR "EXT4-fs: no journal found.\n");
+		ext4_msg(sb, KERN_ERR, "no journal found");
 		return NULL;
 	}
 	if (!journal_inode->i_nlink) {
 		make_bad_inode(journal_inode);
 		iput(journal_inode);
-		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
+		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
 		return NULL;
 	}
 
 	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
 		  journal_inode, journal_inode->i_size);
 	if (!S_ISREG(journal_inode->i_mode)) {
-		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
+		ext4_msg(sb, KERN_ERR, "invalid journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
 
 	journal = jbd2_journal_init_inode(journal_inode);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
+		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
@@ -2950,13 +3023,13 @@
 
 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
 
-	bdev = ext4_blkdev_get(j_dev);
+	bdev = ext4_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
 		return NULL;
 
 	if (bd_claim(bdev, sb)) {
-		printk(KERN_ERR
-			"EXT4-fs: failed to claim external journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"failed to claim external journal device");
 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 		return NULL;
 	}
@@ -2964,8 +3037,8 @@
 	blocksize = sb->s_blocksize;
 	hblock = bdev_hardsect_size(bdev);
 	if (blocksize < hblock) {
-		printk(KERN_ERR
-			"EXT4-fs: blocksize too small for journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"blocksize too small for journal device");
 		goto out_bdev;
 	}
 
@@ -2973,8 +3046,8 @@
 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
 	set_blocksize(bdev, blocksize);
 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
-		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
-		       "external journal\n");
+		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
+		       "external journal");
 		goto out_bdev;
 	}
 
@@ -2982,14 +3055,14 @@
 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-		printk(KERN_ERR "EXT4-fs: external journal has "
-					"bad superblock\n");
+		ext4_msg(sb, KERN_ERR, "external journal has "
+					"bad superblock");
 		brelse(bh);
 		goto out_bdev;
 	}
 
 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
+		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
 		brelse(bh);
 		goto out_bdev;
 	}
@@ -3001,25 +3074,26 @@
 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
+		ext4_msg(sb, KERN_ERR, "failed to create device journal");
 		goto out_bdev;
 	}
 	journal->j_private = sb;
 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
-		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
+		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
 		goto out_journal;
 	}
 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-		printk(KERN_ERR "EXT4-fs: External journal has more than one "
-					"user (unsupported) - %d\n",
+		ext4_msg(sb, KERN_ERR, "External journal has more than one "
+					"user (unsupported) - %d",
 			be32_to_cpu(journal->j_superblock->s_nr_users));
 		goto out_journal;
 	}
 	EXT4_SB(sb)->journal_bdev = bdev;
 	ext4_init_journal_params(sb, journal);
 	return journal;
+
 out_journal:
 	jbd2_journal_destroy(journal);
 out_bdev:
@@ -3041,8 +3115,8 @@
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
-			"numbers have changed\n");
+		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
+			"numbers have changed");
 		journal_dev = new_decode_dev(journal_devnum);
 	} else
 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3128,23 @@
 	 * crash?  For recovery, we need to check in advance whether we
 	 * can get read-write access to the device.
 	 */
-
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_INFO "EXT4-fs: INFO: recovery "
-					"required on readonly filesystem.\n");
+			ext4_msg(sb, KERN_INFO, "INFO: recovery "
+					"required on readonly filesystem");
 			if (really_read_only) {
-				printk(KERN_ERR "EXT4-fs: write access "
-					"unavailable, cannot proceed.\n");
+				ext4_msg(sb, KERN_ERR, "write access "
+					"unavailable, cannot proceed");
 				return -EROFS;
 			}
-			printk(KERN_INFO "EXT4-fs: write access will "
-			       "be enabled during recovery.\n");
+			ext4_msg(sb, KERN_INFO, "write access will "
+			       "be enabled during recovery");
 		}
 	}
 
 	if (journal_inum && journal_dev) {
-		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
-		       "and inode journals!\n");
+		ext4_msg(sb, KERN_ERR, "filesystem has both journal "
+		       "and inode journals!");
 		return -EINVAL;
 	}
 
@@ -3084,14 +3157,14 @@
 	}
 
 	if (journal->j_flags & JBD2_BARRIER)
-		printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers enabled");
 	else
-		printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers disabled");
 
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
 		err = jbd2_journal_update_format(journal);
 		if (err)  {
-			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
+			ext4_msg(sb, KERN_ERR, "error updating journal");
 			jbd2_journal_destroy(journal);
 			return err;
 		}
@@ -3103,7 +3176,7 @@
 		err = jbd2_journal_load(journal);
 
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
+		ext4_msg(sb, KERN_ERR, "error loading journal");
 		jbd2_journal_destroy(journal);
 		return err;
 	}
@@ -3114,18 +3187,17 @@
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 
 	return 0;
 }
 
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync)
+static int ext4_commit_super(struct super_block *sb, int sync)
 {
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 	int error = 0;
 
@@ -3140,8 +3212,8 @@
 		 * be remapped.  Nothing we can do but to retry the
 		 * write and hope for the best.
 		 */
-		printk(KERN_ERR "EXT4-fs: previous I/O error to "
-		       "superblock detected for %s.\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "previous I/O error to "
+		       "superblock detected");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
@@ -3154,7 +3226,7 @@
 					&EXT4_SB(sb)->s_freeblocks_counter));
 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
 					&EXT4_SB(sb)->s_freeinodes_counter));
-
+	sb->s_dirt = 0;
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
 	if (sync) {
@@ -3164,8 +3236,8 @@
 
 		error = buffer_write_io_error(sbh);
 		if (error) {
-			printk(KERN_ERR "EXT4-fs: I/O error while writing "
-			       "superblock for %s.\n", sb->s_id);
+			ext4_msg(sb, KERN_ERR, "I/O error while writing "
+			       "superblock");
 			clear_buffer_write_io_error(sbh);
 			set_buffer_uptodate(sbh);
 		}
@@ -3173,7 +3245,6 @@
 	return error;
 }
 
-
 /*
  * Have we just finished recovery?  If so, and if we are mounting (or
  * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3263,11 @@
 	if (jbd2_journal_flush(journal) < 0)
 		goto out;
 
-	lock_super(sb);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
-	unlock_super(sb);
 
 out:
 	jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3306,7 @@
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 		jbd2_journal_clear_err(journal);
 	}
@@ -3257,29 +3325,15 @@
 		return 0;
 
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		sb->s_dirt = 0;
+	if (journal)
 		ret = ext4_journal_force_commit(journal);
-	}
 
 	return ret;
 }
 
-/*
- * Ext4 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
- */
 static void ext4_write_super(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal) {
-		if (mutex_trylock(&sb->s_lock) != 0)
-			BUG();
-		sb->s_dirt = 0;
-	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-	}
+	ext4_commit_super(sb, 1);
 }
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3342,9 @@
 	tid_t target;
 
 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
-	sb->s_dirt = 0;
-	if (EXT4_SB(sb)->s_journal) {
-		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
-					      &target)) {
-			if (wait)
-				jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
-						     target);
-		}
-	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
+	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
+		if (wait)
+			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
 	}
 	return ret;
 }
@@ -3310,34 +3357,32 @@
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		journal = EXT4_SB(sb)->s_journal;
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
 
-		if (journal) {
-			/* Now we set up the journal barrier. */
-			jbd2_journal_lock_updates(journal);
+	journal = EXT4_SB(sb)->s_journal;
 
-			/*
-			 * We don't want to clear needs_recovery flag when we
-			 * failed to flush the journal.
-			 */
-			error = jbd2_journal_flush(journal);
-			if (error < 0)
-				goto out;
-		}
+	/* Now we set up the journal barrier. */
+	jbd2_journal_lock_updates(journal);
 
-		/* Journal blocked and flushed, clear needs_recovery flag. */
-		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-		if (error)
-			goto out;
+	/*
+	 * Don't clear the needs_recovery flag if we failed to flush
+	 * the journal.
+	 */
+	error = jbd2_journal_flush(journal);
+	if (error < 0) {
+	out:
+		jbd2_journal_unlock_updates(journal);
+		return error;
 	}
+
+	/* Journal blocked and flushed, clear needs_recovery flag. */
+	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	error = ext4_commit_super(sb, 1);
+	if (error)
+		goto out;
 	return 0;
-out:
-	jbd2_journal_unlock_updates(journal);
-	return error;
 }
 
 /*
@@ -3346,14 +3391,15 @@
  */
 static int ext4_unfreeze(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
-		lock_super(sb);
-		/* Reser the needs_recovery flag before the fs is unlocked. */
-		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-		unlock_super(sb);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
-	}
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	lock_super(sb);
+	/* Reset the needs_recovery flag before the fs is unlocked. */
+	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	ext4_commit_super(sb, 1);
+	unlock_super(sb);
+	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	return 0;
 }
 
@@ -3432,22 +3478,15 @@
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			/*
-			 * We have to unlock super so that we can wait for
-			 * transactions.
-			 */
-			if (sbi->s_journal) {
-				unlock_super(sb);
+			if (sbi->s_journal)
 				ext4_mark_recovery_complete(sb, es);
-				lock_super(sb);
-			}
 		} else {
 			int ret;
 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
 					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "couldn't "
 				       "remount RDWR because of unsupported "
-				       "optional features (%x).\n", sb->s_id,
+				       "optional features (%x)",
 				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
 					~EXT4_FEATURE_RO_COMPAT_SUPP));
 				err = -EROFS;
@@ -3456,17 +3495,15 @@
 
 			/*
 			 * Make sure the group descriptor checksums
-			 * are sane.  If they aren't, refuse to
-			 * remount r/w.
+			 * are sane.  If they aren't, refuse to remount r/w.
 			 */
 			for (g = 0; g < sbi->s_groups_count; g++) {
 				struct ext4_group_desc *gdp =
 					ext4_get_group_desc(sb, g, NULL);
 
 				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
-					printk(KERN_ERR
-	       "EXT4-fs: ext4_remount: "
-		"Checksum for group %u failed (%u!=%u)\n",
+					ext4_msg(sb, KERN_ERR,
+	       "ext4_remount: Checksum for group %u failed (%u!=%u)",
 		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
 					       le16_to_cpu(gdp->bg_checksum));
 					err = -EINVAL;
@@ -3480,11 +3517,10 @@
 			 * require a full umount/remount for now.
 			 */
 			if (es->s_last_orphan) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "Couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
-				       "umount/remount instead.\n",
-				       sb->s_id);
+				       "umount/remount instead");
 				err = -EINVAL;
 				goto restore_opts;
 			}
@@ -3504,8 +3540,9 @@
 				sb->s_flags &= ~MS_RDONLY;
 		}
 	}
+	ext4_setup_system_zone(sb);
 	if (sbi->s_journal == NULL)
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
@@ -3515,6 +3552,7 @@
 			kfree(old_opts.s_qf_names[i]);
 #endif
 	return 0;
+
 restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3545,9 +3583,8 @@
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
-		ext4_group_t ngroups = sbi->s_groups_count, i;
+		ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 		ext4_fsblk_t overhead = 0;
-		smp_rmb();
 
 		/*
 		 * Compute the overhead (FS structures).  This is constant
@@ -3599,11 +3636,12 @@
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+
 	return 0;
 }
 
-/* Helper function for writing quotas on sync - we need to start transaction before quota file
- * is locked for write. Otherwise the are possible deadlocks:
+/* Helper function for writing quotas on sync - we need to start transaction
+ * before quota file is locked for write. Otherwise the are possible deadlocks:
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
  *   jbd2_journal_start()                  write_dquot()
@@ -3627,7 +3665,7 @@
 
 	inode = dquot_to_inode(dquot);
 	handle = ext4_journal_start(inode,
-					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -3643,7 +3681,7 @@
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -3659,7 +3697,7 @@
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle)) {
 		/* Release dquot anyway to avoid endless cycle in dqput() */
 		dquot_release(dquot);
@@ -3707,7 +3745,7 @@
 static int ext4_quota_on_mount(struct super_block *sb, int type)
 {
 	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
-			EXT4_SB(sb)->s_jquota_fmt, type);
+				  EXT4_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
@@ -3738,9 +3776,9 @@
 	if (EXT4_SB(sb)->s_qf_names[type]) {
 		/* Quotafile not in fs root? */
 		if (path.dentry->d_parent != sb->s_root)
-			printk(KERN_WARNING
-				"EXT4-fs: Quota file not on filesystem root. "
-				"Journaled quota will not work.\n");
+			ext4_msg(sb, KERN_WARNING,
+				"Quota file not on filesystem root. "
+				"Journaled quota will not work");
 	}
 
 	/*
@@ -3823,8 +3861,8 @@
 	handle_t *handle = journal_current_handle();
 
 	if (EXT4_SB(sb)->s_journal && !handle) {
-		printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
-			" cancelled because transaction is not started.\n",
+		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because transaction is not started",
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
@@ -3878,10 +3916,10 @@
 
 #endif
 
-static int ext4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3931,14 @@
 };
 
 #ifdef CONFIG_EXT4DEV_COMPAT
-static int ext4dev_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
+			  const char *dev_name, void *data,struct vfsmount *mnt)
 {
-	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
-	       "to mount using ext4\n");
-	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
-	       "will go away by 2.6.31\n");
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
+	       "to mount using ext4\n", dev_name);
+	printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
+	       "will go away by 2.6.31\n", dev_name);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3955,16 @@
 {
 	int err;
 
+	err = init_ext4_system_zone();
+	if (err)
+		return err;
 	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
 	if (!ext4_kset)
-		return -ENOMEM;
+		goto out4;
 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
 	err = init_ext4_mballoc();
 	if (err)
-		return err;
+		goto out3;
 
 	err = init_ext4_xattr();
 	if (err)
@@ -3948,6 +3989,11 @@
 	exit_ext4_xattr();
 out2:
 	exit_ext4_mballoc();
+out3:
+	remove_proc_entry("fs/ext4", NULL);
+	kset_unregister(ext4_kset);
+out4:
+	exit_ext4_system_zone();
 	return err;
 }
 
@@ -3962,6 +4008,7 @@
 	exit_ext4_mballoc();
 	remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
+	exit_ext4_system_zone();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 82d9c42..286f38d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -414,10 +414,6 @@
 	switch (cmd) {
 	case FIBMAP:
 		return ioctl_fibmap(filp, p);
-	case FS_IOC_FIEMAP:
-		return ioctl_fiemap(filp, arg);
-	case FIGETBSZ:
-		return put_user(inode->i_sb->s_blocksize, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
@@ -557,6 +553,16 @@
 		error = ioctl_fsthaw(filp);
 		break;
 
+	case FS_IOC_FIEMAP:
+		return ioctl_fiemap(filp, arg);
+
+	case FIGETBSZ:
+	{
+		struct inode *inode = filp->f_path.dentry->d_inode;
+		int __user *p = (int __user *)arg;
+		return put_user(inode->i_sb->s_blocksize, p);
+	}
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5814410..62be7d2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1781,7 +1781,7 @@
  * Journal abort has very specific semantics, which we describe
  * for journal abort.
  *
- * Two internal function, which provide abort to te jbd layer
+ * Two internal functions, which provide abort to the jbd layer
  * itself are here.
  */
 
@@ -1879,7 +1879,7 @@
  * int jbd2_journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
- * This is the errno numbet set with jbd2_journal_abort(), the last
+ * This is the errno number set with jbd2_journal_abort(), the last
  * time the journal was mounted - if the journal was stopped
  * without calling abort this will be 0.
  *
@@ -1903,7 +1903,7 @@
  * int jbd2_journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@
  * void jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 void jbd2_journal_ack_err(journal_t *journal)
diff --git a/fs/mpage.c b/fs/mpage.c
index 680ba60..42381bd 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,7 +379,8 @@
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
@@ -412,7 +413,8 @@
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
 			&map_bh, &first_logical_block, get_block);
 	if (bio)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bc..f1736ca 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,7 +63,7 @@
 #define BRANCH_PROFILE()
 #endif
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
 			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889..7e4350e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@
  * The remap event
  */
 struct blk_io_trace_remap {
-	__be32 device;
 	__be32 device_from;
-	__be64 sector;
+	__be32 device_to;
+	__be64 sector_from;
 };
 
 enum {
@@ -165,8 +165,9 @@
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q,
-	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern int do_blk_trace_setup(struct request_queue *q, char *name,
+			      dev_t dev, struct block_device *bdev,
+			      struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
@@ -193,22 +194,42 @@
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
 				void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+			   struct block_device *bdev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
-#define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
-#define blk_trace_shutdown(q)			do { } while (0)
-#define do_blk_trace_setup(q, name, dev, buts)	(-ENOTTY)
-#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
-#define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
-#define blk_trace_startstop(q, start)		(-ENOTTY)
-#define blk_trace_remove(q)			(-ENOTTY)
-#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
+# define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
+# define blk_trace_shutdown(q)				do { } while (0)
+# define do_blk_trace_setup(q, name, dev, bdev, buts)	(-ENOTTY)
+# define blk_add_driver_data(q, rq, data, len)		do {} while (0)
+# define blk_trace_setup(q, name, dev, bdev, arg)	(-ENOTTY)
+# define blk_trace_startstop(q, start)			(-ENOTTY)
+# define blk_trace_remove(q)				(-ENOTTY)
+# define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
+static inline int blk_trace_init_sysfs(struct device *dev)
+{
+	return 0;
+}
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+	return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+}
+
+extern void blk_dump_cmd(char *buf, struct request *rq);
+extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
+
+#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21..af931ee 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -222,6 +222,8 @@
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
+long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+				  struct compat_siginfo __user *uinfo);
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
 					struct compat_timeval *rhs)
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 788850b..1fbdea4 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -142,19 +142,6 @@
 
 
 #ifndef DP_WINDOW_SIZE
-/* #include "cyclomz.h" */
-/****************** ****************** *******************/
-/*
- *	The data types defined below are used in all ZFIRM interface
- *	data structures. They accomodate differences between HW
- *	architectures and compilers.
- */
-
-typedef __u64  ucdouble;		/* 64 bits, unsigned */
-typedef __u32  uclong;			/* 32 bits, unsigned */
-typedef __u16  ucshort;		/* 16 bits, unsigned */
-typedef __u8   ucchar;			/* 8 bits, unsigned */
-
 /*
  *	Memory Window Sizes
  */
@@ -507,16 +494,20 @@
 
 /* Per card data structure */
 struct cyclades_card {
-    void __iomem *base_addr;
-    void __iomem *ctl_addr;
-    int irq;
-    unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
-    unsigned int first_line;	/* minor number of first channel on card */
-    unsigned int nports;	/* Number of ports in the card */
-    int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
-    int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
-    spinlock_t card_lock;
-    struct cyclades_port *ports;
+	void __iomem *base_addr;
+	union {
+		void __iomem *p9050;
+		struct RUNTIME_9060 __iomem *p9060;
+	} ctl_addr;
+	int irq;
+	unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
+	unsigned int first_line;	/* minor number of first channel on card */
+	unsigned int nports;	/* Number of ports in the card */
+	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
+	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	u32 hw_ver;
+	spinlock_t card_lock;
+	struct cyclades_port *ports;
 };
 
 /***************************************
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8a0c2f2..39b95c5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@
 
 extern int skip_trace(unsigned long ip);
 
-extern void ftrace_release(void *start, unsigned long size);
-
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
@@ -325,13 +323,8 @@
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
-			       unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
-		   unsigned long *start, unsigned long *end) { }
 #endif
 
 /*
@@ -368,6 +361,7 @@
 	unsigned long ret;
 	unsigned long func;
 	unsigned long long calltime;
+	unsigned long long subtime;
 };
 
 /*
@@ -379,8 +373,6 @@
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
-extern void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
 
 /*
  * Sometimes we don't want to trace a function with the function
@@ -496,8 +488,15 @@
 
 extern int ftrace_dump_on_oops;
 
+#ifdef CONFIG_PREEMPT
+#define INIT_TRACE_RECURSION		.trace_recursion = 0,
+#endif
+
 #endif /* CONFIG_TRACING */
 
+#ifndef INIT_TRACE_RECURSION
+#define INIT_TRACE_RECURSION
+#endif
 
 #ifdef CONFIG_HW_BRANCH_TRACER
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 0000000..5c093ff
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,172 @@
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/trace_seq.h>
+#include <linux/ring_buffer.h>
+#include <linux/percpu.h>
+
+struct trace_array;
+struct tracer;
+struct dentry;
+
+DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
+struct trace_print_flags {
+	unsigned long		mask;
+	const char		*name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+				   unsigned long flags,
+				   const struct trace_print_flags *flag_array);
+
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				     const struct trace_print_flags *symbol_array);
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned short		type;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+	int			tgid;
+};
+
+#define FTRACE_MAX_EVENT						\
+	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+	struct trace_array	*tr;
+	struct tracer		*trace;
+	void			*private;
+	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	unsigned long		iter_flags;
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq	seq;
+	struct trace_entry	*ent;
+	int			cpu;
+	u64			ts;
+
+	loff_t			pos;
+	long			idx;
+
+	cpumask_var_t		started;
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+					      int flags);
+struct trace_event {
+	struct hlist_node	node;
+	struct list_head	list;
+	int			type;
+	trace_print_func	trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
+};
+
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(int type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+struct ftrace_event_call {
+	struct list_head	list;
+	char			*name;
+	char			*system;
+	struct dentry		*dir;
+	struct trace_event	*event;
+	int			enabled;
+	int			(*regfunc)(void);
+	void			(*unregfunc)(void);
+	int			id;
+	int			(*raw_init)(void);
+	int			(*show_format)(struct trace_seq *s);
+	int			(*define_fields)(void);
+	struct list_head	fields;
+	int			filter_active;
+	void			*filter;
+	void			*mod;
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_t	profile_count;
+	int		(*profile_enable)(struct ftrace_event_call *);
+	void		(*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+#define MAX_FILTER_PRED		32
+#define MAX_FILTER_STR_VAL	128
+
+extern int init_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+					void *rec,
+					struct ring_buffer_event *event);
+
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size, int is_signed);
+
+#define is_signed_type(type)	(((type)(-1)) < 0)
+
+int trace_set_clr_event(const char *system, const char *event, int set);
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
+#define __common_field(type, item, is_signed)				\
+	ret = trace_define_field(event_call, #type, "common_" #item,	\
+				 offsetof(typeof(field.ent), item),	\
+				 sizeof(field.ent.item), is_signed);	\
+	if (ret)							\
+		return ret;
+
+#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9fed365..867cb68d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -324,7 +327,6 @@
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
@@ -360,11 +362,7 @@
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -377,10 +375,6 @@
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -593,16 +587,16 @@
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1174,7 +1168,10 @@
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
+
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 7f54ba9..6646bfc 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -174,6 +174,7 @@
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
+	INIT_TRACE_RECURSION						\
 }
 
 
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 0000000..b616d39
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <trace/events/kmem.h>
+
+#ifdef CONFIG_KMEMTRACE
+extern void kmemtrace_init(void);
+#else
+static inline void kmemtrace_init(void)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5528ff3..ad613ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1315,8 +1316,8 @@
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern void *alloc_locked_buffer(size_t size);
-extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+				 size_t size);
+extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bd..97491f7 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@
 
 extern int register_kmmio_probe(struct kmmio_probe *p);
 extern void unregister_kmmio_probe(struct kmmio_probe *p);
+extern int kmmio_init(void);
+extern void kmmio_cleanup(void);
 
 #ifdef CONFIG_MMIOTRACE
 /* kmmio is active by some kmmio_probes? */
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac08..a8f2c0a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -337,6 +337,14 @@
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	struct ftrace_event_call *trace_events;
+	unsigned int num_trace_events;
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	unsigned long *ftrace_callsites;
+	unsigned int num_ftrace_callsites;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812..d7d1c41 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1996,10 +1996,12 @@
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U	0xC118
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU	0xC11C
 #define PCI_DEVICE_ID_OXSEMI_16PCI954	0x9501
+#define PCI_DEVICE_ID_OXSEMI_C950	0x950B
 #define PCI_DEVICE_ID_OXSEMI_16PCI95N	0x9511
 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP	0x9513
 #define PCI_DEVICE_ID_OXSEMI_16PCI952	0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP	0x9523
+#define PCI_SUBDEVICE_ID_OXSEMI_C950	0x0001
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67c1565..59e133d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -95,7 +95,6 @@
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@
 #define arch_ptrace_untrace(task)		do { } while (0)
 #endif
 
-#ifndef arch_ptrace_fork
-/*
- * Do machine-specific work to initialize a new task.
- *
- * This is called from copy_process().
- */
-#define arch_ptrace_fork(child, clone_flags)	do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644
index 0000000..4f532fc
--- /dev/null
+++ b/include/linux/rational.h
@@ -0,0 +1,19 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers,
+ * e.g. when calculating optimum numerator/denominator pairs for
+ * pll configuration taking into account restricted register size
+ */
+
+#ifndef _LINUX_RATIONAL_H
+#define _LINUX_RATIONAL_H
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator);
+
+#endif /* _LINUX_RATIONAL_H */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3..5710f43 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,6 +198,32 @@
 	at->prev = last;
 }
 
+/**
+ * list_entry_rcu - get the struct for this entry
+ * @ptr:        the &struct list_head pointer.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_entry_rcu(ptr, type, member) \
+	container_of(rcu_dereference(ptr), type, member)
+
+/**
+ * list_first_entry_rcu - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_first_entry_rcu(ptr, type, member) \
+	list_entry_rcu((ptr)->next, type, member)
+
 #define __list_for_each_rcu(pos, head) \
 	for (pos = rcu_dereference((head)->next); \
 		pos != (head); \
@@ -214,9 +240,9 @@
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
+	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
 		prefetch(pos->member.next), &pos->member != (head); \
-		pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
+		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
 /**
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 58b2aa5..5a51538 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,8 +161,15 @@
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long resched_ipi;	/* Sent a resched IPI. */
 
-	/* 5) For future __rcu_pending statistics. */
+	/* 5) __rcu_pending() statistics. */
 	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rp_qs_pending;
+	long n_rp_cb_ready;
+	long n_rp_cpu_needs_gp;
+	long n_rp_gp_completed;
+	long n_rp_gp_started;
+	long n_rp_need_fqs;
+	long n_rp_need_nothing;
 
 	int cpu;
 };
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b21..8670f15 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
-	u32		type:2, len:3, time_delta:27;
+	u32		type_len:5, time_delta:27;
 	u32		array[];
 };
 
@@ -24,7 +24,8 @@
  *				  size is variable depending on how much
  *				  padding is needed
  *				 If time_delta is non zero:
- *				  everything else same as RINGBUF_TYPE_DATA
+ *				  array[0] holds the actual length
+ *				  size = 4 + length (bytes)
  *
  * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
  *				 array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@
  *				 array[1..2] = tv_sec
  *				 size = 16 bytes
  *
- * @RINGBUF_TYPE_DATA:		Data record
- *				 If len is zero:
+ * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
+ *				Data record
+ *				 If type_len is zero:
  *				  array[0] holds the actual length
  *				  array[1..(length+3)/4] holds data
- *				  size = 4 + 4 + length (bytes)
+ *				  size = 4 + length (bytes)
  *				 else
- *				  length = len << 2
+ *				  length = type_len << 2
  *				  array[0..(length+3)/4-1] holds data
  *				  size = 4 + length (bytes)
  */
 enum ring_buffer_type {
+	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
 	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
-	RINGBUF_TYPE_DATA,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
@@ -68,13 +70,54 @@
 	return event->time_delta;
 }
 
+/*
+ * ring_buffer_event_discard can discard any event in the ring buffer.
+ *   it is up to the caller to protect against a reader from
+ *   consuming it or a writer from wrapping and replacing it.
+ *
+ * No external protection is needed if this is called before
+ * the event is commited. But in that case it would be better to
+ * use ring_buffer_discard_commit.
+ *
+ * Note, if an event that has not been committed is discarded
+ * with ring_buffer_event_discard, it must still be committed.
+ */
 void ring_buffer_event_discard(struct ring_buffer_event *event);
 
 /*
+ * ring_buffer_discard_commit will remove an event that has not
+ *   ben committed yet. If this is used, then ring_buffer_unlock_commit
+ *   must not be called on the discarded event. This function
+ *   will try to remove the event from the ring buffer completely
+ *   if another event has not been written after it.
+ *
+ * Example use:
+ *
+ *  if (some_condition)
+ *    ring_buffer_discard_commit(buffer, event);
+ *  else
+ *    ring_buffer_unlock_commit(buffer, event);
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+
+/*
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
-ring_buffer_alloc(unsigned long size, unsigned flags);
+__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc(size, flags)			\
+({							\
+	static struct lock_class_key __key;		\
+	__ring_buffer_alloc((size), (flags), &__key);	\
+})
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
@@ -122,6 +165,8 @@
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
@@ -137,6 +182,11 @@
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
 			  size_t len, int cpu, int full);
 
+struct trace_seq;
+
+int ring_buffer_print_entry_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_seq *s);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff687281f..42bf276 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -96,8 +97,8 @@
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1229,18 +1230,11 @@
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
 	/*
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
-#endif /* CONFIG_X86_PTRACE_BTS */
+	struct bts_context *bts;
 
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
@@ -1450,7 +1444,9 @@
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-#endif
+	/* bitmask of trace recursion */
+	unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2024,8 +2020,10 @@
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
@@ -2033,7 +2031,8 @@
 }
 #endif
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2072,8 +2071,8 @@
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
-			  struct task_struct, thread_group);
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 9136cc5..e5bb75a 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -96,54 +96,76 @@
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
+ *
+ * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
  */
-#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 
-				   on the callout port */
-#define ASYNC_FOURPORT  0x0002	/* Set OU1, OUT2 per AST Fourport settings */
-#define ASYNC_SAK	0x0004	/* Secure Attention Key (Orange book) */
-#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
+#define ASYNCB_HUP_NOTIFY	 0 /* Notify getty on hangups and closes
+				    * on the callout port */
+#define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
+#define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
+#define ASYNCB_SPLIT_TERMIOS	 3 /* Separate termios for dialin/callout */
+#define ASYNCB_SPD_HI		 4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_VHI		 5 /* Use 115200 instead of 38400 bps */
+#define ASYNCB_SKIP_TEST	 6 /* Skip UART test during autoconfiguration */
+#define ASYNCB_AUTO_IRQ		 7 /* Do automatic IRQ during
+				    * autoconfiguration */
+#define ASYNCB_SESSION_LOCKOUT	 8 /* Lock out cua opens based on session */
+#define ASYNCB_PGRP_LOCKOUT	 9 /* Lock out cua opens based on pgrp */
+#define ASYNCB_CALLOUT_NOHUP	10 /* Don't do hangups for cua device */
+#define ASYNCB_HARDPPS_CD	11 /* Call hardpps when CD goes high  */
+#define ASYNCB_SPD_SHI		12 /* Use 230400 instead of 38400 bps */
+#define ASYNCB_LOW_LATENCY	13 /* Request low latency behaviour */
+#define ASYNCB_BUGGY_UART	14 /* This is a buggy UART, skip some safety
+				    * checks.  Note: can be dangerous! */
+#define ASYNCB_AUTOPROBE	15 /* Port was autoprobed by PCI or PNP code */
+#define ASYNCB_LAST_USER	15
 
-#define ASYNC_SPD_MASK	0x1030
-#define ASYNC_SPD_HI	0x0010	/* Use 56000 instead of 38400 bps */
+/* Internal flags used only by kernel */
+#define ASYNCB_INITIALIZED	31 /* Serial port was initialized */
+#define ASYNCB_NORMAL_ACTIVE	29 /* Normal device is active */
+#define ASYNCB_BOOT_AUTOCONF	28 /* Autoconfigure port on bootup */
+#define ASYNCB_CLOSING		27 /* Serial port is closing */
+#define ASYNCB_CTS_FLOW		26 /* Do CTS flow control */
+#define ASYNCB_CHECK_CD		25 /* i.e., CLOCAL */
+#define ASYNCB_SHARE_IRQ	24 /* for multifunction cards, no longer used */
+#define ASYNCB_CONS_FLOW	23 /* flow control for console  */
+#define ASYNCB_BOOT_ONLYMCA	22 /* Probe only if MCA bus */
+#define ASYNCB_FIRST_KERNEL	22
 
-#define ASYNC_SPD_VHI	0x0020  /* Use 115200 instead of 38400 bps */
-#define ASYNC_SPD_CUST	0x0030  /* Use user-specified divisor */
+#define ASYNC_HUP_NOTIFY	(1U << ASYNCB_HUP_NOTIFY)
+#define ASYNC_FOURPORT		(1U << ASYNCB_FOURPORT)
+#define ASYNC_SAK		(1U << ASYNCB_SAK)
+#define ASYNC_SPLIT_TERMIOS	(1U << ASYNCB_SPLIT_TERMIOS)
+#define ASYNC_SPD_HI		(1U << ASYNCB_SPD_HI)
+#define ASYNC_SPD_VHI		(1U << ASYNCB_SPD_VHI)
+#define ASYNC_SKIP_TEST		(1U << ASYNCB_SKIP_TEST)
+#define ASYNC_AUTO_IRQ		(1U << ASYNCB_AUTO_IRQ)
+#define ASYNC_SESSION_LOCKOUT	(1U << ASYNCB_SESSION_LOCKOUT)
+#define ASYNC_PGRP_LOCKOUT	(1U << ASYNCB_PGRP_LOCKOUT)
+#define ASYNC_CALLOUT_NOHUP	(1U << ASYNCB_CALLOUT_NOHUP)
+#define ASYNC_HARDPPS_CD	(1U << ASYNCB_HARDPPS_CD)
+#define ASYNC_SPD_SHI		(1U << ASYNCB_SPD_SHI)
+#define ASYNC_LOW_LATENCY	(1U << ASYNCB_LOW_LATENCY)
+#define ASYNC_BUGGY_UART	(1U << ASYNCB_BUGGY_UART)
+#define ASYNC_AUTOPROBE		(1U << ASYNCB_AUTOPROBE)
 
-#define ASYNC_SKIP_TEST	0x0040 /* Skip UART test during autoconfiguration */
-#define ASYNC_AUTO_IRQ  0x0080 /* Do automatic IRQ during autoconfiguration */
-#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
-#define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
-#define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
+#define ASYNC_FLAGS		((1U << ASYNCB_LAST_USER) - 1)
+#define ASYNC_USR_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI| \
+		ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
+#define ASYNC_SPD_CUST		(ASYNC_SPD_HI|ASYNC_SPD_VHI)
+#define ASYNC_SPD_WARP		(ASYNC_SPD_HI|ASYNC_SPD_SHI)
+#define ASYNC_SPD_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
 
-#define ASYNC_HARDPPS_CD	0x0800	/* Call hardpps when CD goes high  */
-
-#define ASYNC_SPD_SHI	0x1000	/* Use 230400 instead of 38400 bps */
-#define ASYNC_SPD_WARP	0x1010	/* Use 460800 instead of 38400 bps */
-
-#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */
-
-#define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
-				  * checks.  Note: can be dangerous! */
-
-#define ASYNC_AUTOPROBE	 0x8000 /* Port was autoprobed by PCI or PNP code */
-
-#define ASYNC_FLAGS	0x7FFF	/* Possible legal async flags */
-#define ASYNC_USR_MASK	0x3430	/* Legal flags that non-privileged
-				 * users can set or reset */
-
-/* Internal flags used only by kernel/chr_drv/serial.c */
-#define ASYNC_INITIALIZED	0x80000000 /* Serial port was initialized */
-#define ASYNC_NORMAL_ACTIVE	0x20000000 /* Normal device is active */
-#define ASYNC_BOOT_AUTOCONF	0x10000000 /* Autoconfigure port on bootup */
-#define ASYNC_CLOSING		0x08000000 /* Serial port is closing */
-#define ASYNC_CTS_FLOW		0x04000000 /* Do CTS flow control */
-#define ASYNC_CHECK_CD		0x02000000 /* i.e., CLOCAL */
-#define ASYNC_SHARE_IRQ		0x01000000 /* for multifunction cards
-					     --- no longer used */
-#define ASYNC_CONS_FLOW		0x00800000 /* flow control for console  */
-
-#define ASYNC_BOOT_ONLYMCA	0x00400000 /* Probe only if MCA bus */
-#define ASYNC_INTERNAL_FLAGS	0xFFC00000 /* Internal flags */
+#define ASYNC_INITIALIZED	(1U << ASYNCB_INITIALIZED)
+#define ASYNC_NORMAL_ACTIVE	(1U << ASYNCB_NORMAL_ACTIVE)
+#define ASYNC_BOOT_AUTOCONF	(1U << ASYNCB_BOOT_AUTOCONF)
+#define ASYNC_CLOSING		(1U << ASYNCB_CLOSING)
+#define ASYNC_CTS_FLOW		(1U << ASYNCB_CTS_FLOW)
+#define ASYNC_CHECK_CD		(1U << ASYNCB_CHECK_CD)
+#define ASYNC_SHARE_IRQ		(1U << ASYNCB_SHARE_IRQ)
+#define ASYNC_CONS_FLOW		(1U << ASYNCB_CONS_FLOW)
+#define ASYNC_BOOT_ONLYMCA	(1U << ASYNCB_BOOT_ONLYMCA)
+#define ASYNC_INTERNAL_FLAGS	(~((1U << ASYNCB_FIRST_KERNEL) - 1))
 
 /*
  * Multiport serial configuration structure --- external structure
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 57a97e5..6fd80c4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -41,7 +41,8 @@
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
-#define PORT_MAX_8250	17	/* max port ID */
+#define PORT_AR7	18	/* Texas Instruments AR7 internal UART */
+#define PORT_MAX_8250	18	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
@@ -167,6 +168,9 @@
 /* MAX3100 */
 #define PORT_MAX3100    86
 
+/* Timberdale UART */
+#define PORT_TIMBUART	87
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f..c755283 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -235,6 +235,8 @@
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
+				 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int show_unhandled_signals;
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0b..713f841 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90..be5d40c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 0000000..c68bccb
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,92 @@
+#ifndef _LINUX_TRACE_SEQ_H
+#define _LINUX_TRACE_SEQ_H
+
+#include <linux/fs.h>
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE.
+ */
+
+struct trace_seq {
+	unsigned char		buffer[PAGE_SIZE];
+	unsigned int		len;
+	unsigned int		readpos;
+};
+
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/*
+ * Currently only defined when tracing is enabled.
+ */
+#ifdef CONFIG_TRACING
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+
+#else /* CONFIG_TRACING */
+static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+	return 0;
+}
+static inline int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+	return 0;
+}
+
+static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+}
+static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt)
+{
+	return 0;
+}
+static inline int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	return 0;
+}
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+	return 0;
+}
+static inline int
+trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
+{
+	return 0;
+}
+static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				       size_t len)
+{
+	return 0;
+}
+static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+	return NULL;
+}
+static inline int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	return 0;
+}
+#endif /* CONFIG_TRACING */
+
+#endif /* _LINUX_TRACE_SEQ_H */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee..14df7e6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@
 					 * Keep in sync with vmlinux.lds.h.
 					 */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)		args
 
@@ -114,6 +116,7 @@
 	struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,8 @@
 }
 
 #define PARAMS(args...) args
-#define TRACE_FORMAT(name, proto, args, fmt)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 
-
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +263,6 @@
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db95..1488d8c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,7 +185,7 @@
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
-	void (*raise_dtr_rts)(struct tty_port *port);
+	void (*dtr_rts)(struct tty_port *port, int raise);
 };
 	
 struct tty_port {
@@ -201,6 +201,9 @@
 	unsigned char		*xmit_buf;	/* Optional buffer */
 	int			close_delay;	/* Close port delay */
 	int			closing_wait;	/* Delay for output */
+	int			drain_delay;	/* Set to zero if no pure time
+						   based drain is needed else
+						   set to size of fifo */
 };
 
 /*
@@ -223,8 +226,11 @@
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
-	/* The ldisc objects are protected by tty_ldisc_lock at the moment */
-	struct tty_ldisc ldisc;
+
+	/* Protects ldisc changes: Lock tty not pty */
+	struct mutex ldisc_mutex;
+	struct tty_ldisc *ldisc;
+
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -311,6 +317,7 @@
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
 #define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
+#define TTY_LDISC_OPEN	 	11	/* Line discipline is open */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
@@ -403,6 +410,7 @@
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
+extern void tty_ldisc_hangup(struct tty_struct *tty);
 extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
@@ -425,6 +433,9 @@
 extern void tty_release_dev(struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
+extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
+extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
+
 extern struct mutex tty_mutex;
 
 extern void tty_write_unlock(struct tty_struct *tty);
@@ -438,6 +449,7 @@
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_lower_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index bcba84e..3566129 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -127,7 +127,8 @@
  * 	the line discipline are close to full, and it should somehow
  * 	signal that no more characters should be sent to the tty.
  *
- *	Optional: Always invoke via tty_throttle();
+ *	Optional: Always invoke via tty_throttle(), called under the
+ *	termios lock.
  * 
  * void (*unthrottle)(struct tty_struct * tty);
  *
@@ -135,7 +136,8 @@
  * 	that characters can now be sent to the tty without fear of
  * 	overrunning the input buffers of the line disciplines.
  * 
- *	Optional: Always invoke via tty_unthrottle();
+ *	Optional: Always invoke via tty_unthrottle(), called under the
+ *	termios lock.
  *
  * void (*stop)(struct tty_struct *tty);
  *
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 625e9e4..8cdfed7 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -224,8 +224,7 @@
 	/* Called by console with tty = NULL and by tty */
 	int  (*open)(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-	void (*close)(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+	void (*close)(struct usb_serial_port *port);
 	int  (*write)(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 	/* Called only by the tty layer */
@@ -241,6 +240,10 @@
 	int  (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int  (*tiocmset)(struct tty_struct *tty, struct file *file,
 			 unsigned int set, unsigned int clear);
+	/* Called by the tty layer for port level work. There may or may not
+	   be an attached tty at this point */
+	void (*dtr_rts)(struct usb_serial_port *port, int on);
+	int  (*carrier_raised)(struct usb_serial_port *port);
 	/* USB events */
 	void (*read_int_callback)(struct urb *urb);
 	void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@
 		struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
-extern void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644
index 25b7068..0000000
--- a/include/trace/block.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _TRACE_BLOCK_H
-#define _TRACE_BLOCK_H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(block_rq_abort,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_insert,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_issue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_requeue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_complete,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_bio_bounce,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_complete,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_backmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_frontmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_queue,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_getrq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_sleeprq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_plug,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_timer,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_io,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_split,
-	TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-	      TP_ARGS(q, bio, pdu));
-
-DECLARE_TRACE(block_remap,
-	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from, sector_t to),
-	      TP_ARGS(q, bio, dev, from, to));
-
-#endif
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 0000000..f7a7ae1
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
+/*
+ * Trace files that want to automate creationg of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	DEFINE_TRACE(name)
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
+# define UNDEF_TRACE_INCLUDE_PATH
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#ifdef CONFIG_EVENT_TRACING
+#include <trace/ftrace.h>
+#endif
+
+#undef TRACE_HEADER_MULTI_READ
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_PATH
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 0000000..53effd4
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,498 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,         comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,		comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors	   = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned,	nr_sector	)
+		__field( int,		error		)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+        ),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			      bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			    bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio,
+		 unsigned int new_sector),
+
+	TP_ARGS(q, bio, new_sector),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev				)
+		__field( sector_t,	sector				)
+		__field( sector_t,	new_sector			)
+		__array( char,		rwbs,		6		)
+		__array( char,		comm,		TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->new_sector	= new_sector;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu / %llu [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  (unsigned long long)__entry->new_sector,
+		  __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, bio, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  (unsigned long long)__entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 0000000..b0c7ede
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,145 @@
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)			\
+	__print_symbolic(val,			\
+			 softirq_name(HI),	\
+			 softirq_name(TIMER),	\
+			 softirq_name(NET_TX),	\
+			 softirq_name(NET_RX),	\
+			 softirq_name(BLOCK),	\
+			 softirq_name(TASKLET),	\
+			 softirq_name(SCHED),	\
+			 softirq_name(HRTIMER),	\
+			 softirq_name(RCU))
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+	TP_PROTO(int irq, struct irqaction *action),
+
+	TP_ARGS(irq, action),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq		)
+		__string(	name,	action->name	)
+	),
+
+	TP_fast_assign(
+		__entry->irq = irq;
+		__assign_str(name, action->name);
+	),
+
+	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+);
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler scuccessully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
+TRACE_EVENT(softirq_entry,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec),
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
+);
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * that has handled the softirq. By subtracting the @vec pointer from
+ * the @h pointer, we can determine the softirq number. Also, when used in
+ * combination with the softirq_entry tracepoint we can determine the softirq
+ * latency.
+ */
+TRACE_EVENT(softirq_exit,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec),
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
+);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 0000000..9baba50
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,231 @@
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMEM_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we wil get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)						\
+	(flags) ? __print_flags(flags, "|",				\
+	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"}, \
+	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
+	{(unsigned long)GFP_USER,		"GFP_USER"},		\
+	{(unsigned long)GFP_TEMPORARY,		"GFP_TEMPORARY"},	\
+	{(unsigned long)GFP_KERNEL,		"GFP_KERNEL"},		\
+	{(unsigned long)GFP_NOFS,		"GFP_NOFS"},		\
+	{(unsigned long)GFP_ATOMIC,		"GFP_ATOMIC"},		\
+	{(unsigned long)GFP_NOIO,		"GFP_NOIO"},		\
+	{(unsigned long)__GFP_HIGH,		"GFP_HIGH"},		\
+	{(unsigned long)__GFP_WAIT,		"GFP_WAIT"},		\
+	{(unsigned long)__GFP_IO,		"GFP_IO"},		\
+	{(unsigned long)__GFP_COLD,		"GFP_COLD"},		\
+	{(unsigned long)__GFP_NOWARN,		"GFP_NOWARN"},		\
+	{(unsigned long)__GFP_REPEAT,		"GFP_REPEAT"},		\
+	{(unsigned long)__GFP_NOFAIL,		"GFP_NOFAIL"},		\
+	{(unsigned long)__GFP_NORETRY,		"GFP_NORETRY"},		\
+	{(unsigned long)__GFP_COMP,		"GFP_COMP"},		\
+	{(unsigned long)__GFP_ZERO,		"GFP_ZERO"},		\
+	{(unsigned long)__GFP_NOMEMALLOC,	"GFP_NOMEMALLOC"},	\
+	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
+	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
+	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
+	) : "GFP_NOWAIT"
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+#endif /* _TRACE_KMEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 0000000..0e956c9
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,96 @@
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockdep
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_EVENT(lock_acquire,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, flags)
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+		  (__entry->flags & 2) ? "read " : "",
+		  __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
+	TP_ARGS(lock, nested, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_EVENT(lock_contended,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+	TP_ARGS(lock, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h
similarity index 90%
rename from include/trace/sched_event_types.h
rename to include/trace/events/sched.h
index 63547dc..24ab5bc 100644
--- a/include/trace/sched_event_types.h
+++ b/include/trace/events/sched.h
@@ -1,9 +1,8 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
 
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
@@ -157,6 +156,7 @@
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	prev_pid			)
 		__field(	int,	prev_prio			)
+		__field(	long,	prev_state			)
 		__array(	char,	next_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	next_pid			)
 		__field(	int,	next_prio			)
@@ -166,13 +166,19 @@
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
+		__entry->prev_state	= prev->state;
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->prev_state ?
+		  __print_flags(__entry->prev_state, "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "W" }) : "R",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
@@ -181,9 +187,9 @@
  */
 TRACE_EVENT(sched_migrate_task,
 
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+	TP_PROTO(struct task_struct *p, int dest_cpu),
 
-	TP_ARGS(p, orig_cpu, dest_cpu),
+	TP_ARGS(p, dest_cpu),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -197,7 +203,7 @@
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
+		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
 	),
 
@@ -334,4 +340,7 @@
 		  __entry->sig, __entry->comm, __entry->pid)
 );
 
-#undef TRACE_SYSTEM
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 0000000..1e8fabb
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 0000000..035f1bf
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 0000000..1867553
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,591 @@
+/*
+ * Stage 1 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * struct ftrace_raw_<call> {
+ *	struct trace_entry		ent;
+ *	<type>				<item>;
+ *	<type2>				<item2>[<len>];
+ *	[...]
+ * };
+ *
+ * The <type> <item> is created by the __field(type, item) macro or
+ * the __array(type2, item2, len) macro.
+ * We simply do "type item;", and that will create the fields
+ * in the structure.
+ */
+
+#include <linux/ftrace_event.h>
+
+#undef __field
+#define __field(type, item)		type	item;
+
+#undef __array
+#define __array(type, item, len)	type	item[len];
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {				\
+		struct trace_entry	ent;			\
+		tstruct						\
+		char			__data[0];		\
+	};							\
+	static struct ftrace_event_call event_##name
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+
+/*
+ * Stage 2 of the trace events.
+ *
+ * Include the following:
+ *
+ * struct ftrace_data_offsets_<call> {
+ *	int				<item1>;
+ *	int				<item2>;
+ *	[...]
+ * };
+ *
+ * The __dynamic_array() macro will create each int <item>, this is
+ * to keep the offset of each array from the beginning of the event.
+ */
+
+#undef __field
+#define __field(type, item);
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)	int item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+	struct ftrace_data_offsets_##call {				\
+		tstruct;						\
+	};
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
+	if (!ret)							       \
+		return 0;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef __entry
+#define __entry REC
+
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * enum print_line_t
+ * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
+ * {
+ *	struct trace_seq *s = &iter->seq;
+ *	struct ftrace_raw_<call> *field; <-- defined in stage 1
+ *	struct trace_entry *entry;
+ *	struct trace_seq *p;
+ *	int ret;
+ *
+ *	entry = iter->ent;
+ *
+ *	if (entry->type != event_<call>.id) {
+ *		WARN_ON_ONCE(1);
+ *		return TRACE_TYPE_UNHANDLED;
+ *	}
+ *
+ *	field = (typeof(field))entry;
+ *
+ *	p = get_cpu_var(ftrace_event_seq);
+ *	trace_seq_init(p);
+ *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	put_cpu();
+ *	if (!ret)
+ *		return TRACE_TYPE_PARTIAL_LINE;
+ *
+ *	return TRACE_TYPE_HANDLED;
+ * }
+ *
+ * This is the method used to print the raw event to the trace
+ * output format. Note, this is not needed if the data is read
+ * in binary.
+ */
+
+#undef __entry
+#define __entry field
+
+#undef TP_printk
+#define TP_printk(fmt, args...) fmt "\n", args
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + __entry->__data_loc_##field)
+
+#undef __get_str
+#define __get_str(field) (char *)__get_dynamic_array(field)
+
+#undef __print_flags
+#define __print_flags(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags flags[] =		\
+			{ flag_array, { -1, NULL }};			\
+		ftrace_print_flags_seq(p, delim, flag, flags);		\
+	})
+
+#undef __print_symbolic
+#define __print_symbolic(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags symbols[] =	\
+			{ symbol_array, { -1, NULL }};			\
+		ftrace_print_symbols_seq(p, value, symbols);		\
+	})
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+enum print_line_t							\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##call *field;				\
+	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
+	ret = trace_seq_printf(s, #call ": " print);			\
+	put_cpu();							\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+	
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __field
+#define __field(type, item)						\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed_type(type));	\
+	if (ret)							\
+		return ret;
+
+#undef __array
+#define __array(type, item, len)					\
+	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
+				offsetof(typeof(field), __data_loc_##item),    \
+				 sizeof(field.__data_loc_##item), 0);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_raw_##call field;					\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	int ret;							\
+									\
+	__common_field(int, type, 1);					\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * remember the offset of each array from the beginning of the event.
+ */
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__data_offsets->item = __data_size +				\
+			       offsetof(typeof(*entry), __data);	\
+	__data_size += (len) * sizeof(type);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+static inline int ftrace_get_offsets_##call(				\
+	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
+{									\
+	int __data_size = 0;						\
+	struct ftrace_raw_##call __maybe_unused *entry;			\
+									\
+	tstruct;							\
+									\
+	return __data_size;						\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 4 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * static void ftrace_event_<call>(proto)
+ * {
+ *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
+ * }
+ *
+ * static int ftrace_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to  <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_event_<call>);
+ * }
+ *
+ *
+ * For those macros defined with TRACE_EVENT:
+ *
+ * static struct ftrace_event_call event_<call>;
+ *
+ * static void ftrace_raw_event_<call>(proto)
+ * {
+ *	struct ring_buffer_event *event;
+ *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
+ *	unsigned long irq_flags;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	event = trace_current_buffer_lock_reserve(event_<call>.id,
+ *				  sizeof(struct ftrace_raw_<call>),
+ *				  irq_flags, pc);
+ *	if (!event)
+ *		return;
+ *	entry	= ring_buffer_event_data(event);
+ *
+ *	<assign>;  <-- Here we assign the entries by the __field and
+ *			__array macros.
+ *
+ *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
+ * }
+ *
+ * static int ftrace_raw_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_raw_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_raw_event_<call>);
+ * }
+ *
+ * static struct trace_event ftrace_event_type_<call> = {
+ *	.trace			= ftrace_raw_output_<call>, <-- stage 2
+ * };
+ *
+ * static int ftrace_raw_init_event_<call>(void)
+ * {
+ *	int id;
+ *
+ *	id = register_ftrace_event(&ftrace_event_type_<call>);
+ *	if (!id)
+ *		return -ENODEV;
+ *	event_<call>.id = id;
+ *	return 0;
+ * }
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.system			= "<system>",
+ *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ *	.show_format		= ftrace_format_<call>,
+ * }
+ *
+ */
+
+#undef TP_FMT
+#define TP_FMT(fmt, args...)	fmt "\n", ##args
+
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args)				\
+static void ftrace_profile_##call(proto)				\
+{									\
+	extern void perf_tpcounter_event(int);				\
+	perf_tpcounter_event(event_##call.id);				\
+}									\
+									\
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{									\
+	int ret = 0;							\
+									\
+	if (!atomic_inc_return(&event_call->profile_count))		\
+		ret = register_trace_##call(ftrace_profile_##call);	\
+									\
+	return ret;							\
+}									\
+									\
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{									\
+	if (atomic_add_negative(-1, &event_call->profile_count))	\
+		unregister_trace_##call(ftrace_profile_##call);		\
+}
+
+#define _TRACE_PROFILE_INIT(call)					\
+	.profile_count = ATOMIC_INIT(-1),				\
+	.profile_enable = ftrace_profile_enable_##call,			\
+	.profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__entry->__data_loc_##item = __data_offsets.item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)       	\
+
+#undef __assign_str
+#define __assign_str(dst, src)						\
+	strcpy(__get_str(dst), src);
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	struct ring_buffer_event *event;				\
+	struct ftrace_raw_##call *entry;				\
+	unsigned long irq_flags;					\
+	int __data_size;						\
+	int pc;								\
+									\
+	local_save_flags(irq_flags);					\
+	pc = preempt_count();						\
+									\
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+									\
+	event = trace_current_buffer_lock_reserve(event_##call.id,	\
+				 sizeof(*entry) + __data_size,		\
+				 irq_flags, pc);			\
+	if (!event)							\
+		return;							\
+	entry	= ring_buffer_event_data(event);			\
+									\
+									\
+	tstruct								\
+									\
+	{ assign; }							\
+									\
+	if (!filter_current_check_discard(event_call, entry, event))	\
+		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+}									\
+									\
+static int ftrace_raw_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_raw_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_raw_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_raw_event_##call);		\
+}									\
+									\
+static struct trace_event ftrace_event_type_##call = {			\
+	.trace			= ftrace_raw_output_##call,		\
+};									\
+									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(&ftrace_event_type_##call);		\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index ff5d449..0000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_IRQ_H
-#define _TRACE_IRQ_H
-
-#include <linux/interrupt.h>
-#include <linux/tracepoint.h>
-
-#include <trace/irq_event_types.h>
-
-#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964eb..0000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-
-/* use <trace/irq.h> instead */
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f..0000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <linux/tracepoint.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 5ca67df..0000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_LOCKDEP_H
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#include <trace/lockdep_event_types.h>
-
-#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index adccfcd..0000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_FORMAT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#endif
-#endif
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index 4e372a1..0000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_SCHED_H
-#define _TRACE_SCHED_H
-
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-#include <trace/sched_event_types.h>
-
-#endif
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index b66206d..0000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _TRACE_SKB_H_
-#define _TRACE_SKB_H_
-
-#include <linux/skbuff.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(kfree_skb,
-	TP_PROTO(struct sk_buff *skb, void *location),
-	TP_ARGS(skb, location));
-
-#endif
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index df56f56..0000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>_event_types.h here */
-
-#include <trace/sched_event_types.h>
-#include <trace/irq_event_types.h>
-#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index fd13750..0000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>.h here */
-
-#include <trace/sched.h>
-#include <trace/irq.h>
-#include <trace/lockdep.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523..0000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/init/Kconfig b/init/Kconfig
index 7be4d38..d4e9671 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -308,7 +308,7 @@
 
 choice
 	prompt "RCU Implementation"
-	default CLASSIC_RCU
+	default TREE_RCU
 
 config CLASSIC_RCU
 	bool "Classic RCU"
diff --git a/init/main.c b/init/main.c
index d721dad..bb7dc57 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -71,7 +72,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
-#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189..87c2b64 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@
 		int i;
 
 		rcu_read_lock();
-		un = list_entry(rcu_dereference(ulp->list_proc.next),
-					struct sem_undo, list_proc);
+		un = list_entry_rcu(ulp->list_proc.next,
+				    struct sem_undo, list_proc);
 		if (&un->list_proc == &ulp->list_proc)
 			semid = -1;
 		 else
diff --git a/kernel/Makefile b/kernel/Makefile
index 4242366..a35eee34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 
diff --git a/kernel/compat.c b/kernel/compat.c
index 42d5654..f6c204f0 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -882,6 +882,17 @@
 
 }
 
+asmlinkage long
+compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+			     struct compat_siginfo __user *uinfo)
+{
+	siginfo_t info;
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/exit.c b/kernel/exit.c
index 036e8d7..51d1fe3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -56,10 +56,6 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index 875ffbd..bb762b4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,6 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
-#include <trace/sched.h>
 #include <linux/magic.h>
 
 #include <asm/pgtable.h>
@@ -71,6 +70,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -83,8 +84,6 @@
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
@@ -982,6 +981,8 @@
 	if (!p)
 		goto fork_out;
 
+	ftrace_graph_init_task(p);
+
 	rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1089,8 +1090,8 @@
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
@@ -1131,8 +1132,6 @@
 		}
 	}
 
-	ftrace_graph_init_task(p);
-
 	p->pid = pid_nr(pid);
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1140,7 @@
 	if (current->nsproxy != p->nsproxy) {
 		retval = ns_cgroup_clone(p, pid);
 		if (retval)
-			goto bad_fork_free_graph;
+			goto bad_fork_free_pid;
 	}
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1232,7 @@
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_free_graph;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1267,6 @@
 	cgroup_post_fork(p);
 	return p;
 
-bad_fork_free_graph:
-	ftrace_graph_exit_task(p);
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 18041a2..a600184 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,8 +18,8 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <trace/irq.h>
 #include <linux/bootmem.h>
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
@@ -355,9 +355,6 @@
 	       "but no thread function available.", irq, action->name);
 }
 
-DEFINE_TRACE(irq_handler_entry);
-DEFINE_TRACE(irq_handler_exit);
-
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf85..41c88fe 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -21,9 +21,6 @@
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-DEFINE_TRACE(sched_kthread_stop);
-DEFINE_TRACE(sched_kthread_stop_ret);
-
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index accb40c..8bbeef9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
 #include <linux/hash.h>
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
-#include <trace/lockdep.h>
 
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/lockdep.h>
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
 
-DEFINE_TRACE(lock_acquire);
-
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@
 }
 EXPORT_SYMBOL_GPL(lock_acquire);
 
-DEFINE_TRACE(lock_release);
-
 void lock_release(struct lockdep_map *lock, int nested,
 			  unsigned long ip)
 {
@@ -3105,6 +3103,8 @@
 		hlock->holdtime_stamp = now;
 	}
 
+	trace_lock_acquired(lock, ip, waittime);
+
 	stats = get_lock_stats(hlock_class(hlock));
 	if (waittime) {
 		if (hlock->read)
@@ -3120,8 +3120,6 @@
 	lock->ip = ip;
 }
 
-DEFINE_TRACE(lock_contended);
-
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
@@ -3143,14 +3141,10 @@
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-DEFINE_TRACE(lock_acquired);
-
 void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
-
 	if (unlikely(!lock_stat))
 		return;
 
diff --git a/kernel/module.c b/kernel/module.c
index cb3887e..278e9b6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
 */
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
@@ -1492,9 +1493,6 @@
 	/* Free any allocated parameters. */
 	destroy_params(mod->kp, mod->num_kp);
 
-	/* release any pointers to mcount in this module */
-	ftrace_release(mod->module_core, mod->core_size);
-
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1895,11 +1893,9 @@
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int num_mcount;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2175,7 +2171,19 @@
 					sizeof(*mod->tracepoints),
 					&mod->num_tracepoints);
 #endif
-
+#ifdef CONFIG_EVENT_TRACING
+	mod->trace_events = section_objs(hdr, sechdrs, secstrings,
+					 "_ftrace_events",
+					 sizeof(*mod->trace_events),
+					 &mod->num_trace_events);
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	/* sechdrs[0].sh_size is always zero */
+	mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+					     "__mcount_loc",
+					     sizeof(*mod->ftrace_callsites),
+					     &mod->num_ftrace_callsites);
+#endif
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
 	    || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2240,11 +2248,6 @@
 			dynamic_debug_setup(debug, num_debug);
 	}
 
-	/* sechdrs[0].sh_size is always zero */
-	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mod, mseg, mseg + num_mcount);
-
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2305,7 +2308,6 @@
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
-	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 43a5a3b..f6d8b8c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -25,16 +25,6 @@
 
 
 /*
- * Initialize a new task whose father had been ptraced.
- *
- * Called from copy_process().
- */
-void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	arch_ptrace_fork(child, clone_flags);
-}
-
-/*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ce97a4d..beb0e65 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1356,17 +1356,11 @@
 
 		rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
 		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
-		ret = 0;
+		ret = 0; /* unused */
 		__wait_event_interruptible(rcu_ctrlblk.sched_wq,
 			rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
 			ret);
 
-		/*
-		 * Signals would prevent us from sleeping, and we cannot
-		 * do much with them in any case.  So flush them.
-		 */
-		if (ret)
-			flush_signals(current);
 		couldsleepnext = 0;
 
 	} while (!kthread_should_stop());
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d2a372f..0dccfbb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1259,31 +1259,44 @@
 	check_cpu_stall(rsp, rdp);
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->qs_pending)
+	if (rdp->qs_pending) {
+		rdp->n_rp_qs_pending++;
 		return 1;
+	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+		rdp->n_rp_cb_ready++;
 		return 1;
+	}
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		rdp->n_rp_cpu_needs_gp++;
 		return 1;
+	}
 
 	/* Has another RCU grace period completed?  */
-	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
+		rdp->n_rp_gp_completed++;
 		return 1;
+	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
+		rdp->n_rp_gp_started++;
 		return 1;
+	}
 
 	/* Has an RCU GP gone long enough to send resched IPIs &c? */
 	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
-	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+		rdp->n_rp_need_fqs++;
 		return 1;
+	}
 
 	/* nothing to do */
+	rdp->n_rp_need_nothing++;
 	return 0;
 }
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4b1875b..fe1dcdb 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -213,7 +213,63 @@
 	.release = single_release,
 };
 
-static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+	seq_printf(m, "%3d%cnp=%ld "
+		   "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->n_rcu_pending,
+		   rdp->n_rp_qs_pending,
+		   rdp->n_rp_cb_ready,
+		   rdp->n_rp_cpu_needs_gp,
+		   rdp->n_rp_gp_completed,
+		   rdp->n_rp_gp_started,
+		   rdp->n_rp_need_fqs,
+		   rdp->n_rp_need_nothing);
+}
+
+static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+{
+	int cpu;
+	struct rcu_data *rdp;
+
+	for_each_possible_cpu(cpu) {
+		rdp = rsp->rda[cpu];
+		if (rdp->beenonline)
+			print_one_rcu_pending(m, rdp);
+	}
+}
+
+static int show_rcu_pending(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_rcu_pendings(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_rcu_pendings(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcu_pending, NULL);
+}
+
+static struct file_operations rcu_pending_fops = {
+	.owner = THIS_MODULE,
+	.open = rcu_pending_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir;
+static struct dentry *datadir;
+static struct dentry *datadir_csv;
+static struct dentry *gpdir;
+static struct dentry *hierdir;
+static struct dentry *rcu_pendingdir;
+
 static int __init rcuclassic_trace_init(void)
 {
 	rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@
 						NULL, &rcuhier_fops);
 	if (!hierdir)
 		goto free_out;
+
+	rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
+						NULL, &rcu_pending_fops);
+	if (!rcu_pendingdir)
+		goto free_out;
 	return 0;
 free_out:
 	if (datadir)
@@ -257,6 +318,7 @@
 	debugfs_remove(datadir_csv);
 	debugfs_remove(gpdir);
 	debugfs_remove(hierdir);
+	debugfs_remove(rcu_pendingdir);
 	debugfs_remove(rcudir);
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 076e403..14c447a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,13 +72,15 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1964,7 +1960,7 @@
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
-	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+	trace_sched_migrate_task(p, new_cpu);
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -2021,6 +2017,49 @@
 }
 
 /*
+ * wait_task_context_switch -	wait for a thread to complete at least one
+ *				context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+	unsigned long nvcsw, nivcsw, flags;
+	int running;
+	struct rq *rq;
+
+	nvcsw	= p->nvcsw;
+	nivcsw	= p->nivcsw;
+	for (;;) {
+		/*
+		 * The runqueue is assigned before the actual context
+		 * switch. We need to take the runqueue lock.
+		 *
+		 * We could check initially without the lock but it is
+		 * very likely that we need to take the lock in every
+		 * iteration.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		task_rq_unlock(rq, &flags);
+
+		if (likely(!running))
+			break;
+		/*
+		 * The switch count is incremented before the actual
+		 * context switch. We thus wait for two switches to be
+		 * sure at least one completed.
+		 */
+		if ((p->nvcsw - nvcsw) > 1)
+			break;
+		if ((p->nivcsw - nivcsw) > 1)
+			break;
+
+		cpu_relax();
+	}
+}
+
+/*
  * wait_task_inactive - wait for a thread to unschedule.
  *
  * If @match_state is nonzero, it's the @p->state value just checked and
diff --git a/kernel/signal.c b/kernel/signal.c
index d2dd9cf..809a228 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -41,8 +41,6 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
-DEFINE_TRACE(sched_signal_send);
-
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
@@ -2283,24 +2281,17 @@
 	return kill_something_info(sig, &info, pid);
 }
 
-static int do_tkill(pid_t tgid, pid_t pid, int sig)
+static int
+do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
-	int error;
-	struct siginfo info;
 	struct task_struct *p;
 	unsigned long flags;
-
-	error = -ESRCH;
-	info.si_signo = sig;
-	info.si_errno = 0;
-	info.si_code = SI_TKILL;
-	info.si_pid = task_tgid_vnr(current);
-	info.si_uid = current_uid();
+	int error = -ESRCH;
 
 	rcu_read_lock();
 	p = find_task_by_vpid(pid);
 	if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
-		error = check_kill_permission(sig, &info, p);
+		error = check_kill_permission(sig, info, p);
 		/*
 		 * The null signal is a permissions and process existence
 		 * probe.  No signal is actually delivered.
@@ -2310,7 +2301,7 @@
 		 * signal is private anyway.
 		 */
 		if (!error && sig && lock_task_sighand(p, &flags)) {
-			error = specific_send_sig_info(sig, &info, p);
+			error = specific_send_sig_info(sig, info, p);
 			unlock_task_sighand(p, &flags);
 		}
 	}
@@ -2319,6 +2310,19 @@
 	return error;
 }
 
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
+{
+	struct siginfo info;
+
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_TKILL;
+	info.si_pid = task_tgid_vnr(current);
+	info.si_uid = current_uid();
+
+	return do_send_specific(tgid, pid, sig, &info);
+}
+
 /**
  *  sys_tgkill - send signal to one specific thread
  *  @tgid: the thread group ID of the thread
@@ -2368,6 +2372,32 @@
 	return kill_proc_info(sig, &info, pid);
 }
 
+long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+{
+	/* This is only valid for single tasks */
+	if (pid <= 0 || tgid <= 0)
+		return -EINVAL;
+
+	/* Not even root can pretend to send signals from the kernel.
+	   Nor can they impersonate a kill(), which adds source info.  */
+	if (info->si_code >= 0)
+		return -EPERM;
+	info->si_signo = sig;
+
+	return do_send_specific(tgid, pid, sig, info);
+}
+
+SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
+		siginfo_t __user *, uinfo)
+{
+	siginfo_t info;
+
+	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+		return -EFAULT;
+
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *t = current;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f674f33..258885a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,9 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
-#include <trace/irq.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
@@ -186,9 +188,6 @@
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-DEFINE_TRACE(softirq_entry);
-DEFINE_TRACE(softirq_exit);
-
 asmlinkage void __do_softirq(void)
 {
 	struct softirq_action *h;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 417d198..4a13e5a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,21 @@
        depends on HAVE_FTRACE_NMI_ENTER
        default y
 
+config EVENT_TRACING
+	select CONTEXT_SWITCH_TRACER
+	bool
+
+config CONTEXT_SWITCH_TRACER
+	select MARKERS
+	bool
+
+# All tracer options should select GENERIC_TRACER. For those options that are
+# enabled by all tracers (context switch and event tracer) they select TRACING.
+# This allows those options to appear when no other tracer is selected. But the
+# options do not appear when something else selects it. We need the two options
+# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
+# hidding of the automatic options options.
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -56,6 +71,11 @@
 	select TRACEPOINTS
 	select NOP_TRACER
 	select BINARY_PRINTF
+	select EVENT_TRACING
+
+config GENERIC_TRACER
+	bool
+	select TRACING
 
 #
 # Minimum requirements an architecture has to meet for us to
@@ -73,14 +93,20 @@
 
 if TRACING_SUPPORT
 
-menu "Tracers"
+menuconfig FTRACE
+	bool "Tracers"
+	default y if DEBUG_KERNEL
+	help
+	 Enable the kernel tracing infrastructure.
+
+if FTRACE
 
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
 	depends on HAVE_FUNCTION_TRACER
 	select FRAME_POINTER
 	select KALLSYMS
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
@@ -104,13 +130,14 @@
 	  the return value. This is done by setting the current return 
 	  address on the current task structure into a stack of calls.
 
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	select TRACE_IRQFLAGS
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in irqs-off critical
@@ -131,7 +158,7 @@
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in preemption off critical
@@ -150,7 +177,7 @@
 config SYSPROF_TRACER
 	bool "Sysprof Tracer"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
@@ -158,40 +185,33 @@
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config CONTEXT_SWITCH_TRACER
-	bool "Trace process context switches"
-	select TRACING
-	select MARKERS
-	help
-	  This tracer gets called from the context switch and records
-	  all switching of tasks.
-
-config EVENT_TRACER
-	bool "Trace various events in the kernel"
+config ENABLE_DEFAULT_TRACERS
+	bool "Trace process context switches and events"
+	depends on !GENERIC_TRACER
 	select TRACING
 	help
 	  This tracer hooks to various trace points in the kernel
 	  allowing the user to pick and choose which trace point they
-	  want to trace.
+	  want to trace. It also includes the sched_switch tracer plugin.
 
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
 	depends on HAVE_FTRACE_SYSCALLS
-	select TRACING
+	select GENERIC_TRACER
 	select KALLSYMS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
 config BOOT_TRACER
 	bool "Trace boot initcalls"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer helps developers to optimize boot times: it records
@@ -207,8 +227,36 @@
 	  to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
+	bool
+	select GENERIC_TRACER
+
+choice
+	prompt "Branch Profiling"
+	default BRANCH_PROFILE_NONE
+	help
+	 The branch profiling is a software profiler. It will add hooks
+	 into the C conditionals to test which path a branch takes.
+
+	 The likely/unlikely profiler only looks at the conditions that
+	 are annotated with a likely or unlikely macro.
+
+	 The "all branch" profiler will profile every if statement in the
+	 kernel. This profiler will also enable the likely/unlikely
+	 profiler as well.
+
+	 Either of the above profilers add a bit of overhead to the system.
+	 If unsure choose "No branch profiling".
+
+config BRANCH_PROFILE_NONE
+	bool "No branch profiling"
+	help
+	 No branch profiling. Branch profiling adds a bit of overhead.
+	 Only enable it if you want to analyse the branching behavior.
+	 Otherwise keep it disabled.
+
+config PROFILE_ANNOTATED_BRANCHES
 	bool "Trace likely/unlikely profiler"
-	select TRACING
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
@@ -218,11 +266,9 @@
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
 
-	  Say N if unsure.
-
 config PROFILE_ALL_BRANCHES
 	bool "Profile all if conditionals"
-	depends on TRACE_BRANCH_PROFILING
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all branch conditions. Every if ()
 	  taken in the kernel is recorded whether it hit or miss.
@@ -230,11 +276,12 @@
 
 	  /debugfs/tracing/profile_branch
 
+	  This option also enables the likely/unlikely profiler.
+
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
 	  is to be analyzed
-
-	  Say N if unsure.
+endchoice
 
 config TRACING_BRANCHES
 	bool
@@ -261,7 +308,7 @@
 config POWER_TRACER
 	bool "Trace power consumption behavior"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer helps developers to analyze and optimize the kernels
 	  power management decisions, specifically the C-state and P-state
@@ -295,14 +342,14 @@
 config HW_BRANCH_TRACER
 	depends on HAVE_HW_BRANCH_TRACER
 	bool "Trace hw branches"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
 	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -322,7 +369,7 @@
 
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  The workqueue tracer provides some statistical informations
           about each cpu workqueue thread such as the number of the
@@ -338,7 +385,7 @@
 	select RELAY
 	select DEBUG_FS
 	select TRACEPOINTS
-	select TRACING
+	select GENERIC_TRACER
 	select STACKTRACE
 	help
 	  Say Y here if you want to be able to trace the block layer actions
@@ -375,6 +422,20 @@
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FUNCTION_PROFILER
+	bool "Kernel function profiler"
+	depends on FUNCTION_TRACER
+	default n
+	help
+	 This option enables the kernel function profiler. A file is created
+	 in debugfs called function_profile_enabled which defaults to zero.
+	 When a 1 is echoed into this file profiling begins, and when a
+	 zero is entered, profiling stops. A file in the trace_stats
+	 directory called functions, that show the list of functions that
+	 have been hit and their counters.
+
+	 If in doubt, say N
+
 config FTRACE_MCOUNT_RECORD
 	def_bool y
 	depends on DYNAMIC_FTRACE
@@ -385,7 +446,7 @@
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on GENERIC_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
@@ -396,7 +457,7 @@
 config MMIOTRACE
 	bool "Memory mapped IO tracing"
 	depends on HAVE_MMIOTRACE_SUPPORT && PCI
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  Mmiotrace traces Memory Mapped I/O access and is meant for
 	  debugging and reverse engineering. It is called from the ioremap
@@ -416,7 +477,23 @@
 
 	  Say N, unless you absolutely know what you are doing.
 
-endmenu
+config RING_BUFFER_BENCHMARK
+	tristate "Ring buffer benchmark stress tester"
+	depends on RING_BUFFER
+	help
+	  This option creates a test to stress the ring buffer and bench mark it.
+	  It creates its own ring buffer such that it will not interfer with
+	  any other users of the ring buffer (such as ftrace). It then creates
+	  a producer and consumer that will run for 10 seconds and sleep for
+	  10 seconds. Each interval it will print out the number of events
+	  it recorded and give a rough estimate of how long each iteration took.
+
+	  It does not disable interrupts or raise its priority, so it may be
+	  affected by processes that are running.
+
+	  If unsure, say N
+
+endif # FTRACE
 
 endif # TRACING_SUPPORT
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f51..844164d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,11 +15,17 @@
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+#
+# Make the trace clocks available generally: it's infrastructure
+# relied on by ptrace for example:
+#
+obj-y += trace_clock.o
+
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
+obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
 
 obj-$(CONFIG_TRACING) += trace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
@@ -39,12 +45,14 @@
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events.o
-obj-$(CONFIG_EVENT_TRACER) += events.o
-obj-$(CONFIG_EVENT_TRACER) += trace_export.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
+obj-$(CONFIG_EVENT_TRACING) += trace_events.o
+obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d..7bd6a98 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -147,7 +151,7 @@
 {
 	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
 		return 1;
-	if (sector < bt->start_lba || sector > bt->end_lba)
+	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
 		return 1;
 	if (bt->pid && pid != bt->pid)
 		return 1;
@@ -192,7 +196,7 @@
 	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
-	if (unlikely(act_log_check(bt, what, sector, pid)))
+	if (act_log_check(bt, what, sector, pid))
 		return;
 	cpu = raw_smp_processor_id();
 
@@ -262,6 +266,7 @@
 {
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
+	debugfs_remove(bt->dir);
 	relay_close(bt->rchan);
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
@@ -403,11 +408,29 @@
 	.remove_buf_file	= blk_remove_buf_file_callback,
 };
 
+static void blk_trace_setup_lba(struct blk_trace *bt,
+				struct block_device *bdev)
+{
+	struct hd_struct *part = NULL;
+
+	if (bdev)
+		part = bdev->bd_part;
+
+	if (part) {
+		bt->start_lba = part->start_sect;
+		bt->end_lba = part->start_sect + part->nr_sects;
+	} else {
+		bt->start_lba = 0;
+		bt->end_lba = -1ULL;
+	}
+}
+
 /*
  * Setup everything required to start tracing
  */
 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
-			struct blk_user_trace_setup *buts)
+		       struct block_device *bdev,
+		       struct blk_user_trace_setup *buts)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	struct dentry *dir = NULL;
@@ -480,10 +503,13 @@
 	if (!bt->act_mask)
 		bt->act_mask = (u16) -1;
 
-	bt->start_lba = buts->start_lba;
-	bt->end_lba = buts->end_lba;
-	if (!bt->end_lba)
-		bt->end_lba = -1ULL;
+	blk_trace_setup_lba(bt, bdev);
+
+	/* overwrite with user settings */
+	if (buts->start_lba)
+		bt->start_lba = buts->start_lba;
+	if (buts->end_lba)
+		bt->end_lba = buts->end_lba;
 
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
@@ -505,6 +531,7 @@
 }
 
 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+		    struct block_device *bdev,
 		    char __user *arg)
 {
 	struct blk_user_trace_setup buts;
@@ -514,7 +541,7 @@
 	if (ret)
 		return -EFAULT;
 
-	ret = do_blk_trace_setup(q, name, dev, &buts);
+	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
@@ -582,7 +609,7 @@
 	switch (cmd) {
 	case BLKTRACESETUP:
 		bdevname(bdev, b);
-		ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
+		ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 	case BLKTRACESTART:
 		start = 1;
@@ -809,7 +836,6 @@
  * @bio:	the source bio
  * @dev:	target device
  * @from:	source sector
- * @to:		target sector
  *
  * Description:
  *     Device mapper or raid target sometimes need to split a bio because
@@ -817,7 +843,7 @@
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
+				       dev_t dev, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -825,12 +851,13 @@
 	if (likely(!bt))
 		return;
 
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector_from = cpu_to_be64(from);
 
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
-			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
+			sizeof(r), &r);
 }
 
 /**
@@ -971,6 +998,16 @@
 	return te_blk_io_trace(ent) + 1;
 }
 
+static inline u32 t_action(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->action;
+}
+
+static inline u32 t_bytes(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->bytes;
+}
+
 static inline u32 t_sec(const struct trace_entry *ent)
 {
 	return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1033,11 @@
 			  struct blk_io_trace_remap *r)
 {
 	const struct blk_io_trace_remap *__r = pdu_start(ent);
-	__u64 sector = __r->sector;
+	__u64 sector_from = __r->sector_from;
 
-	r->device = be32_to_cpu(__r->device);
 	r->device_from = be32_to_cpu(__r->device_from);
-	r->sector = be64_to_cpu(sector);
+	r->device_to   = be32_to_cpu(__r->device_to);
+	r->sector_from = be64_to_cpu(sector_from);
 }
 
 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1068,98 @@
 				MAJOR(t->device), MINOR(t->device), act, rwbs);
 }
 
+static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
+{
+	const unsigned char *pdu_buf;
+	int pdu_len;
+	int i, end, ret;
+
+	pdu_buf = pdu_start(ent);
+	pdu_len = te_blk_io_trace(ent)->pdu_len;
+
+	if (!pdu_len)
+		return 1;
+
+	/* find the last zero that needs to be printed */
+	for (end = pdu_len - 1; end >= 0; end--)
+		if (pdu_buf[end])
+			break;
+	end++;
+
+	if (!trace_seq_putc(s, '('))
+		return 0;
+
+	for (i = 0; i < pdu_len; i++) {
+
+		ret = trace_seq_printf(s, "%s%02x",
+				       i == 0 ? "" : " ", pdu_buf[i]);
+		if (!ret)
+			return ret;
+
+		/*
+		 * stop when the rest is just zeroes and indicate so
+		 * with a ".." appended
+		 */
+		if (i == end && end != pdu_len - 1)
+			return trace_seq_puts(s, " ..) ");
+	}
+
+	return trace_seq_puts(s, ") ");
+}
+
 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
 {
 	char cmd[TASK_COMM_LEN];
 
 	trace_find_cmdline(ent->pid, cmd);
 
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%s]\n",
-					t_sector(ent), t_sec(ent), cmd);
-	return trace_seq_printf(s, "[%s]\n", cmd);
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = trace_seq_printf(s, "%u ", t_bytes(ent));
+		if (!ret)
+			return 0;
+		ret = blk_log_dump_pdu(s, ent);
+		if (!ret)
+			return 0;
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%s]\n",
+						t_sector(ent), t_sec(ent), cmd);
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	}
 }
 
 static int blk_log_with_error(struct trace_seq *s,
 			      const struct trace_entry *ent)
 {
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
-					t_sec(ent), t_error(ent));
-	return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = blk_log_dump_pdu(s, ent);
+		if (ret)
+			return trace_seq_printf(s, "[%d]\n", t_error(ent));
+		return 0;
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%d]\n",
+						t_sector(ent),
+						t_sec(ent), t_error(ent));
+		return trace_seq_printf(s, "%llu [%d]\n",
+					t_sector(ent), t_error(ent));
+	}
 }
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
 {
-	struct blk_io_trace_remap r = { .device = 0, };
+	struct blk_io_trace_remap r = { .device_from = 0, };
 
 	get_pdu_remap(ent, &r);
 	return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
-			       t_sector(ent),
-			       t_sec(ent), MAJOR(r.device), MINOR(r.device),
-			       (unsigned long long)r.sector);
+				t_sector(ent), t_sec(ent),
+				MAJOR(r.device_from), MINOR(r.device_from),
+				(unsigned long long)r.sector_from);
 }
 
 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1216,6 @@
 static void blk_tracer_start(struct trace_array *tr)
 {
 	blk_tracer_enabled = true;
-	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
 }
 
 static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1228,6 @@
 static void blk_tracer_stop(struct trace_array *tr)
 {
 	blk_tracer_enabled = false;
-	trace_flags |= TRACE_ITER_CONTEXT_INFO;
 }
 
 static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1279,7 @@
 	}
 
 	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
-		ret = trace_seq_printf(s, "Bad pc action %x\n", what);
+		ret = trace_seq_printf(s, "Unknown action %x\n", what);
 	else {
 		ret = log_action(iter, what2act[what].act[long_act]);
 		if (ret)
@@ -1195,9 +1292,6 @@
 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
 					       int flags)
 {
-	if (!trace_print_context(iter))
-		return TRACE_TYPE_PARTIAL_LINE;
-
 	return print_one_line(iter, false);
 }
 
@@ -1232,6 +1326,18 @@
 	return print_one_line(iter, true);
 }
 
+static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
+{
+	/* don't output context-info for blk_classic output */
+	if (bit == TRACE_BLK_OPT_CLASSIC) {
+		if (set)
+			trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+		else
+			trace_flags |= TRACE_ITER_CONTEXT_INFO;
+	}
+	return 0;
+}
+
 static struct tracer blk_tracer __read_mostly = {
 	.name		= "blk",
 	.init		= blk_tracer_init,
@@ -1241,6 +1347,7 @@
 	.print_header	= blk_tracer_print_header,
 	.print_line	= blk_tracer_print_line,
 	.flags		= &blk_tracer_flags,
+	.set_flag	= blk_tracer_set_flag,
 };
 
 static struct trace_event trace_blk_event = {
@@ -1285,7 +1392,8 @@
 /*
  * Setup everything required to start tracing
  */
-static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
+static int blk_trace_setup_queue(struct request_queue *q,
+				 struct block_device *bdev)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	int ret = -ENOMEM;
@@ -1298,9 +1406,10 @@
 	if (!bt->msg_data)
 		goto free_bt;
 
-	bt->dev = dev;
+	bt->dev = bdev->bd_dev;
 	bt->act_mask = (u16)-1;
-	bt->end_lba = -1ULL;
+
+	blk_trace_setup_lba(bt, bdev);
 
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt != NULL) {
@@ -1517,7 +1626,7 @@
 
 	if (attr == &dev_attr_enable) {
 		if (value)
-			ret = blk_trace_setup_queue(q, bdev->bd_dev);
+			ret = blk_trace_setup_queue(q, bdev);
 		else
 			ret = blk_trace_remove_queue(q);
 		goto out_unlock_bdev;
@@ -1525,7 +1634,7 @@
 
 	ret = 0;
 	if (q->blk_trace == NULL)
-		ret = blk_trace_setup_queue(q, bdev->bd_dev);
+		ret = blk_trace_setup_queue(q, bdev);
 
 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
@@ -1548,3 +1657,80 @@
 	return ret ? ret : count;
 }
 
+int blk_trace_init_sysfs(struct device *dev)
+{
+	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
+}
+
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	int i, end;
+	int len = rq->cmd_len;
+	unsigned char *cmd = rq->cmd;
+
+	if (!blk_pc_request(rq)) {
+		buf[0] = '\0';
+		return;
+	}
+
+	for (end = len - 1; end >= 0; end--)
+		if (cmd[end])
+			break;
+	end++;
+
+	for (i = 0; i < len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+		if (i == end && end != len - 1) {
+			sprintf(buf, " ..");
+			break;
+		}
+	}
+}
+
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+	int i = 0;
+
+	if (rw & WRITE)
+		rwbs[i++] = 'W';
+	else if (rw & 1 << BIO_RW_DISCARD)
+		rwbs[i++] = 'D';
+	else if (bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (rw & 1 << BIO_RW_AHEAD)
+		rwbs[i++] = 'A';
+	if (rw & 1 << BIO_RW_BARRIER)
+		rwbs[i++] = 'B';
+	if (rw & 1 << BIO_RW_SYNCIO)
+		rwbs[i++] = 'S';
+	if (rw & 1 << BIO_RW_META)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+	int rw = rq->cmd_flags & 0x03;
+	int bytes;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	if (blk_pc_request(rq))
+		bytes = rq->data_len;
+	else
+		bytes = rq->hard_nr_sectors << 9;
+
+	blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 246f2aa..0000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This is the place to register all trace points as events.
- */
-
-#include <linux/stringify.h>
-
-#include <trace/trace_events.h>
-
-#include "trace_output.h"
-
-#include "trace_events_stage_1.h"
-#include "trace_events_stage_2.h"
-#include "trace_events_stage_3.h"
-
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ed080..bb60732 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,11 +29,13 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
-#include "trace.h"
+#include "trace_output.h"
+#include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
 	do {					\
@@ -68,7 +70,7 @@
 
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
-	.func = ftrace_stub,
+	.func		= ftrace_stub,
 };
 
 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +242,580 @@
 #endif
 }
 
+#ifdef CONFIG_FUNCTION_PROFILER
+struct ftrace_profile {
+	struct hlist_node		node;
+	unsigned long			ip;
+	unsigned long			counter;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	unsigned long long		time;
+#endif
+};
+
+struct ftrace_profile_page {
+	struct ftrace_profile_page	*next;
+	unsigned long			index;
+	struct ftrace_profile		records[];
+};
+
+struct ftrace_profile_stat {
+	atomic_t			disabled;
+	struct hlist_head		*hash;
+	struct ftrace_profile_page	*pages;
+	struct ftrace_profile_page	*start;
+	struct tracer_stat		stat;
+};
+
+#define PROFILE_RECORDS_SIZE						\
+	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
+
+#define PROFILES_PER_PAGE					\
+	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
+
+static int ftrace_profile_bits __read_mostly;
+static int ftrace_profile_enabled __read_mostly;
+
+/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
+static DEFINE_MUTEX(ftrace_profile_lock);
+
+static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
+
+#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+
+static void *
+function_stat_next(void *v, int idx)
+{
+	struct ftrace_profile *rec = v;
+	struct ftrace_profile_page *pg;
+
+	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
+
+ again:
+	rec++;
+	if ((void *)rec >= (void *)&pg->records[pg->index]) {
+		pg = pg->next;
+		if (!pg)
+			return NULL;
+		rec = &pg->records[0];
+		if (!rec->counter)
+			goto again;
+	}
+
+	return rec;
+}
+
+static void *function_stat_start(struct tracer_stat *trace)
+{
+	struct ftrace_profile_stat *stat =
+		container_of(trace, struct ftrace_profile_stat, stat);
+
+	if (!stat || !stat->start)
+		return NULL;
+
+	return function_stat_next(&stat->start->records[0], 0);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* function graph compares on total time */
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
+
+	if (a->time < b->time)
+		return -1;
+	if (a->time > b->time)
+		return 1;
+	else
+		return 0;
+}
+#else
+/* not function graph compares against hits */
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
+
+	if (a->counter < b->counter)
+		return -1;
+	if (a->counter > b->counter)
+		return 1;
+	else
+		return 0;
+}
+#endif
+
+static int function_stat_headers(struct seq_file *m)
+{
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "  Function                               "
+		   "Hit    Time            Avg\n"
+		      "  --------                               "
+		   "---    ----            ---\n");
+#else
+	seq_printf(m, "  Function                               Hit\n"
+		      "  --------                               ---\n");
+#endif
+	return 0;
+}
+
+static int function_stat_show(struct seq_file *m, void *v)
+{
+	struct ftrace_profile *rec = v;
+	char str[KSYM_SYMBOL_LEN];
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	static DEFINE_MUTEX(mutex);
+	static struct trace_seq s;
+	unsigned long long avg;
+#endif
+
+	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "    ");
+	avg = rec->time;
+	do_div(avg, rec->counter);
+
+	mutex_lock(&mutex);
+	trace_seq_init(&s);
+	trace_print_graph_duration(rec->time, &s);
+	trace_seq_puts(&s, "    ");
+	trace_print_graph_duration(avg, &s);
+	trace_print_seq(m, &s);
+	mutex_unlock(&mutex);
+#endif
+	seq_putc(m, '\n');
+
+	return 0;
+}
+
+static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
+{
+	struct ftrace_profile_page *pg;
+
+	pg = stat->pages = stat->start;
+
+	while (pg) {
+		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
+		pg->index = 0;
+		pg = pg->next;
+	}
+
+	memset(stat->hash, 0,
+	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
+}
+
+int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
+{
+	struct ftrace_profile_page *pg;
+	int functions;
+	int pages;
+	int i;
+
+	/* If we already allocated, do nothing */
+	if (stat->pages)
+		return 0;
+
+	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!stat->pages)
+		return -ENOMEM;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	functions = ftrace_update_tot_cnt;
+#else
+	/*
+	 * We do not know the number of functions that exist because
+	 * dynamic tracing is what counts them. With past experience
+	 * we have around 20K functions. That should be more than enough.
+	 * It is highly unlikely we will execute every function in
+	 * the kernel.
+	 */
+	functions = 20000;
+#endif
+
+	pg = stat->start = stat->pages;
+
+	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
+
+	for (i = 0; i < pages; i++) {
+		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!pg->next)
+			goto out_free;
+		pg = pg->next;
+	}
+
+	return 0;
+
+ out_free:
+	pg = stat->start;
+	while (pg) {
+		unsigned long tmp = (unsigned long)pg;
+
+		pg = pg->next;
+		free_page(tmp);
+	}
+
+	free_page((unsigned long)stat->pages);
+	stat->pages = NULL;
+	stat->start = NULL;
+
+	return -ENOMEM;
+}
+
+static int ftrace_profile_init_cpu(int cpu)
+{
+	struct ftrace_profile_stat *stat;
+	int size;
+
+	stat = &per_cpu(ftrace_profile_stats, cpu);
+
+	if (stat->hash) {
+		/* If the profile is already created, simply reset it */
+		ftrace_profile_reset(stat);
+		return 0;
+	}
+
+	/*
+	 * We are profiling all functions, but usually only a few thousand
+	 * functions are hit. We'll make a hash of 1024 items.
+	 */
+	size = FTRACE_PROFILE_HASH_SIZE;
+
+	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+
+	if (!stat->hash)
+		return -ENOMEM;
+
+	if (!ftrace_profile_bits) {
+		size--;
+
+		for (; size; size >>= 1)
+			ftrace_profile_bits++;
+	}
+
+	/* Preallocate the function profiling pages */
+	if (ftrace_profile_pages_init(stat) < 0) {
+		kfree(stat->hash);
+		stat->hash = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int ftrace_profile_init(void)
+{
+	int cpu;
+	int ret = 0;
+
+	for_each_online_cpu(cpu) {
+		ret = ftrace_profile_init_cpu(cpu);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* interrupts must be disabled */
+static struct ftrace_profile *
+ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+	struct ftrace_profile *rec;
+	struct hlist_head *hhd;
+	struct hlist_node *n;
+	unsigned long key;
+
+	key = hash_long(ip, ftrace_profile_bits);
+	hhd = &stat->hash[key];
+
+	if (hlist_empty(hhd))
+		return NULL;
+
+	hlist_for_each_entry_rcu(rec, n, hhd, node) {
+		if (rec->ip == ip)
+			return rec;
+	}
+
+	return NULL;
+}
+
+static void ftrace_add_profile(struct ftrace_profile_stat *stat,
+			       struct ftrace_profile *rec)
+{
+	unsigned long key;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
+}
+
+/*
+ * The memory is already allocated, this simply finds a new record to use.
+ */
+static struct ftrace_profile *
+ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+	struct ftrace_profile *rec = NULL;
+
+	/* prevent recursion (from NMIs) */
+	if (atomic_inc_return(&stat->disabled) != 1)
+		goto out;
+
+	/*
+	 * Try to find the function again since an NMI
+	 * could have added it
+	 */
+	rec = ftrace_find_profiled_func(stat, ip);
+	if (rec)
+		goto out;
+
+	if (stat->pages->index == PROFILES_PER_PAGE) {
+		if (!stat->pages->next)
+			goto out;
+		stat->pages = stat->pages->next;
+	}
+
+	rec = &stat->pages->records[stat->pages->index++];
+	rec->ip = ip;
+	ftrace_add_profile(stat, rec);
+
+ out:
+	atomic_dec(&stat->disabled);
+
+	return rec;
+}
+
+static void
+function_profile_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_profile_stat *stat;
+	struct ftrace_profile *rec;
+	unsigned long flags;
+
+	if (!ftrace_profile_enabled)
+		return;
+
+	local_irq_save(flags);
+
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash || !ftrace_profile_enabled)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, ip);
+	if (!rec) {
+		rec = ftrace_profile_alloc(stat, ip);
+		if (!rec)
+			goto out;
+	}
+
+	rec->counter++;
+ out:
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int profile_graph_entry(struct ftrace_graph_ent *trace)
+{
+	function_profile_call(trace->func, 0);
+	return 1;
+}
+
+static void profile_graph_return(struct ftrace_graph_ret *trace)
+{
+	struct ftrace_profile_stat *stat;
+	unsigned long long calltime;
+	struct ftrace_profile *rec;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash || !ftrace_profile_enabled)
+		goto out;
+
+	calltime = trace->rettime - trace->calltime;
+
+	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
+		int index;
+
+		index = trace->depth;
+
+		/* Append this call time to the parent time to subtract */
+		if (index)
+			current->ret_stack[index - 1].subtime += calltime;
+
+		if (current->ret_stack[index].subtime < calltime)
+			calltime -= current->ret_stack[index].subtime;
+		else
+			calltime = 0;
+	}
+
+	rec = ftrace_find_profiled_func(stat, trace->func);
+	if (rec)
+		rec->time += calltime;
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_graph(&profile_graph_return,
+				     &profile_graph_entry);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_graph();
+}
+#else
+static struct ftrace_ops ftrace_profile_ops __read_mostly =
+{
+	.func		= function_profile_call,
+};
+
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_function(&ftrace_profile_ops);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_function(&ftrace_profile_ops);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+static ssize_t
+ftrace_profile_write(struct file *filp, const char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	char buf[64];		/* big enough to hold a number */
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	val = !!val;
+
+	mutex_lock(&ftrace_profile_lock);
+	if (ftrace_profile_enabled ^ val) {
+		if (val) {
+			ret = ftrace_profile_init();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+
+			ret = register_ftrace_profiler();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+			ftrace_profile_enabled = 1;
+		} else {
+			ftrace_profile_enabled = 0;
+			/*
+			 * unregister_ftrace_profiler calls stop_machine
+			 * so this acts like an synchronize_sched.
+			 */
+			unregister_ftrace_profiler();
+		}
+	}
+ out:
+	mutex_unlock(&ftrace_profile_lock);
+
+	filp->f_pos += cnt;
+
+	return cnt;
+}
+
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];		/* big enough to hold a number */
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations ftrace_profile_fops = {
+	.open		= tracing_open_generic,
+	.read		= ftrace_profile_read,
+	.write		= ftrace_profile_write,
+};
+
+/* used to initialize the real stat files */
+static struct tracer_stat function_stats __initdata = {
+	.name		= "functions",
+	.stat_start	= function_stat_start,
+	.stat_next	= function_stat_next,
+	.stat_cmp	= function_stat_cmp,
+	.stat_headers	= function_stat_headers,
+	.stat_show	= function_stat_show
+};
+
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+	struct ftrace_profile_stat *stat;
+	struct dentry *entry;
+	char *name;
+	int ret;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		stat = &per_cpu(ftrace_profile_stats, cpu);
+
+		/* allocate enough for function name + cpu number */
+		name = kmalloc(32, GFP_KERNEL);
+		if (!name) {
+			/*
+			 * The files created are permanent, if something happens
+			 * we still do not free memory.
+			 */
+			kfree(stat);
+			WARN(1,
+			     "Could not allocate stat file for cpu %d\n",
+			     cpu);
+			return;
+		}
+		stat->stat = function_stats;
+		snprintf(name, 32, "function%d", cpu);
+		stat->stat.name = name;
+		ret = register_stat_tracer(&stat->stat);
+		if (ret) {
+			WARN(1,
+			     "Could not register function stat for cpu %d\n",
+			     cpu);
+			kfree(name);
+			return;
+		}
+	}
+
+	entry = debugfs_create_file("function_profile_enabled", 0644,
+				    d_tracer, NULL, &ftrace_profile_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'function_profile_enabled' entry\n");
+}
+
+#else /* CONFIG_FUNCTION_PROFILER */
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+}
+#endif /* CONFIG_FUNCTION_PROFILER */
+
 /* set when tracing only a pid */
 struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +837,6 @@
 	struct rcu_head		rcu;
 };
 
-
 enum {
 	FTRACE_ENABLE_CALLS		= (1 << 0),
 	FTRACE_DISABLE_CALLS		= (1 << 1),
@@ -346,30 +921,6 @@
 	rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = s + size;
-
-	if (ftrace_disabled || !start)
-		return;
-
-	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
-			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
-			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
-	mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -1408,7 +1959,7 @@
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
 {
-	.func = function_trace_probe_call,
+	.func		= function_trace_probe_call,
 };
 
 static int ftrace_probe_registered;
@@ -1823,6 +2374,45 @@
 	ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE		COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+static int __init set_ftrace_notrace(char *str)
+{
+	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+static int __init set_ftrace_filter(char *str)
+{
+	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		ftrace_set_regex(func, strlen(func), 0, enable);
+	}
+}
+
+static void __init set_ftrace_early_filters(void)
+{
+	if (ftrace_filter_buf[0])
+		set_ftrace_early_filter(ftrace_filter_buf, 1);
+	if (ftrace_notrace_buf[0])
+		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2128,38 +2718,23 @@
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("available_filter_functions", 0444,
-				    d_tracer, NULL, &ftrace_avail_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'available_filter_functions' entry\n");
+	trace_create_file("available_filter_functions", 0444,
+			d_tracer, NULL, &ftrace_avail_fops);
 
-	entry = debugfs_create_file("failures", 0444,
-				    d_tracer, NULL, &ftrace_failures_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'failures' entry\n");
+	trace_create_file("failures", 0444,
+			d_tracer, NULL, &ftrace_failures_fops);
 
-	entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
-				    NULL, &ftrace_filter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_filter' entry\n");
+	trace_create_file("set_ftrace_filter", 0644, d_tracer,
+			NULL, &ftrace_filter_fops);
 
-	entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+	trace_create_file("set_ftrace_notrace", 0644, d_tracer,
 				    NULL, &ftrace_notrace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_notrace' entry\n");
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+	trace_create_file("set_graph_function", 0444, d_tracer,
 				    NULL,
 				    &ftrace_graph_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	return 0;
@@ -2197,14 +2772,72 @@
 	return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-			unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = (unsigned long)end;
+
+	if (ftrace_disabled || !start || start == end)
+		return;
+
+	mutex_lock(&ftrace_lock);
+	do_for_each_ftrace_rec(pg, rec) {
+		if ((rec->ip >= s) && (rec->ip < e)) {
+			/*
+			 * rec->ip is changed in ftrace_free_rec()
+			 * It should not between s and e if record was freed.
+			 */
+			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+			ftrace_free_rec(rec);
+		}
+	} while_for_each_ftrace_rec();
+	mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+			       unsigned long *start, unsigned long *end)
 {
 	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(mod, start, end);
 }
 
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		ftrace_init_module(mod, mod->ftrace_callsites,
+				   mod->ftrace_callsites +
+				   mod->num_ftrace_callsites);
+		break;
+	case MODULE_STATE_GOING:
+		ftrace_release(mod->ftrace_callsites,
+			       mod->ftrace_callsites +
+			       mod->num_ftrace_callsites);
+		break;
+	}
+
+	return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+	.notifier_call = ftrace_module_notify,
+	.priority = 0,
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2236,6 +2869,12 @@
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
+	ret = register_module_notifier(&ftrace_module_nb);
+	if (ret)
+		pr_warning("Failed to register trace ftrace module notifier\n");
+
+	set_ftrace_early_filters();
+
 	return;
  failed:
 	ftrace_disabled = 1;
@@ -2417,7 +3056,6 @@
 static __init int ftrace_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -2425,11 +3063,11 @@
 
 	ftrace_init_dyn_debugfs(d_tracer);
 
-	entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
-				    NULL, &ftrace_pid_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_pid' entry\n");
+	trace_create_file("set_ftrace_pid", 0644, d_tracer,
+			    NULL, &ftrace_pid_fops);
+
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3176,7 @@
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_graph_active;
+static int ftrace_graph_active;
 static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2580,12 +3218,12 @@
 		}
 
 		if (t->ret_stack == NULL) {
-			t->curr_ret_stack = -1;
-			/* Make sure IRQs see the -1 first: */
-			barrier();
-			t->ret_stack = ret_stack_list[start++];
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
+			t->curr_ret_stack = -1;
+			/* Make sure the tasks see the -1 first: */
+			smp_wmb();
+			t->ret_stack = ret_stack_list[start++];
 		}
 	} while_each_thread(g, t);
 
@@ -2643,8 +3281,10 @@
 		return -ENOMEM;
 
 	/* The cpu_boot init_task->ret_stack will never be freed */
-	for_each_online_cpu(cpu)
-		ftrace_graph_init_task(idle_task(cpu));
+	for_each_online_cpu(cpu) {
+		if (!idle_task(cpu)->ret_stack)
+			ftrace_graph_init_task(idle_task(cpu));
+	}
 
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
@@ -2690,7 +3330,7 @@
 	mutex_lock(&ftrace_lock);
 
 	/* we currently allow only one tracer registered at a time */
-	if (atomic_read(&ftrace_graph_active)) {
+	if (ftrace_graph_active) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -2698,10 +3338,10 @@
 	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
 	register_pm_notifier(&ftrace_suspend_notifier);
 
-	atomic_inc(&ftrace_graph_active);
+	ftrace_graph_active++;
 	ret = start_graph_tracing();
 	if (ret) {
-		atomic_dec(&ftrace_graph_active);
+		ftrace_graph_active--;
 		goto out;
 	}
 
@@ -2719,10 +3359,10 @@
 {
 	mutex_lock(&ftrace_lock);
 
-	if (!unlikely(atomic_read(&ftrace_graph_active)))
+	if (unlikely(!ftrace_graph_active))
 		goto out;
 
-	atomic_dec(&ftrace_graph_active);
+	ftrace_graph_active--;
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,18 +3376,25 @@
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-	if (atomic_read(&ftrace_graph_active)) {
-		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+	/* Make sure we do not use the parent ret_stack */
+	t->ret_stack = NULL;
+
+	if (ftrace_graph_active) {
+		struct ftrace_ret_stack *ret_stack;
+
+		ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
-		if (!t->ret_stack)
+		if (!ret_stack)
 			return;
 		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
-	} else
-		t->ret_stack = NULL;
+		/* make curr_ret_stack visable before we add the ret_stack */
+		smp_wmb();
+		t->ret_stack = ret_stack;
+	}
 }
 
 void ftrace_graph_exit_task(struct task_struct *t)
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d..86cdf67 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 #include "trace_output.h"
 #include "trace.h"
@@ -42,6 +42,7 @@
 				   gfp_t gfp_flags,
 				   int node)
 {
+	struct ftrace_event_call *call = &event_kmem_alloc;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_alloc_entry *entry;
 	struct ring_buffer_event *event;
@@ -62,7 +63,8 @@
 	entry->gfp_flags	= gfp_flags;
 	entry->node		= node;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
@@ -71,6 +73,7 @@
 				  unsigned long call_site,
 				  const void *ptr)
 {
+	struct ftrace_event_call *call = &event_kmem_free;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_free_entry *entry;
 	struct ring_buffer_event *event;
@@ -86,7 +89,8 @@
 	entry->call_site	= call_site;
 	entry->ptr		= ptr;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf4..2e642b2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,28 @@
 #include "trace.h"
 
 /*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_seq_printf(s, "# compressed entry header\n");
+	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
+	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+			       RINGBUF_TYPE_PADDING);
+	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+			       RINGBUF_TYPE_TIME_EXTEND);
+	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+
+	return ret;
+}
+
+/*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
  * associated with the CPU it is currently executing on.  A reader may read
@@ -182,7 +204,10 @@
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	28
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +216,28 @@
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+	return event->type_len == RINGBUF_TYPE_PADDING
+			&& event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+	return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->len)
-		length = event->len * RB_ALIGNMENT;
+	if (event->type_len)
+		length = event->type_len * RB_ALIGNMENT;
 	else
 		length = event->array[0];
 	return length + RB_EVNT_HDR_SIZE;
@@ -242,12 +247,12 @@
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			/* undefined */
 			return -1;
-		return rb_event_data_length(event);
+		return  event->array[0] + RB_EVNT_HDR_SIZE;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		return RB_LEN_TIME_EXTEND;
@@ -271,7 +276,7 @@
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length = rb_event_length(event);
-	if (event->type != RINGBUF_TYPE_DATA)
+	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +289,9 @@
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
-	if (event->len)
+	if (event->type_len)
 		return (void *)&event->array[0];
 	/* Otherwise length is in array[0] and array[1] has the data */
 	return (void *)&event->array[1];
@@ -316,9 +321,10 @@
 };
 
 struct buffer_page {
+	struct list_head list;		/* list of buffer pages */
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
-	struct list_head list;		/* list of free pages */
+	local_t		 entries;	/* entries on this page */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -361,6 +367,34 @@
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+	struct buffer_data_page field;
+	int ret;
+
+	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+			       "offset:0;\tsize:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp));
+
+	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       (unsigned int)sizeof(field.commit));
+
+	ret = trace_seq_printf(s, "\tfield: char data;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), data),
+			       (unsigned int)BUF_PAGE_SIZE);
+
+	return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -375,8 +409,11 @@
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			nmi_dropped;
+	unsigned long			commit_overrun;
 	unsigned long			overrun;
-	unsigned long			entries;
+	unsigned long			read;
+	local_t				entries;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -389,6 +426,8 @@
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -420,13 +459,18 @@
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
-	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = buffer->clock() << DEBUG_SHIFT;
+	time = rb_time_stamp(buffer, cpu);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -523,6 +567,7 @@
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -593,7 +638,8 @@
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -616,6 +662,7 @@
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -673,7 +720,7 @@
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -947,31 +994,6 @@
 	return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	for (head = 0; head < rb_head_size(cpu_buffer);
-	     head += rb_event_length(event)) {
-
-		event = __rb_page_index(cpu_buffer->head_page, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
-			continue;
-		cpu_buffer->overrun++;
-		cpu_buffer->entries--;
-	}
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 			       struct buffer_page **bpage)
 {
@@ -991,7 +1013,7 @@
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -1110,28 +1132,21 @@
 rb_update_event(struct ring_buffer_event *event,
 			 unsigned type, unsigned length)
 {
-	event->type = type;
+	event->type_len = type;
 
 	switch (type) {
 
 	case RINGBUF_TYPE_PADDING:
-		break;
-
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-		break;
-
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
-	case RINGBUF_TYPE_DATA:
+	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA) {
-			event->len = 0;
+		if (length > RB_MAX_SMALL_DATA)
 			event->array[0] = length;
-		} else
-			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+		else
+			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
@@ -1155,16 +1170,144 @@
 	return length;
 }
 
+
+static struct ring_buffer_event *
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	     unsigned long length, unsigned long tail,
+	     struct buffer_page *commit_page,
+	     struct buffer_page *tail_page, u64 *ts)
+{
+	struct buffer_page *next_page, *head_page, *reader_page;
+	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct ring_buffer_event *event;
+	bool lock_taken = false;
+	unsigned long flags;
+
+	next_page = tail_page;
+
+	local_irq_save(flags);
+	/*
+	 * Since the write to the buffer is still not
+	 * fully lockless, we must be careful with NMIs.
+	 * The locks in the writers are taken when a write
+	 * crosses to a new page. The locks protect against
+	 * races with the readers (this will soon be fixed
+	 * with a lockless solution).
+	 *
+	 * Because we can not protect against NMIs, and we
+	 * want to keep traces reentrant, we need to manage
+	 * what happens when we are in an NMI.
+	 *
+	 * NMIs can happen after we take the lock.
+	 * If we are in an NMI, only take the lock
+	 * if it is not already taken. Otherwise
+	 * simply fail.
+	 */
+	if (unlikely(in_nmi())) {
+		if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+			cpu_buffer->nmi_dropped++;
+			goto out_reset;
+		}
+	} else
+		__raw_spin_lock(&cpu_buffer->lock);
+
+	lock_taken = true;
+
+	rb_inc_page(cpu_buffer, &next_page);
+
+	head_page = cpu_buffer->head_page;
+	reader_page = cpu_buffer->reader_page;
+
+	/* we grabbed the lock before incrementing */
+	if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+		goto out_reset;
+
+	/*
+	 * If for some reason, we had an interrupt storm that made
+	 * it all the way around the buffer, bail, and warn
+	 * about it.
+	 */
+	if (unlikely(next_page == commit_page)) {
+		cpu_buffer->commit_overrun++;
+		goto out_reset;
+	}
+
+	if (next_page == head_page) {
+		if (!(buffer->flags & RB_FL_OVERWRITE))
+			goto out_reset;
+
+		/* tail_page has not moved yet? */
+		if (tail_page == cpu_buffer->tail_page) {
+			/* count overflows */
+			cpu_buffer->overrun +=
+				local_read(&head_page->entries);
+
+			rb_inc_page(cpu_buffer, &head_page);
+			cpu_buffer->head_page = head_page;
+			cpu_buffer->head_page->read = 0;
+		}
+	}
+
+	/*
+	 * If the tail page is still the same as what we think
+	 * it is, then it is up to us to update the tail
+	 * pointer.
+	 */
+	if (tail_page == cpu_buffer->tail_page) {
+		local_set(&next_page->write, 0);
+		local_set(&next_page->entries, 0);
+		local_set(&next_page->page->commit, 0);
+		cpu_buffer->tail_page = next_page;
+
+		/* reread the time stamp */
+		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+		cpu_buffer->tail_page->page->time_stamp = *ts;
+	}
+
+	/*
+	 * The actual tail page has moved forward.
+	 */
+	if (tail < BUF_PAGE_SIZE) {
+		/* Mark the rest of the page with padding */
+		event = __rb_page_index(tail_page, tail);
+		rb_event_set_padding(event);
+	}
+
+	/* Set the write back to the previous setting */
+	local_sub(length, &tail_page->write);
+
+	/*
+	 * If this was a commit entry that failed,
+	 * increment that too
+	 */
+	if (tail_page == cpu_buffer->commit_page &&
+	    tail == rb_commit_index(cpu_buffer)) {
+		rb_set_commit_to_write(cpu_buffer);
+	}
+
+	__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	/* fail and let the caller try again */
+	return ERR_PTR(-EAGAIN);
+
+ out_reset:
+	/* reset write */
+	local_sub(length, &tail_page->write);
+
+	if (likely(lock_taken))
+		__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+	return NULL;
+}
+
 static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-	unsigned long tail, write;
-	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct buffer_page *tail_page, *commit_page;
 	struct ring_buffer_event *event;
-	unsigned long flags;
-	bool lock_taken = false;
+	unsigned long tail, write;
 
 	commit_page = cpu_buffer->commit_page;
 	/* we just need to protect against interrupts */
@@ -1174,112 +1317,9 @@
 	tail = write - length;
 
 	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE) {
-		struct buffer_page *next_page = tail_page;
-
-		local_irq_save(flags);
-		/*
-		 * Since the write to the buffer is still not
-		 * fully lockless, we must be careful with NMIs.
-		 * The locks in the writers are taken when a write
-		 * crosses to a new page. The locks protect against
-		 * races with the readers (this will soon be fixed
-		 * with a lockless solution).
-		 *
-		 * Because we can not protect against NMIs, and we
-		 * want to keep traces reentrant, we need to manage
-		 * what happens when we are in an NMI.
-		 *
-		 * NMIs can happen after we take the lock.
-		 * If we are in an NMI, only take the lock
-		 * if it is not already taken. Otherwise
-		 * simply fail.
-		 */
-		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
-				goto out_reset;
-		} else
-			__raw_spin_lock(&cpu_buffer->lock);
-
-		lock_taken = true;
-
-		rb_inc_page(cpu_buffer, &next_page);
-
-		head_page = cpu_buffer->head_page;
-		reader_page = cpu_buffer->reader_page;
-
-		/* we grabbed the lock before incrementing */
-		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-			goto out_reset;
-
-		/*
-		 * If for some reason, we had an interrupt storm that made
-		 * it all the way around the buffer, bail, and warn
-		 * about it.
-		 */
-		if (unlikely(next_page == commit_page)) {
-			WARN_ON_ONCE(1);
-			goto out_reset;
-		}
-
-		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
-				goto out_reset;
-
-			/* tail_page has not moved yet? */
-			if (tail_page == cpu_buffer->tail_page) {
-				/* count overflows */
-				rb_update_overflow(cpu_buffer);
-
-				rb_inc_page(cpu_buffer, &head_page);
-				cpu_buffer->head_page = head_page;
-				cpu_buffer->head_page->read = 0;
-			}
-		}
-
-		/*
-		 * If the tail page is still the same as what we think
-		 * it is, then it is up to us to update the tail
-		 * pointer.
-		 */
-		if (tail_page == cpu_buffer->tail_page) {
-			local_set(&next_page->write, 0);
-			local_set(&next_page->page->commit, 0);
-			cpu_buffer->tail_page = next_page;
-
-			/* reread the time stamp */
-			*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-			cpu_buffer->tail_page->page->time_stamp = *ts;
-		}
-
-		/*
-		 * The actual tail page has moved forward.
-		 */
-		if (tail < BUF_PAGE_SIZE) {
-			/* Mark the rest of the page with padding */
-			event = __rb_page_index(tail_page, tail);
-			rb_event_set_padding(event);
-		}
-
-		if (tail <= BUF_PAGE_SIZE)
-			/* Set the write back to the previous setting */
-			local_set(&tail_page->write, tail);
-
-		/*
-		 * If this was a commit entry that failed,
-		 * increment that too
-		 */
-		if (tail_page == cpu_buffer->commit_page &&
-		    tail == rb_commit_index(cpu_buffer)) {
-			rb_set_commit_to_write(cpu_buffer);
-		}
-
-		__raw_spin_unlock(&cpu_buffer->lock);
-		local_irq_restore(flags);
-
-		/* fail and let the caller try again */
-		return ERR_PTR(-EAGAIN);
-	}
+	if (write > BUF_PAGE_SIZE)
+		return rb_move_tail(cpu_buffer, length, tail,
+				    commit_page, tail_page, ts);
 
 	/* We reserved something on the buffer */
 
@@ -1289,6 +1329,10 @@
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
+
 	/*
 	 * If this is a commit and the tail is zero, then update
 	 * this page's time stamp.
@@ -1297,16 +1341,38 @@
 		cpu_buffer->commit_page->page->time_stamp = *ts;
 
 	return event;
+}
 
- out_reset:
-	/* reset write */
-	if (tail <= BUF_PAGE_SIZE)
-		local_set(&tail_page->write, tail);
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event)
+{
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
 
-	if (likely(lock_taken))
-		__raw_spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
-	return NULL;
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			return 1;
+	}
+
+	/* could not discard */
+	return 0;
 }
 
 static int
@@ -1351,16 +1417,23 @@
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
 			cpu_buffer->commit_page->page->time_stamp = *ts;
-			event->time_delta = 0;
-			event->array[0] = 0;
+			/* try to discard, since we do not need this */
+			if (!rb_try_to_discard(cpu_buffer, event)) {
+				/* nope, just zero it */
+				event->time_delta = 0;
+				event->array[0] = 0;
+			}
 		}
 		cpu_buffer->write_stamp = *ts;
 		/* let the caller know this was the commit */
 		ret = 1;
 	} else {
-		/* Darn, this is just wasted space */
-		event->time_delta = 0;
-		event->array[0] = 0;
+		/* Try to discard the event */
+		if (!rb_try_to_discard(cpu_buffer, event)) {
+			/* Darn, this is just wasted space */
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		ret = 0;
 	}
 
@@ -1371,13 +1444,14 @@
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned type, unsigned long length)
+		      unsigned long length)
 {
 	struct ring_buffer_event *event;
-	u64 ts, delta;
+	u64 ts, delta = 0;
 	int commit = 0;
 	int nr_loops = 0;
 
+	length = rb_calculate_event_length(length);
  again:
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1465,7 @@
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		return NULL;
 
-	ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
 	/*
 	 * Only the first commit can update the timestamp.
@@ -1401,23 +1475,24 @@
 	 * also be made. But only the entry that did the actual
 	 * commit will be something other than zero.
 	 */
-	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
-	    rb_page_write(cpu_buffer->tail_page) ==
-	    rb_commit_index(cpu_buffer)) {
+	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
+		   rb_page_write(cpu_buffer->tail_page) ==
+		   rb_commit_index(cpu_buffer))) {
+		u64 diff;
 
-		delta = ts - cpu_buffer->write_stamp;
+		diff = ts - cpu_buffer->write_stamp;
 
-		/* make sure this delta is calculated here */
+		/* make sure this diff is calculated here */
 		barrier();
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			delta = 0;
+			goto get_event;
 
-		if (test_time_stamp(delta)) {
+		delta = diff;
+		if (unlikely(test_time_stamp(delta))) {
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
 			if (commit == -EBUSY)
 				return NULL;
 
@@ -1426,12 +1501,11 @@
 
 			RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else
-		/* Non commits have zero deltas */
-		delta = 0;
+	}
 
-	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-	if (PTR_ERR(event) == -EAGAIN)
+ get_event:
+	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event) {
@@ -1448,7 +1522,7 @@
 	 * If the timestamp was commited, make the commit our entry
 	 * now so that we will update it when needed.
 	 */
-	if (commit)
+	if (unlikely(commit))
 		rb_set_commit_event(cpu_buffer, event);
 	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
@@ -1458,6 +1532,36 @@
 	return event;
 }
 
+#define TRACE_RECURSIVE_DEPTH 16
+
+static int trace_recursive_lock(void)
+{
+	current->trace_recursion++;
+
+	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+		return 0;
+
+	/* Disable all tracing before we do anything else */
+	tracing_off_permanent();
+
+	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+		    current->trace_recursion,
+		    hardirq_count() >> HARDIRQ_SHIFT,
+		    softirq_count() >> SOFTIRQ_SHIFT,
+		    in_nmi());
+
+	WARN_ON_ONCE(1);
+	return -1;
+}
+
+static void trace_recursive_unlock(void)
+{
+	WARN_ON_ONCE(!current->trace_recursion);
+
+	current->trace_recursion--;
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1491,6 +1595,9 @@
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (trace_recursive_lock())
+		goto out_nocheck;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1608,10 @@
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	length = rb_calculate_event_length(length);
-	if (length > BUF_PAGE_SIZE)
+	if (length > BUF_MAX_DATA_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1520,6 +1626,9 @@
 	return event;
 
  out:
+	trace_recursive_unlock();
+
+ out_nocheck:
 	ftrace_preempt_enable(resched);
 	return NULL;
 }
@@ -1528,7 +1637,7 @@
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->entries++;
+	local_inc(&cpu_buffer->entries);
 
 	/* Only process further if we own the commit */
 	if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1667,8 @@
 
 	rb_commit(cpu_buffer, event);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
@@ -1570,6 +1681,99 @@
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+	/* array[0] holds the actual length for the discarded event */
+	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+	rb_event_discard(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* The event is discarded regardless */
+	rb_event_discard(event);
+
+	/*
+	 * This must only be called if the event has not been
+	 * committed yet. Thus we can assume that preemption
+	 * is still disabled.
+	 */
+	RB_WARN_ON(buffer, preemptible());
+
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (!rb_try_to_discard(cpu_buffer, event))
+		goto out;
+
+	/*
+	 * The commit is still visible by the reader, so we
+	 * must increment entries.
+	 */
+	local_inc(&cpu_buffer->entries);
+ out:
+	/*
+	 * If a write came in and pushed the tail page
+	 * we still need to update the commit pointer
+	 * if we were the commit.
+	 */
+	if (rb_is_commit(cpu_buffer, event))
+		rb_set_commit_to_write(cpu_buffer);
+
+	trace_recursive_unlock();
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1)
+		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+	else
+		preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
@@ -1589,7 +1793,6 @@
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
 	int cpu, resched;
@@ -1612,9 +1815,10 @@
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer,
-				      RINGBUF_TYPE_DATA, event_length);
+	if (length > BUF_MAX_DATA_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1728,7 +1932,8 @@
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	ret = cpu_buffer->entries;
+	ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+		- cpu_buffer->read;
 
 	return ret;
 }
@@ -1755,6 +1960,47 @@
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->nmi_dropped;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->commit_overrun;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -1770,7 +2016,8 @@
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		entries += cpu_buffer->entries;
+		entries += (local_read(&cpu_buffer->entries) -
+			    cpu_buffer->overrun) - cpu_buffer->read;
 	}
 
 	return entries;
@@ -1862,7 +2109,7 @@
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -1893,7 +2140,7 @@
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -1966,6 +2213,7 @@
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 
 	/* Make the reader page now replace the head */
@@ -2008,8 +2256,9 @@
 
 	event = rb_reader_event(cpu_buffer);
 
-	if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
-		cpu_buffer->entries--;
+	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+			|| rb_discarded_event(event))
+		cpu_buffer->read++;
 
 	rb_update_read_stamp(cpu_buffer, event);
 
@@ -2031,8 +2280,8 @@
 	 * Check if we are at the end of the buffer.
 	 */
 	if (iter->head >= rb_page_size(iter->head_page)) {
-		if (RB_WARN_ON(buffer,
-			       iter->head_page == cpu_buffer->commit_page))
+		/* discarded commits can make the page empty */
+		if (iter->head_page == cpu_buffer->commit_page)
 			return;
 		rb_inc_iter(iter);
 		return;
@@ -2075,12 +2324,10 @@
 	/*
 	 * We repeat when a timestamp is encountered. It is possible
 	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have.  Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * as one timestamp is about to be written, or from discarded
+	 * commits. The most that we can have is the number on a single page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	reader = rb_get_reader_page(cpu_buffer);
@@ -2089,7 +2336,7 @@
 
 	event = rb_reader_event(cpu_buffer);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			RB_WARN_ON(cpu_buffer, 1);
@@ -2146,14 +2393,14 @@
 
  again:
 	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have. Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * We repeat when a timestamp is encountered.
+	 * We can get multiple timestamps by nested interrupts or also
+	 * if filtering is on (discarding commits). Since discarding
+	 * commits can be frequent we can get a lot of timestamps.
+	 * But we limit them by not adding timestamps if they begin
+	 * at the start of a page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))
@@ -2161,7 +2408,7 @@
 
 	event = rb_iter_head_event(iter);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event)) {
 			rb_inc_iter(iter);
@@ -2220,7 +2467,7 @@
 	event = rb_buffer_peek(buffer, cpu, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2248,7 +2495,7 @@
 	event = rb_iter_peek(iter, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2293,7 +2540,7 @@
  out:
 	preempt_enable();
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2386,7 +2633,7 @@
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2411,6 +2658,7 @@
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
 	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->entries, 0);
 	local_set(&cpu_buffer->head_page->page->commit, 0);
 
 	cpu_buffer->head_page->read = 0;
@@ -2420,11 +2668,15 @@
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
+	cpu_buffer->nmi_dropped = 0;
+	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
-	cpu_buffer->entries = 0;
+	cpu_buffer->read = 0;
+	local_set(&cpu_buffer->entries, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2443,6 +2695,8 @@
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	atomic_inc(&cpu_buffer->record_disabled);
+
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	__raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2706,8 @@
 	__raw_spin_unlock(&cpu_buffer->lock);
 
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -2578,28 +2834,6 @@
 }
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
-static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-			      struct buffer_data_page *bpage,
-			      unsigned int offset)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	__raw_spin_lock(&cpu_buffer->lock);
-	for (head = offset; head < local_read(&bpage->commit);
-	     head += rb_event_length(event)) {
-
-		event = __rb_data_page_index(bpage, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
-			continue;
-		cpu_buffer->entries--;
-	}
-	__raw_spin_unlock(&cpu_buffer->lock);
-}
-
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
@@ -2630,6 +2864,7 @@
 
 	return bpage;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 
 /**
  * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2877,7 @@
 {
 	free_page((unsigned long)data);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
 /**
  * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +3004,17 @@
 		/* we copied everything to the beginning */
 		read = 0;
 	} else {
+		/* update the entry counter */
+		cpu_buffer->read += local_read(&reader->entries);
+
 		/* swap the pages */
 		rb_init_page(bpage);
 		bpage = reader->page;
 		reader->page = *data_page;
 		local_set(&reader->write, 0);
+		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
-
-		/* update the entry counter */
-		rb_remove_entries(cpu_buffer, bpage, read);
 	}
 	ret = read;
 
@@ -2787,6 +3024,7 @@
  out:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3083,11 @@
 static __init int rb_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-				    &ring_buffer_flags, &rb_simple_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+	trace_create_file("tracing_on", 0644, d_tracer,
+			    &ring_buffer_flags, &rb_simple_fops);
 
 	return 0;
 }
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644
index 0000000..8d68e14
--- /dev/null
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -0,0 +1,416 @@
+/*
+ * ring buffer tester and benchmark
+ *
+ * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/time.h>
+
+struct rb_page {
+	u64		ts;
+	local_t		commit;
+	char		data[4080];
+};
+
+/* run time and sleep time in seconds */
+#define RUN_TIME	10
+#define SLEEP_TIME	10
+
+/* number of events for writer to wake up the reader */
+static int wakeup_interval = 100;
+
+static int reader_finish;
+static struct completion read_start;
+static struct completion read_done;
+
+static struct ring_buffer *buffer;
+static struct task_struct *producer;
+static struct task_struct *consumer;
+static unsigned long read;
+
+static int disable_reader;
+module_param(disable_reader, uint, 0644);
+MODULE_PARM_DESC(disable_reader, "only run producer");
+
+static int read_events;
+
+static int kill_test;
+
+#define KILL_TEST()				\
+	do {					\
+		if (!kill_test) {		\
+			kill_test = 1;		\
+			WARN_ON(1);		\
+		}				\
+	} while (0)
+
+enum event_status {
+	EVENT_FOUND,
+	EVENT_DROPPED,
+};
+
+static enum event_status read_event(int cpu)
+{
+	struct ring_buffer_event *event;
+	int *entry;
+	u64 ts;
+
+	event = ring_buffer_consume(buffer, cpu, &ts);
+	if (!event)
+		return EVENT_DROPPED;
+
+	entry = ring_buffer_event_data(event);
+	if (*entry != cpu) {
+		KILL_TEST();
+		return EVENT_DROPPED;
+	}
+
+	read++;
+	return EVENT_FOUND;
+}
+
+static enum event_status read_page(int cpu)
+{
+	struct ring_buffer_event *event;
+	struct rb_page *rpage;
+	unsigned long commit;
+	void *bpage;
+	int *entry;
+	int ret;
+	int inc;
+	int i;
+
+	bpage = ring_buffer_alloc_read_page(buffer);
+	if (!bpage)
+		return EVENT_DROPPED;
+
+	ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
+	if (ret >= 0) {
+		rpage = bpage;
+		commit = local_read(&rpage->commit);
+		for (i = 0; i < commit && !kill_test; i += inc) {
+
+			if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
+				KILL_TEST();
+				break;
+			}
+
+			inc = -1;
+			event = (void *)&rpage->data[i];
+			switch (event->type_len) {
+			case RINGBUF_TYPE_PADDING:
+				/* We don't expect any padding */
+				KILL_TEST();
+				break;
+			case RINGBUF_TYPE_TIME_EXTEND:
+				inc = 8;
+				break;
+			case 0:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				if (!event->array[0]) {
+					KILL_TEST();
+					break;
+				}
+				inc = event->array[0];
+				break;
+			default:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				inc = ((event->type_len + 1) * 4);
+			}
+			if (kill_test)
+				break;
+
+			if (inc <= 0) {
+				KILL_TEST();
+				break;
+			}
+		}
+	}
+	ring_buffer_free_read_page(buffer, bpage);
+
+	if (ret < 0)
+		return EVENT_DROPPED;
+	return EVENT_FOUND;
+}
+
+static void ring_buffer_consumer(void)
+{
+	/* toggle between reading pages and events */
+	read_events ^= 1;
+
+	read = 0;
+	while (!reader_finish && !kill_test) {
+		int found;
+
+		do {
+			int cpu;
+
+			found = 0;
+			for_each_online_cpu(cpu) {
+				enum event_status stat;
+
+				if (read_events)
+					stat = read_event(cpu);
+				else
+					stat = read_page(cpu);
+
+				if (kill_test)
+					break;
+				if (stat == EVENT_FOUND)
+					found = 1;
+			}
+		} while (found && !kill_test);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (reader_finish)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	reader_finish = 0;
+	complete(&read_done);
+}
+
+static void ring_buffer_producer(void)
+{
+	struct timeval start_tv;
+	struct timeval end_tv;
+	unsigned long long time;
+	unsigned long long entries;
+	unsigned long long overruns;
+	unsigned long missed = 0;
+	unsigned long hit = 0;
+	unsigned long avg;
+	int cnt = 0;
+
+	/*
+	 * Hammer the buffer for 10 secs (this may
+	 * make the system stall)
+	 */
+	pr_info("Starting ring buffer hammer\n");
+	do_gettimeofday(&start_tv);
+	do {
+		struct ring_buffer_event *event;
+		int *entry;
+
+		event = ring_buffer_lock_reserve(buffer, 10);
+		if (!event) {
+			missed++;
+		} else {
+			hit++;
+			entry = ring_buffer_event_data(event);
+			*entry = smp_processor_id();
+			ring_buffer_unlock_commit(buffer, event);
+		}
+		do_gettimeofday(&end_tv);
+
+		cnt++;
+		if (consumer && !(cnt % wakeup_interval))
+			wake_up_process(consumer);
+
+#ifndef CONFIG_PREEMPT
+		/*
+		 * If we are a non preempt kernel, the 10 second run will
+		 * stop everything while it runs. Instead, we will call
+		 * cond_resched and also add any time that was lost by a
+		 * rescedule.
+		 *
+		 * Do a cond resched at the same frequency we would wake up
+		 * the reader.
+		 */
+		if (cnt % wakeup_interval)
+			cond_resched();
+#endif
+
+	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
+	pr_info("End ring buffer hammer\n");
+
+	if (consumer) {
+		/* Init both completions here to avoid races */
+		init_completion(&read_start);
+		init_completion(&read_done);
+		/* the completions must be visible before the finish var */
+		smp_wmb();
+		reader_finish = 1;
+		/* finish var visible before waking up the consumer */
+		smp_wmb();
+		wake_up_process(consumer);
+		wait_for_completion(&read_done);
+	}
+
+	time = end_tv.tv_sec - start_tv.tv_sec;
+	time *= USEC_PER_SEC;
+	time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
+
+	entries = ring_buffer_entries(buffer);
+	overruns = ring_buffer_overruns(buffer);
+
+	if (kill_test)
+		pr_info("ERROR!\n");
+	pr_info("Time:     %lld (usecs)\n", time);
+	pr_info("Overruns: %lld\n", overruns);
+	if (disable_reader)
+		pr_info("Read:     (reader disabled)\n");
+	else
+		pr_info("Read:     %ld  (by %s)\n", read,
+			read_events ? "events" : "pages");
+	pr_info("Entries:  %lld\n", entries);
+	pr_info("Total:    %lld\n", entries + overruns + read);
+	pr_info("Missed:   %ld\n", missed);
+	pr_info("Hit:      %ld\n", hit);
+
+	/* Convert time from usecs to millisecs */
+	do_div(time, USEC_PER_MSEC);
+	if (time)
+		hit /= (long)time;
+	else
+		pr_info("TIME IS ZERO??\n");
+
+	pr_info("Entries per millisec: %ld\n", hit);
+
+	if (hit) {
+		/* Calculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / hit;
+		pr_info("%ld ns per entry\n", avg);
+	}
+
+	if (missed) {
+		if (time)
+			missed /= (long)time;
+
+		pr_info("Total iterations per millisec: %ld\n", hit + missed);
+
+		/* it is possible that hit + missed will overflow and be zero */
+		if (!(hit + missed)) {
+			pr_info("hit + missed overflowed and totalled zero!\n");
+			hit--; /* make it non zero */
+		}
+
+		/* Caculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / (hit + missed);
+		pr_info("%ld ns per entry\n", avg);
+	}
+}
+
+static void wait_to_die(void)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+static int ring_buffer_consumer_thread(void *arg)
+{
+	while (!kthread_should_stop() && !kill_test) {
+		complete(&read_start);
+
+		ring_buffer_consumer();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop() || kill_test)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int ring_buffer_producer_thread(void *arg)
+{
+	init_completion(&read_start);
+
+	while (!kthread_should_stop() && !kill_test) {
+		ring_buffer_reset(buffer);
+
+		if (consumer) {
+			smp_wmb();
+			wake_up_process(consumer);
+			wait_for_completion(&read_start);
+		}
+
+		ring_buffer_producer();
+
+		pr_info("Sleeping for 10 secs\n");
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ * SLEEP_TIME);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int __init ring_buffer_benchmark_init(void)
+{
+	int ret;
+
+	/* make a one meg buffer in overwite mode */
+	buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
+	if (!buffer)
+		return -ENOMEM;
+
+	if (!disable_reader) {
+		consumer = kthread_create(ring_buffer_consumer_thread,
+					  NULL, "rb_consumer");
+		ret = PTR_ERR(consumer);
+		if (IS_ERR(consumer))
+			goto out_fail;
+	}
+
+	producer = kthread_run(ring_buffer_producer_thread,
+			       NULL, "rb_producer");
+	ret = PTR_ERR(producer);
+
+	if (IS_ERR(producer))
+		goto out_kill;
+
+	return 0;
+
+ out_kill:
+	if (consumer)
+		kthread_stop(consumer);
+
+ out_fail:
+	ring_buffer_free(buffer);
+	return ret;
+}
+
+static void __exit ring_buffer_benchmark_exit(void)
+{
+	kthread_stop(producer);
+	if (consumer)
+		kthread_stop(consumer);
+	ring_buffer_free(buffer);
+}
+
+module_init(ring_buffer_benchmark_init);
+module_exit(ring_buffer_benchmark_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("ring_buffer_benchmark");
+MODULE_LICENSE("GPL");
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cda81ec..8acd9b81 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -171,6 +171,13 @@
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+				 struct ring_buffer_event *event)
+{
+	return filter_check_discard(call, rec, global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(filter_current_check_discard);
+
 cycle_t ftrace_now(int cpu)
 {
 	u64 ts;
@@ -255,7 +262,8 @@
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
+	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+	TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@
 	"latency-format",
 	"global-clock",
 	"sleep-time",
+	"graph-time",
 	NULL
 };
 
@@ -402,17 +411,6 @@
 	return cnt;
 }
 
-static void
-trace_print_seq(struct seq_file *m, struct trace_seq *s)
-{
-	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
-
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
-
-	trace_seq_init(s);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -641,6 +639,16 @@
 		tracing_reset(tr, cpu);
 }
 
+void tracing_reset_current(int cpu)
+{
+	tracing_reset(&global_trace, cpu);
+}
+
+void tracing_reset_current_online_cpus(void)
+{
+	tracing_reset_online_cpus(&global_trace);
+}
+
 #define SAVED_CMDLINES 128
 #define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -800,6 +808,7 @@
 		return;
 	}
 
+	preempt_disable();
 	__raw_spin_lock(&trace_cmdline_lock);
 	map = map_pid_to_cmdline[pid];
 	if (map != NO_CMDLINE_MAP)
@@ -808,6 +817,7 @@
 		strcpy(comm, "<...>");
 
 	__raw_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
@@ -840,7 +850,7 @@
 }
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags, int pc)
 {
@@ -883,30 +893,40 @@
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc)
 {
 	return trace_buffer_lock_reserve(&global_trace,
 					 type, len, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+{
+	ring_buffer_discard_commit(global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
 	       int pc)
 {
+	struct ftrace_event_call *call = &event_function;
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
 
@@ -921,7 +941,9 @@
 	entry	= ring_buffer_event_data(event);
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
-	ring_buffer_unlock_commit(tr->buffer, event);
+
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +952,7 @@
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_entry;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ent_entry *entry;
 
@@ -942,7 +965,8 @@
 		return 0;
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 
 	return 1;
 }
@@ -952,6 +976,7 @@
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_exit;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ret_entry *entry;
 
@@ -964,7 +989,8 @@
 		return;
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
 
@@ -982,6 +1008,7 @@
 				 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
@@ -999,7 +1026,8 @@
 	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1024,6 +1052,7 @@
 				   unsigned long flags, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_user_stack;
 	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 	struct stack_trace trace;
@@ -1045,7 +1074,8 @@
 	trace.entries		= entry->caller;
 
 	save_stack_trace_user(&trace);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1089,6 +1119,7 @@
 			   struct task_struct *next,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_context_switch;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1104,7 +1135,9 @@
 	entry->next_prio		= next->prio;
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
-	trace_buffer_unlock_commit(tr, event, flags, pc);
+
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
 void
@@ -1113,6 +1146,7 @@
 			   struct task_struct *curr,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_wakeup;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1129,7 +1163,8 @@
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 	ftrace_trace_stack(tr, flags, 6, pc);
 	ftrace_trace_userstack(tr, flags, pc);
 }
@@ -1230,11 +1265,13 @@
 		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	static u32 trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct bprint_entry *entry;
 	unsigned long flags;
+	int disable;
 	int resched;
 	int cpu, len = 0, size, pc;
 
@@ -1249,7 +1286,8 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	/* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1307,15 @@
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
 	local_irq_restore(flags);
 
 out:
+	atomic_dec_return(&data->disabled);
 	ftrace_preempt_enable(resched);
 	unpause_graph_tracing();
 
@@ -1288,12 +1328,14 @@
 	static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
 	static char trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_print;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	int cpu, len = 0, size, pc;
 	struct print_entry *entry;
 	unsigned long irq_flags;
+	int disable;
 
 	if (tracing_disabled || tracing_selftest_running)
 		return 0;
@@ -1303,7 +1345,8 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	pause_graph_tracing();
@@ -1323,13 +1366,15 @@
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = 0;
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
 	raw_local_irq_restore(irq_flags);
 	unpause_graph_tracing();
  out:
+	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
 
 	return len;
@@ -1526,12 +1571,14 @@
 		p = s_next(m, p, &l);
 	}
 
+	trace_event_read_lock();
 	return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
 	atomic_dec(&trace_record_cmdline_disabled);
+	trace_event_read_unlock();
 }
 
 static void print_lat_help_header(struct seq_file *m)
@@ -1774,6 +1821,7 @@
 	return 1;
 }
 
+/*  Called with trace_event_read_lock() held. */
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
 	enum print_line_t ret;
@@ -2397,6 +2445,56 @@
 };
 
 static ssize_t
+tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	char *buf_comm;
+	char *file_buf;
+	char *buf;
+	int len = 0;
+	int pid;
+	int i;
+
+	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
+	if (!file_buf)
+		return -ENOMEM;
+
+	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
+	if (!buf_comm) {
+		kfree(file_buf);
+		return -ENOMEM;
+	}
+
+	buf = file_buf;
+
+	for (i = 0; i < SAVED_CMDLINES; i++) {
+		int r;
+
+		pid = map_cmdline_to_pid[i];
+		if (pid == -1 || pid == NO_CMDLINE_MAP)
+			continue;
+
+		trace_find_cmdline(pid, buf_comm);
+		r = sprintf(buf, "%d %s\n", pid, buf_comm);
+		buf += r;
+		len += r;
+	}
+
+	len = simple_read_from_buffer(ubuf, cnt, ppos,
+				      file_buf, len);
+
+	kfree(file_buf);
+	kfree(buf_comm);
+
+	return len;
+}
+
+static const struct file_operations tracing_saved_cmdlines_fops = {
+    .open       = tracing_open_generic,
+    .read       = tracing_saved_cmdlines_read,
+};
+
+static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
@@ -2728,6 +2826,9 @@
 	/* trace pipe does not show start of buffer */
 	cpumask_setall(iter->started);
 
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
@@ -2915,6 +3016,7 @@
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
+	trace_event_read_lock();
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -2931,6 +3033,7 @@
 		if (iter->seq.len >= cnt)
 			break;
 	}
+	trace_event_read_unlock();
 
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3053,6 +3156,8 @@
 		goto out_err;
 	}
 
+	trace_event_read_lock();
+
 	/* Fill as many pages as possible. */
 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
 		pages[i] = alloc_page(GFP_KERNEL);
@@ -3075,6 +3180,7 @@
 		trace_seq_init(&iter->seq);
 	}
 
+	trace_event_read_unlock();
 	mutex_unlock(&iter->mutex);
 
 	spd.nr_pages = i;
@@ -3425,7 +3531,7 @@
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
-	int size, i;
+	int entries, size, i;
 	size_t ret;
 
 	if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3546,9 @@
 		len &= PAGE_MASK;
 	}
 
-	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
+	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+
+	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
 
@@ -3457,7 +3565,7 @@
 		}
 
 		r = ring_buffer_read_page(ref->buffer, &ref->page,
-					  len, info->cpu, 0);
+					  len, info->cpu, 1);
 		if (r < 0) {
 			ring_buffer_free_read_page(ref->buffer,
 						   ref->page);
@@ -3481,6 +3589,8 @@
 		spd.partial[i].private = (unsigned long)ref;
 		spd.nr_pages++;
 		*ppos += PAGE_SIZE;
+
+		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 	}
 
 	spd.nr_pages = i;
@@ -3508,6 +3618,45 @@
 	.llseek		= no_llseek,
 };
 
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+		   size_t count, loff_t *ppos)
+{
+	unsigned long cpu = (unsigned long)filp->private_data;
+	struct trace_array *tr = &global_trace;
+	struct trace_seq *s;
+	unsigned long cnt;
+
+	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	if (!s)
+		return ENOMEM;
+
+	trace_seq_init(s);
+
+	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "entries: %ld\n", cnt);
+
+	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
+
+	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return count;
+}
+
+static const struct file_operations tracing_stats_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3746,7 @@
 static void tracing_init_debugfs_percpu(long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu();
-	struct dentry *entry, *d_cpu;
+	struct dentry *d_cpu;
 	/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
 	char cpu_dir[7];
 
@@ -3612,21 +3761,18 @@
 	}
 
 	/* per cpu trace_pipe */
-	entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
-				(void *) cpu, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe' entry\n");
+	trace_create_file("trace_pipe", 0444, d_cpu,
+			(void *) cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
-	entry = debugfs_create_file("trace", 0644, d_cpu,
-				(void *) cpu, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+	trace_create_file("trace", 0644, d_cpu,
+			(void *) cpu, &tracing_fops);
 
-	entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
-				    (void *) cpu, &tracing_buffers_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
+	trace_create_file("trace_pipe_raw", 0444, d_cpu,
+			(void *) cpu, &tracing_buffers_fops);
+
+	trace_create_file("stats", 0444, d_cpu,
+			(void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3928,22 @@
 	.write = trace_options_core_write,
 };
 
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops)
+{
+	struct dentry *ret;
+
+	ret = debugfs_create_file(name, mode, parent, data, fops);
+	if (!ret)
+		pr_warning("Could not create debugfs '%s' entry\n", name);
+
+	return ret;
+}
+
+
 static struct dentry *trace_options_init_dentry(void)
 {
 	struct dentry *d_tracer;
@@ -3809,7 +3971,6 @@
 			 struct tracer_opt *opt)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
@@ -3818,11 +3979,9 @@
 	topt->flags = flags;
 	topt->opt = opt;
 
-	entry = debugfs_create_file(opt->name, 0644, t_options, topt,
+	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
 				    &trace_options_fops);
 
-	topt->entry = entry;
-
 }
 
 static struct trace_option_dentry *
@@ -3877,123 +4036,84 @@
 create_trace_option_core_file(const char *option, long index)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return NULL;
 
-	entry = debugfs_create_file(option, 0644, t_options, (void *)index,
+	return trace_create_file(option, 0644, t_options, (void *)index,
 				    &trace_options_core_fops);
-
-	return entry;
 }
 
 static __init void create_trace_options_dir(void)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 	int i;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return;
 
-	for (i = 0; trace_options[i]; i++) {
-		entry = create_trace_option_core_file(trace_options[i], i);
-		if (!entry)
-			pr_warning("Could not create debugfs %s entry\n",
-				   trace_options[i]);
-	}
+	for (i = 0; trace_options[i]; i++)
+		create_trace_option_core_file(trace_options[i], i);
 }
 
 static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 	int cpu;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
-				    &global_trace, &tracing_ctrl_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+	trace_create_file("tracing_enabled", 0644, d_tracer,
+			&global_trace, &tracing_ctrl_fops);
 
-	entry = debugfs_create_file("trace_options", 0644, d_tracer,
-				    NULL, &tracing_iter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_options' entry\n");
+	trace_create_file("trace_options", 0644, d_tracer,
+			NULL, &tracing_iter_fops);
 
-	create_trace_options_dir();
+	trace_create_file("tracing_cpumask", 0644, d_tracer,
+			NULL, &tracing_cpumask_fops);
 
-	entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
-				    NULL, &tracing_cpumask_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
+	trace_create_file("trace", 0644, d_tracer,
+			(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
 
-	entry = debugfs_create_file("trace", 0644, d_tracer,
-				 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+	trace_create_file("available_tracers", 0444, d_tracer,
+			&global_trace, &show_traces_fops);
 
-	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
-				    &global_trace, &show_traces_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'available_tracers' entry\n");
+	trace_create_file("current_tracer", 0644, d_tracer,
+			&global_trace, &set_tracer_fops);
 
-	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
-				    &global_trace, &set_tracer_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'current_tracer' entry\n");
+	trace_create_file("tracing_max_latency", 0644, d_tracer,
+			&tracing_max_latency, &tracing_max_lat_fops);
 
-	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
-				    &tracing_max_latency,
-				    &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_max_latency' entry\n");
+	trace_create_file("tracing_thresh", 0644, d_tracer,
+			&tracing_thresh, &tracing_max_lat_fops);
 
-	entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
-				    &tracing_thresh, &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_thresh' entry\n");
-	entry = debugfs_create_file("README", 0644, d_tracer,
-				    NULL, &tracing_readme_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'README' entry\n");
+	trace_create_file("README", 0444, d_tracer,
+			NULL, &tracing_readme_fops);
 
-	entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+	trace_create_file("trace_pipe", 0444, d_tracer,
 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_pipe' entry\n");
 
-	entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
-				    &global_trace, &tracing_entries_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'buffer_size_kb' entry\n");
+	trace_create_file("buffer_size_kb", 0644, d_tracer,
+			&global_trace, &tracing_entries_fops);
 
-	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
-				    NULL, &tracing_mark_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_marker' entry\n");
+	trace_create_file("trace_marker", 0220, d_tracer,
+			NULL, &tracing_mark_fops);
+
+	trace_create_file("saved_cmdlines", 0444, d_tracer,
+			NULL, &tracing_saved_cmdlines_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-				    &ftrace_update_tot_cnt,
-				    &tracing_dyn_info_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'dyn_ftrace_total_info' entry\n");
+	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
 
+	create_trace_options_dir();
+
 	for_each_tracing_cpu(cpu)
 		tracing_init_debugfs_percpu(cpu);
 
@@ -4064,7 +4184,8 @@
 
 static void __ftrace_dump(bool disable_tracing)
 {
-	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	static raw_spinlock_t ftrace_dump_lock =
+		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
 	unsigned int old_userobj;
@@ -4073,7 +4194,8 @@
 	int cnt = 0, cpu;
 
 	/* only one dump */
-	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&ftrace_dump_lock);
 	if (dump_ran)
 		goto out;
 
@@ -4145,7 +4267,8 @@
 	}
 
  out:
-	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+	__raw_spin_unlock(&ftrace_dump_lock);
+	local_irq_restore(flags);
 }
 
 /* By default: disable tracing after the dump */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e685ac2..6e735d4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,9 +9,12 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <trace/power.h>
 
+#include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
+
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
@@ -42,20 +45,6 @@
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-	unsigned char		type;
-	unsigned char		flags;
-	unsigned char		preempt_count;
-	int			pid;
-	int			tgid;
-};
-
-/*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
@@ -263,8 +252,6 @@
 	char			comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@
 		__ftrace_bad_type();					\
 	} while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
-	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
-	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@
 	struct tracer_stat	*stats;
 };
 
-struct trace_seq {
-	unsigned char		buffer[PAGE_SIZE];
-	unsigned int		len;
-	unsigned int		readpos;
-};
-
-static inline void
-trace_seq_init(struct trace_seq *s)
-{
-	s->len = 0;
-	s->readpos = 0;
-}
-
 
 #define TRACE_PIPE_ALL_CPU	-1
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-	struct trace_array	*tr;
-	struct tracer		*trace;
-	void			*private;
-	int			cpu_file;
-	struct mutex		mutex;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq	seq;
-	struct trace_entry	*ent;
-	int			cpu;
-	u64			ts;
-
-	unsigned long		iter_flags;
-	loff_t			pos;
-	long			idx;
-
-	cpumask_var_t		started;
-};
-
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
+void tracing_reset_current(int cpu);
+void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops);
+
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct ring_buffer_event;
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags,
 						    int pc);
@@ -484,14 +432,6 @@
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
-				  unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
@@ -514,7 +454,6 @@
 				struct task_struct *prev,
 				struct task_struct *next,
 				unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
@@ -599,6 +538,8 @@
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
+extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+					      struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -613,6 +554,8 @@
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
@@ -644,7 +587,6 @@
 	return 1;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
@@ -692,6 +634,7 @@
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
 	TRACE_ITER_GLOBAL_CLK		= 0x80000,
 	TRACE_ITER_SLEEP_TIME		= 0x100000,
+	TRACE_ITER_GRAPH_TIME		= 0x200000,
 };
 
 /*
@@ -790,103 +733,113 @@
 	char			*type;
 	int			offset;
 	int			size;
+	int			is_signed;
 };
 
-struct ftrace_event_call {
-	char			*name;
-	char			*system;
-	struct dentry		*dir;
-	int			enabled;
-	int			(*regfunc)(void);
-	void			(*unregfunc)(void);
-	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct trace_seq *s);
-	int			(*define_fields)(void);
-	struct list_head	fields;
+struct event_filter {
+	int			n_preds;
 	struct filter_pred	**preds;
-
-#ifdef CONFIG_EVENT_PROFILE
-	atomic_t	profile_count;
-	int		(*profile_enable)(struct ftrace_event_call *);
-	void		(*profile_disable)(struct ftrace_event_call *);
-#endif
+	char			*filter_string;
 };
 
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
 	struct dentry		*entry;
-	struct filter_pred	**preds;
+	void			*filter;
 };
 
-#define events_for_each(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
-#define MAX_FILTER_PRED 8
-
 struct filter_pred;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
+				 int val1, int val2);
 
 struct filter_pred {
 	filter_pred_fn_t fn;
 	u64 val;
-	char *str_val;
+	char str_val[MAX_FILTER_STR_VAL];
 	int str_len;
 	char *field_name;
 	int offset;
 	int not;
-	int or;
-	int compound;
-	int clear;
+	int op;
+	int pop_n;
 };
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size);
-extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds,
+extern void print_event_filter(struct ftrace_event_call *call,
 			       struct trace_seq *s);
-extern int filter_parse(char **pbuf, struct filter_pred *pred);
-extern int filter_add_pred(struct ftrace_event_call *call,
-			   struct filter_pred *pred);
-extern void filter_free_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
-extern void filter_free_subsystem_preds(struct event_subsystem *system);
-extern int filter_add_subsystem_pred(struct event_subsystem *system,
-				     struct filter_pred *pred);
+extern int apply_event_filter(struct ftrace_event_call *call,
+			      char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+					char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
+					 struct trace_seq *s);
 
-void event_trace_printk(unsigned long ip, const char *fmt, ...);
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
+static inline int
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+		     struct ring_buffer *buffer,
+		     struct ring_buffer_event *event)
+{
+	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+		ring_buffer_discard_commit(buffer, event);
+		return 1;
+	}
 
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
+	return 0;
+}
+
+#define DEFINE_COMPARISON_PRED(type)					\
+static int filter_pred_##type(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	type *addr = (type *)(event + pred->offset);			\
+	type val = (type)pred->val;					\
+	int match = 0;							\
+									\
+	switch (pred->op) {						\
+	case OP_LT:							\
+		match = (*addr < val);					\
+		break;							\
+	case OP_LE:							\
+		match = (*addr <= val);					\
+		break;							\
+	case OP_GT:							\
+		match = (*addr > val);					\
+		break;							\
+	case OP_GE:							\
+		match = (*addr >= val);					\
+		break;							\
+	default:							\
+		break;							\
+	}								\
+									\
+	return match;							\
+}
+
+#define DEFINE_EQUALITY_PRED(size)					\
+static int filter_pred_##size(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	u##size *addr = (u##size *)(event + pred->offset);		\
+	u##size val = (u##size)pred->val;				\
+	int match;							\
+									\
+	match = (val == *addr) ^ pred->not;				\
+									\
+	return match;							\
+}
+
+extern struct mutex event_mutex;
+extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)				\
-do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	tracing_record_cmdline(current);				\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
-									\
-		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
-	} else								\
-		__trace_printk(ip, fmt, ##args);			\
-} while (0)
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+	extern struct ftrace_event_call event_##call;
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
+#include "trace_event_types.h"
 
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7a30fc4..a29ef23 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -9,6 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -67,7 +68,7 @@
 	trace_assign_type(field, entry);
 	call = &field->boot_call;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
 			(unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@
 	trace_assign_type(field, entry);
 	init_ret = &field->boot_ret;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
 			"returned %d after %llu msecs\n",
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 8333715..7a7a9fd 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,6 +30,7 @@
 static void
 probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 {
+	struct ftrace_event_call *call = &event_branch;
 	struct trace_array *tr = branch_tracer;
 	struct ring_buffer_event *event;
 	struct trace_branch *entry;
@@ -73,7 +74,8 @@
 	entry->line = f->line;
 	entry->correct = val == expect;
 
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@
 	return 0;
 }
 
-static void *annotated_branch_stat_start(void)
+static void *annotated_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_annotated_branch_profile;
 }
@@ -346,7 +348,7 @@
 	return 0;
 }
 
-static void *all_branch_stat_start(void)
+static void *all_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_branch_profile;
 }
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 22cba99..5b5895a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -10,22 +10,30 @@
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
+	int ret = -EINVAL;
 
-	for_each_event(event) {
-		if (event->id == event_id)
-			return event->profile_enable(event);
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			ret = event->profile_enable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 
-	return -EINVAL;
+	return ret;
 }
 
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
-		if (event->id == event_id)
-			return event->profile_disable(event);
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			event->profile_disable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 }
-
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index fd78bee..5e32e37 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -57,7 +57,7 @@
 	TP_RAW_FMT("%u:%u:%u  ==+ %u:%u:%u [%03u]")
 );
 
-TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
+TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, arg1, arg1)
 		TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@
 TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned int, line, line)
-		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
-		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
+		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
+				    TRACE_FUNC_SIZE+1, func)
+		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
+				    TRACE_FUNC_SIZE+1, file)
 		TRACE_FIELD(char, correct, correct)
 	),
 	TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@
 
 TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
 	TRACE_STRUCT(
-		TRACE_FIELD(ktime_t, state_data.stamp, stamp)
-		TRACE_FIELD(ktime_t, state_data.end, end)
+		TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
+		TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
 		TRACE_FIELD(int, state_data.type, type)
 		TRACE_FIELD(int, state_data.state, state)
 	),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 576f4fa..aa08be6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -8,19 +8,25 @@
  *
  */
 
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 #include "trace_output.h"
 
 #define TRACE_SYSTEM "TRACE_SYSTEM"
 
-static DEFINE_MUTEX(event_mutex);
+DEFINE_MUTEX(event_mutex);
+
+LIST_HEAD(ftrace_events);
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size)
+		       char *name, int offset, int size, int is_signed)
 {
 	struct ftrace_event_field *field;
 
@@ -38,6 +44,7 @@
 
 	field->offset = offset;
 	field->size = size;
+	field->is_signed = is_signed;
 	list_add(&field->link, &call->fields);
 
 	return 0;
@@ -51,47 +58,94 @@
 
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(trace_define_field);
 
-static void ftrace_clear_events(void)
+#ifdef CONFIG_MODULES
+
+static void trace_destroy_fields(struct ftrace_event_call *call)
 {
-	struct ftrace_event_call *call = (void *)__start_ftrace_events;
+	struct ftrace_event_field *field, *next;
 
-
-	while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
-
-		if (call->enabled) {
-			call->enabled = 0;
-			call->unregfunc();
-		}
-		call++;
+	list_for_each_entry_safe(field, next, &call->fields, link) {
+		list_del(&field->link);
+		kfree(field->type);
+		kfree(field->name);
+		kfree(field);
 	}
 }
 
+#endif /* CONFIG_MODULES */
+
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
-
 	switch (enable) {
 	case 0:
 		if (call->enabled) {
 			call->enabled = 0;
+			tracing_stop_cmdline_record();
 			call->unregfunc();
 		}
 		break;
 	case 1:
 		if (!call->enabled) {
 			call->enabled = 1;
+			tracing_start_cmdline_record();
 			call->regfunc();
 		}
 		break;
 	}
 }
 
+static void ftrace_clear_events(void)
+{
+	struct ftrace_event_call *call;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		ftrace_event_enable_disable(call, 0);
+	}
+	mutex_unlock(&event_mutex);
+}
+
+/*
+ * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
+ */
+static int __ftrace_set_clr_event(const char *match, const char *sub,
+				  const char *event, int set)
+{
+	struct ftrace_event_call *call;
+	int ret = -EINVAL;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (match &&
+		    strcmp(match, call->name) != 0 &&
+		    strcmp(match, call->system) != 0)
+			continue;
+
+		if (sub && strcmp(sub, call->system) != 0)
+			continue;
+
+		if (event && strcmp(event, call->name) != 0)
+			continue;
+
+		ftrace_event_enable_disable(call, set);
+
+		ret = 0;
+	}
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
 static int ftrace_set_clr_event(char *buf, int set)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
 	char *event = NULL, *sub = NULL, *match;
-	int ret = -EINVAL;
 
 	/*
 	 * The buf format can be <subsystem>:<event-name>
@@ -117,30 +171,24 @@
 			event = NULL;
 	}
 
-	mutex_lock(&event_mutex);
-	for_each_event(call) {
+	return __ftrace_set_clr_event(match, sub, event, set);
+}
 
-		if (!call->name || !call->regfunc)
-			continue;
-
-		if (match &&
-		    strcmp(match, call->name) != 0 &&
-		    strcmp(match, call->system) != 0)
-			continue;
-
-		if (sub && strcmp(sub, call->system) != 0)
-			continue;
-
-		if (event && strcmp(event, call->name) != 0)
-			continue;
-
-		ftrace_event_enable_disable(call, set);
-
-		ret = 0;
-	}
-	mutex_unlock(&event_mutex);
-
-	return ret;
+/**
+ * trace_set_clr_event - enable or disable an event
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @set: 1 to enable, 0 to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_set_clr_event(const char *system, const char *event, int set)
+{
+	return __ftrace_set_clr_event(NULL, system, event, set);
 }
 
 /* 128 should be much more than enough */
@@ -224,15 +272,17 @@
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next = call;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
 	for (;;) {
-		if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+		if (list == &ftrace_events)
 			return NULL;
 
+		call = list_entry(list, struct ftrace_event_call, list);
+
 		/*
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
@@ -240,45 +290,51 @@
 		if (call->regfunc)
 			break;
 
-		call++;
-		next = call;
+		list = list->next;
 	}
 
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return t_next(m, NULL, pos);
 }
 
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
  retry:
-	if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+	if (list == &ftrace_events)
 		return NULL;
 
+	call = list_entry(list, struct ftrace_event_call, list);
+
 	if (!call->enabled) {
-		call++;
+		list = list->next;
 		goto retry;
 	}
 
-	next = call;
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return s_next(m, NULL, pos);
 }
 
@@ -295,12 +351,12 @@
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	mutex_unlock(&event_mutex);
 }
 
 static int
 ftrace_event_seq_open(struct inode *inode, struct file *file)
 {
-	int ret;
 	const struct seq_operations *seq_ops;
 
 	if ((file->f_mode & FMODE_WRITE) &&
@@ -308,13 +364,7 @@
 		ftrace_clear_events();
 
 	seq_ops = inode->i_private;
-	ret = seq_open(file, seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = __start_ftrace_events;
-	}
-	return ret;
+	return seq_open(file, seq_ops);
 }
 
 static ssize_t
@@ -374,8 +424,93 @@
 	return cnt;
 }
 
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	const char set_to_char[4] = { '?', '0', '1', 'X' };
+	const char *system = filp->private_data;
+	struct ftrace_event_call *call;
+	char buf[2];
+	int set = 0;
+	int ret;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (system && strcmp(call->system, system) != 0)
+			continue;
+
+		/*
+		 * We need to find out if all the events are set
+		 * or if all events or cleared, or if we have
+		 * a mixture.
+		 */
+		set |= (1 << !!call->enabled);
+
+		/*
+		 * If we have a mixture, no need to look further.
+		 */
+		if (set == 3)
+			break;
+	}
+	mutex_unlock(&event_mutex);
+
+	buf[0] = set_to_char[set];
+	buf[1] = '\n';
+
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
+
+	return ret;
+}
+
+static ssize_t
+system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		    loff_t *ppos)
+{
+	const char *system = filp->private_data;
+	unsigned long val;
+	char buf[64];
+	ssize_t ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+	if (ret)
+		goto out;
+
+	ret = cnt;
+
+out:
+	*ppos += cnt;
+
+	return ret;
+}
+
+extern char *__bad_type_size(void);
+
 #undef FIELD
 #define FIELD(type, name)						\
+	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
 	#type, "common_" #name, offsetof(typeof(field), name),		\
 		sizeof(field.name)
 
@@ -391,7 +526,7 @@
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(unsigned char, type),
+				FIELD(unsigned short, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
@@ -481,7 +616,7 @@
 
 	trace_seq_init(s);
 
-	filter_print_preds(call->preds, s);
+	print_event_filter(call, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -494,38 +629,26 @@
 		   loff_t *ppos)
 {
 	struct ftrace_event_call *call = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	if (pred->clear) {
-		filter_free_preds(call);
-		filter_free_pred(pred);
-		return cnt;
-	}
-
-	err = filter_add_pred(call, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_event_filter(call, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
@@ -549,7 +672,7 @@
 
 	trace_seq_init(s);
 
-	filter_print_preds(system->preds, s);
+	print_subsystem_event_filter(system, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -562,45 +685,56 @@
 		       loff_t *ppos)
 {
 	struct event_subsystem *system = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	if (pred->clear) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
-		return cnt;
-	}
-
-	err = filter_add_subsystem_pred(system, pred);
-	if (err < 0) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
+	err = apply_subsystem_event_filter(system, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
 	return cnt;
 }
 
+static ssize_t
+show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	int (*func)(struct trace_seq *s) = filp->private_data;
+	struct trace_seq *s;
+	int r;
+
+	if (*ppos)
+		return 0;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	trace_seq_init(s);
+
+	func(s);
+	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return r;
+}
+
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
 	.next = t_next,
@@ -658,6 +792,17 @@
 	.write = subsystem_filter_write,
 };
 
+static const struct file_operations ftrace_system_enable_fops = {
+	.open = tracing_open_generic,
+	.read = system_enable_read,
+	.write = system_enable_write,
+};
+
+static const struct file_operations ftrace_show_header_fops = {
+	.open = tracing_open_generic,
+	.read = show_header,
+};
+
 static struct dentry *event_trace_events_dir(void)
 {
 	static struct dentry *d_tracer;
@@ -684,6 +829,7 @@
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
 	struct event_subsystem *system;
+	struct dentry *entry;
 
 	/* First see if we did not already create this dir */
 	list_for_each_entry(system, &event_subsystems, list) {
@@ -707,16 +853,46 @@
 		return d_events;
 	}
 
-	system->name = name;
+	system->name = kstrdup(name, GFP_KERNEL);
+	if (!system->name) {
+		debugfs_remove(system->entry);
+		kfree(system);
+		return d_events;
+	}
+
 	list_add(&system->list, &event_subsystems);
 
-	system->preds = NULL;
+	system->filter = NULL;
+
+	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+	if (!system->filter) {
+		pr_warning("Could not allocate filter for subsystem "
+			   "'%s'\n", name);
+		return system->entry;
+	}
+
+	entry = debugfs_create_file("filter", 0644, system->entry, system,
+				    &ftrace_subsystem_filter_fops);
+	if (!entry) {
+		kfree(system->filter);
+		system->filter = NULL;
+		pr_warning("Could not create debugfs "
+			   "'%s/filter' entry\n", name);
+	}
+
+	entry = trace_create_file("enable", 0644, system->entry,
+				  (void *)system->name,
+				  &ftrace_system_enable_fops);
 
 	return system->entry;
 }
 
 static int
-event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
+event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
+		 const struct file_operations *id,
+		 const struct file_operations *enable,
+		 const struct file_operations *filter,
+		 const struct file_operations *format)
 {
 	struct dentry *entry;
 	int ret;
@@ -725,7 +901,7 @@
 	 * If the trace point header did not define TRACE_SYSTEM
 	 * then the system would be called "TRACE_SYSTEM".
 	 */
-	if (strcmp(call->system, "TRACE_SYSTEM") != 0)
+	if (strcmp(call->system, TRACE_SYSTEM) != 0)
 		d_events = event_subsystem_dir(call->system, d_events);
 
 	if (call->raw_init) {
@@ -744,21 +920,13 @@
 		return -1;
 	}
 
-	if (call->regfunc) {
-		entry = debugfs_create_file("enable", 0644, call->dir, call,
-					    &ftrace_enable_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/enable' entry\n", call->name);
-	}
+	if (call->regfunc)
+		entry = trace_create_file("enable", 0644, call->dir, call,
+					  enable);
 
-	if (call->id) {
-		entry = debugfs_create_file("id", 0444, call->dir, call,
-				&ftrace_event_id_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs '%s/id' entry\n",
-					call->name);
-	}
+	if (call->id)
+		entry = trace_create_file("id", 0444, call->dir, call,
+					  id);
 
 	if (call->define_fields) {
 		ret = call->define_fields();
@@ -767,32 +935,195 @@
 				   " events/%s\n", call->name);
 			return ret;
 		}
-		entry = debugfs_create_file("filter", 0644, call->dir, call,
-					    &ftrace_event_filter_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/filter' entry\n", call->name);
+		entry = trace_create_file("filter", 0644, call->dir, call,
+					  filter);
 	}
 
 	/* A trace may not want to export its format */
 	if (!call->show_format)
 		return 0;
 
-	entry = debugfs_create_file("format", 0444, call->dir, call,
-				    &ftrace_event_format_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'%s/format' entry\n", call->name);
+	entry = trace_create_file("format", 0444, call->dir, call,
+				  format);
 
 	return 0;
 }
 
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+	struct list_head		list;
+	struct module			*mod;
+	struct file_operations		id;
+	struct file_operations		enable;
+	struct file_operations		format;
+	struct file_operations		filter;
+};
+
+static struct ftrace_module_file_ops *
+trace_create_file_ops(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops;
+
+	/*
+	 * This is a bit of a PITA. To allow for correct reference
+	 * counting, modules must "own" their file_operations.
+	 * To do this, we allocate the file operations that will be
+	 * used in the event directory.
+	 */
+
+	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
+	if (!file_ops)
+		return NULL;
+
+	file_ops->mod = mod;
+
+	file_ops->id = ftrace_event_id_fops;
+	file_ops->id.owner = mod;
+
+	file_ops->enable = ftrace_enable_fops;
+	file_ops->enable.owner = mod;
+
+	file_ops->filter = ftrace_event_filter_fops;
+	file_ops->filter.owner = mod;
+
+	file_ops->format = ftrace_event_format_fops;
+	file_ops->format.owner = mod;
+
+	list_add(&file_ops->list, &ftrace_module_file_list);
+
+	return file_ops;
+}
+
+static void trace_module_add_events(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops = NULL;
+	struct ftrace_event_call *call, *start, *end;
+	struct dentry *d_events;
+
+	start = mod->trace_events;
+	end = mod->trace_events + mod->num_trace_events;
+
+	if (start == end)
+		return;
+
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return;
+
+	for_each_event(call, start, end) {
+		/* The linker may leave blanks */
+		if (!call->name)
+			continue;
+
+		/*
+		 * This module has events, create file ops for this module
+		 * if not already done.
+		 */
+		if (!file_ops) {
+			file_ops = trace_create_file_ops(mod);
+			if (!file_ops)
+				return;
+		}
+		call->mod = mod;
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events,
+				 &file_ops->id, &file_ops->enable,
+				 &file_ops->filter, &file_ops->format);
+	}
+}
+
+static void trace_module_remove_events(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops;
+	struct ftrace_event_call *call, *p;
+	bool found = false;
+
+	down_write(&trace_event_mutex);
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		if (call->mod == mod) {
+			found = true;
+			ftrace_event_enable_disable(call, 0);
+			if (call->event)
+				__unregister_ftrace_event(call->event);
+			debugfs_remove_recursive(call->dir);
+			list_del(&call->list);
+			trace_destroy_fields(call);
+			destroy_preds(call);
+		}
+	}
+
+	/* Now free the file_operations */
+	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
+		if (file_ops->mod == mod)
+			break;
+	}
+	if (&file_ops->list != &ftrace_module_file_list) {
+		list_del(&file_ops->list);
+		kfree(file_ops);
+	}
+
+	/*
+	 * It is safest to reset the ring buffer if the module being unloaded
+	 * registered any events.
+	 */
+	if (found)
+		tracing_reset_current_online_cpus();
+	up_write(&trace_event_mutex);
+}
+
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	mutex_lock(&event_mutex);
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_events(mod);
+		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_events(mod);
+		break;
+	}
+	mutex_unlock(&event_mutex);
+
+	return 0;
+}
+#else
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
 static __init int event_trace_init(void)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
+	int ret;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -816,13 +1147,243 @@
 	if (!d_events)
 		return 0;
 
-	for_each_event(call) {
+	/* ring buffer internal formats */
+	trace_create_file("header_page", 0444, d_events,
+			  ring_buffer_print_page_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("header_event", 0444, d_events,
+			  ring_buffer_print_entry_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("enable", 0644, d_events,
+			  NULL, &ftrace_system_enable_fops);
+
+	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
-		event_create_dir(call, d_events);
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events, &ftrace_event_id_fops,
+				 &ftrace_enable_fops, &ftrace_event_filter_fops,
+				 &ftrace_event_format_fops);
 	}
 
+	ret = register_module_notifier(&trace_module_nb);
+	if (ret)
+		pr_warning("Failed to register trace events module notifier\n");
+
 	return 0;
 }
 fs_initcall(event_trace_init);
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static DEFINE_SPINLOCK(test_spinlock);
+static DEFINE_SPINLOCK(test_spinlock_irq);
+static DEFINE_MUTEX(test_mutex);
+
+static __init void test_work(struct work_struct *dummy)
+{
+	spin_lock(&test_spinlock);
+	spin_lock_irq(&test_spinlock_irq);
+	udelay(1);
+	spin_unlock_irq(&test_spinlock_irq);
+	spin_unlock(&test_spinlock);
+
+	mutex_lock(&test_mutex);
+	msleep(1);
+	mutex_unlock(&test_mutex);
+}
+
+static __init int event_test_thread(void *unused)
+{
+	void *test_malloc;
+
+	test_malloc = kmalloc(1234, GFP_KERNEL);
+	if (!test_malloc)
+		pr_info("failed to kmalloc\n");
+
+	schedule_on_each_cpu(test_work);
+
+	kfree(test_malloc);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop())
+		schedule();
+
+	return 0;
+}
+
+/*
+ * Do various things that may trigger events.
+ */
+static __init void event_test_stuff(void)
+{
+	struct task_struct *test_thread;
+
+	test_thread = kthread_run(event_test_thread, NULL, "test-events");
+	msleep(1);
+	kthread_stop(test_thread);
+}
+
+/*
+ * For every trace event defined, we will test each trace point separately,
+ * and then by groups, and finally all trace points.
+ */
+static __init void event_trace_self_tests(void)
+{
+	struct ftrace_event_call *call;
+	struct event_subsystem *system;
+	int ret;
+
+	pr_info("Running tests on trace events:\n");
+
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		/* Only test those that have a regfunc */
+		if (!call->regfunc)
+			continue;
+
+		pr_info("Testing event %s: ", call->name);
+
+		/*
+		 * If an event is already enabled, someone is using
+		 * it and the self test should not be on.
+		 */
+		if (call->enabled) {
+			pr_warning("Enabled event during self test!\n");
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		ftrace_event_enable_disable(call, 1);
+		event_test_stuff();
+		ftrace_event_enable_disable(call, 0);
+
+		pr_cont("OK\n");
+	}
+
+	/* Now test at the sub system level */
+
+	pr_info("Running tests on trace event systems:\n");
+
+	list_for_each_entry(system, &event_subsystems, list) {
+
+		/* the ftrace system is special, skip it */
+		if (strcmp(system->name, "ftrace") == 0)
+			continue;
+
+		pr_info("Testing event system %s: ", system->name);
+
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
+		if (WARN_ON_ONCE(ret)) {
+			pr_warning("error enabling system %s\n",
+				   system->name);
+			continue;
+		}
+
+		event_test_stuff();
+
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
+		if (WARN_ON_ONCE(ret))
+			pr_warning("error disabling system %s\n",
+				   system->name);
+
+		pr_cont("OK\n");
+	}
+
+	/* Test with all events enabled */
+
+	pr_info("Running tests on all trace events:\n");
+	pr_info("Testing all events: ");
+
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error enabling all events\n");
+		return;
+	}
+
+	event_test_stuff();
+
+	/* reset sysname */
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error disabling all events\n");
+		return;
+	}
+
+	pr_cont("OK\n");
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+static DEFINE_PER_CPU(atomic_t, test_event_disable);
+
+static void
+function_test_events_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ring_buffer_event *event;
+	struct ftrace_entry *entry;
+	unsigned long flags;
+	long disabled;
+	int resched;
+	int cpu;
+	int pc;
+
+	pc = preempt_count();
+	resched = ftrace_preempt_disable();
+	cpu = raw_smp_processor_id();
+	disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+
+	if (disabled != 1)
+		goto out;
+
+	local_save_flags(flags);
+
+	event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
+						  flags, pc);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
+
+	trace_nowake_buffer_unlock_commit(event, flags, pc);
+
+ out:
+	atomic_dec(&per_cpu(test_event_disable, cpu));
+	ftrace_preempt_enable(resched);
+}
+
+static struct ftrace_ops trace_ops __initdata  =
+{
+	.func = function_test_events_call,
+};
+
+static __init void event_trace_self_test_with_function(void)
+{
+	register_ftrace_function(&trace_ops);
+	pr_info("Running tests again, along with the function tracer\n");
+	event_trace_self_tests();
+	unregister_ftrace_function(&trace_ops);
+}
+#else
+static __init void event_trace_self_test_with_function(void)
+{
+}
+#endif
+
+static __init int event_trace_self_tests_init(void)
+{
+
+	event_trace_self_tests();
+
+	event_trace_self_test_with_function();
+
+	return 0;
+}
+
+late_initcall(event_trace_self_tests_init);
+
+#endif
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e03cbf1..db6e54b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,55 +22,138 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/mutex.h>
 
 #include "trace.h"
 #include "trace_output.h"
 
-static int filter_pred_64(struct filter_pred *pred, void *event)
+static DEFINE_MUTEX(filter_mutex);
+
+enum filter_op_ids
 {
-	u64 *addr = (u64 *)(event + pred->offset);
-	u64 val = (u64)pred->val;
-	int match;
+	OP_OR,
+	OP_AND,
+	OP_NE,
+	OP_EQ,
+	OP_LT,
+	OP_LE,
+	OP_GT,
+	OP_GE,
+	OP_NONE,
+	OP_OPEN_PAREN,
+};
 
-	match = (val == *addr) ^ pred->not;
+struct filter_op {
+	int id;
+	char *string;
+	int precedence;
+};
 
-	return match;
+static struct filter_op filter_ops[] = {
+	{ OP_OR, "||", 1 },
+	{ OP_AND, "&&", 2 },
+	{ OP_NE, "!=", 4 },
+	{ OP_EQ, "==", 4 },
+	{ OP_LT, "<", 5 },
+	{ OP_LE, "<=", 5 },
+	{ OP_GT, ">", 5 },
+	{ OP_GE, ">=", 5 },
+	{ OP_NONE, "OP_NONE", 0 },
+	{ OP_OPEN_PAREN, "(", 0 },
+};
+
+enum {
+	FILT_ERR_NONE,
+	FILT_ERR_INVALID_OP,
+	FILT_ERR_UNBALANCED_PAREN,
+	FILT_ERR_TOO_MANY_OPERANDS,
+	FILT_ERR_OPERAND_TOO_LONG,
+	FILT_ERR_FIELD_NOT_FOUND,
+	FILT_ERR_ILLEGAL_FIELD_OP,
+	FILT_ERR_ILLEGAL_INTVAL,
+	FILT_ERR_BAD_SUBSYS_FILTER,
+	FILT_ERR_TOO_MANY_PREDS,
+	FILT_ERR_MISSING_FIELD,
+	FILT_ERR_INVALID_FILTER,
+};
+
+static char *err_text[] = {
+	"No error",
+	"Invalid operator",
+	"Unbalanced parens",
+	"Too many operands",
+	"Operand too long",
+	"Field not found",
+	"Illegal operation for field type",
+	"Illegal integer value",
+	"Couldn't find or set field in one of a subsystem's events",
+	"Too many terms in predicate expression",
+	"Missing field name and/or value",
+	"Meaningless filter expression",
+};
+
+struct opstack_op {
+	int op;
+	struct list_head list;
+};
+
+struct postfix_elt {
+	int op;
+	char *operand;
+	struct list_head list;
+};
+
+struct filter_parse_state {
+	struct filter_op *ops;
+	struct list_head opstack;
+	struct list_head postfix;
+	int lasterr;
+	int lasterr_pos;
+
+	struct {
+		char *string;
+		unsigned int cnt;
+		unsigned int tail;
+	} infix;
+
+	struct {
+		char string[MAX_FILTER_STR_VAL];
+		int pos;
+		unsigned int tail;
+	} operand;
+};
+
+DEFINE_COMPARISON_PRED(s64);
+DEFINE_COMPARISON_PRED(u64);
+DEFINE_COMPARISON_PRED(s32);
+DEFINE_COMPARISON_PRED(u32);
+DEFINE_COMPARISON_PRED(s16);
+DEFINE_COMPARISON_PRED(u16);
+DEFINE_COMPARISON_PRED(s8);
+DEFINE_COMPARISON_PRED(u8);
+
+DEFINE_EQUALITY_PRED(64);
+DEFINE_EQUALITY_PRED(32);
+DEFINE_EQUALITY_PRED(16);
+DEFINE_EQUALITY_PRED(8);
+
+static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
+			   void *event __attribute((unused)),
+			   int val1, int val2)
+{
+	return val1 && val2;
 }
 
-static int filter_pred_32(struct filter_pred *pred, void *event)
+static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
+			  void *event __attribute((unused)),
+			  int val1, int val2)
 {
-	u32 *addr = (u32 *)(event + pred->offset);
-	u32 val = (u32)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 || val2;
 }
 
-static int filter_pred_16(struct filter_pred *pred, void *event)
-{
-	u16 *addr = (u16 *)(event + pred->offset);
-	u16 val = (u16)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_8(struct filter_pred *pred, void *event)
-{
-	u8 *addr = (u8 *)(event + pred->offset);
-	u8 val = (u8)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_string(struct filter_pred *pred, void *event)
+/* Filter predicate for fixed sized arrays of characters */
+static int filter_pred_string(struct filter_pred *pred, void *event,
+			      int val1, int val2)
 {
 	char *addr = (char *)(event + pred->offset);
 	int cmp, match;
@@ -82,59 +165,154 @@
 	return match;
 }
 
+/*
+ * Filter predicate for dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry.
+ * Also each of these strings have a field in the entry which
+ * contains its offset from the beginning of the entry.
+ * We have then first to get this field, dereference it
+ * and add it to the address of the entry, and at last we have
+ * the address of the string.
+ */
+static int filter_pred_strloc(struct filter_pred *pred, void *event,
+			      int val1, int val2)
+{
+	int str_loc = *(int *)(event + pred->offset);
+	char *addr = (char *)(event + str_loc);
+	int cmp, match;
+
+	cmp = strncmp(addr, pred->str_val, pred->str_len);
+
+	match = (!cmp) ^ pred->not;
+
+	return match;
+}
+
+static int filter_pred_none(struct filter_pred *pred, void *event,
+			    int val1, int val2)
+{
+	return 0;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
-	int i, matched, and_failed = 0;
-	struct filter_pred *pred;
-
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (call->preds[i]) {
-			pred = call->preds[i];
-			if (and_failed && !pred->or)
-				continue;
-			matched = pred->fn(pred, rec);
-			if (!matched && !pred->or) {
-				and_failed = 1;
-				continue;
-			} else if (matched && pred->or)
-				return 1;
-		} else
-			break;
-	}
-
-	if (and_failed)
-		return 0;
-
-	return 1;
-}
-
-void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
-{
-	char *field_name;
+	struct event_filter *filter = call->filter;
+	int match, top = 0, val1 = 0, val2 = 0;
+	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
 	int i;
 
-	if (!preds) {
-		trace_seq_printf(s, "none\n");
-		return;
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
+		if (!pred->pop_n) {
+			match = pred->fn(pred, rec, val1, val2);
+			stack[top++] = match;
+			continue;
+		}
+		if (pred->pop_n > top) {
+			WARN_ON_ONCE(1);
+			return 0;
+		}
+		val1 = stack[--top];
+		val2 = stack[--top];
+		match = pred->fn(pred, rec, val1, val2);
+		stack[top++] = match;
 	}
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (preds[i]) {
-			pred = preds[i];
-			field_name = pred->field_name;
-			if (i)
-				trace_seq_printf(s, pred->or ? "|| " : "&& ");
-			trace_seq_printf(s, "%s ", field_name);
-			trace_seq_printf(s, pred->not ? "!= " : "== ");
-			if (pred->str_val)
-				trace_seq_printf(s, "%s\n", pred->str_val);
-			else
-				trace_seq_printf(s, "%llu\n", pred->val);
-		} else
-			break;
-	}
+	return stack[--top];
+}
+EXPORT_SYMBOL_GPL(filter_match_preds);
+
+static void parse_error(struct filter_parse_state *ps, int err, int pos)
+{
+	ps->lasterr = err;
+	ps->lasterr_pos = pos;
+}
+
+static void remove_filter_string(struct event_filter *filter)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = NULL;
+}
+
+static int replace_filter_string(struct event_filter *filter,
+				 char *filter_string)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+	if (!filter->filter_string)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int append_filter_string(struct event_filter *filter,
+				char *string)
+{
+	int newlen;
+	char *new_filter_string;
+
+	BUG_ON(!filter->filter_string);
+	newlen = strlen(filter->filter_string) + strlen(string) + 1;
+	new_filter_string = kmalloc(newlen, GFP_KERNEL);
+	if (!new_filter_string)
+		return -ENOMEM;
+
+	strcpy(new_filter_string, filter->filter_string);
+	strcat(new_filter_string, string);
+	kfree(filter->filter_string);
+	filter->filter_string = new_filter_string;
+
+	return 0;
+}
+
+static void append_filter_err(struct filter_parse_state *ps,
+			      struct event_filter *filter)
+{
+	int pos = ps->lasterr_pos;
+	char *buf, *pbuf;
+
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
+		return;
+
+	append_filter_string(filter, "\n");
+	memset(buf, ' ', PAGE_SIZE);
+	if (pos > PAGE_SIZE - 128)
+		pos = 0;
+	buf[pos] = '^';
+	pbuf = &buf[pos] + 1;
+
+	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
+	append_filter_string(filter, buf);
+	free_page((unsigned long) buf);
+}
+
+void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
+{
+	struct event_filter *filter = call->filter;
+
+	mutex_lock(&filter_mutex);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
+	mutex_unlock(&filter_mutex);
+}
+
+void print_subsystem_event_filter(struct event_subsystem *system,
+				  struct trace_seq *s)
+{
+	struct event_filter *filter = system->filter;
+
+	mutex_lock(&filter_mutex);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
+	mutex_unlock(&filter_mutex);
 }
 
 static struct ftrace_event_field *
@@ -150,284 +328,828 @@
 	return NULL;
 }
 
-void filter_free_pred(struct filter_pred *pred)
+static void filter_free_pred(struct filter_pred *pred)
 {
 	if (!pred)
 		return;
 
 	kfree(pred->field_name);
-	kfree(pred->str_val);
 	kfree(pred);
 }
 
-void filter_free_preds(struct ftrace_event_call *call)
+static void filter_clear_pred(struct filter_pred *pred)
 {
-	int i;
-
-	if (call->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
-			filter_free_pred(call->preds[i]);
-		kfree(call->preds);
-		call->preds = NULL;
-	}
+	kfree(pred->field_name);
+	pred->field_name = NULL;
+	pred->str_len = 0;
 }
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
+static int filter_set_pred(struct filter_pred *dest,
+			   struct filter_pred *src,
+			   filter_pred_fn_t fn)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
-	int i;
-
-	if (system->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
-			filter_free_pred(system->preds[i]);
-		kfree(system->preds);
-		system->preds = NULL;
-	}
-
-	events_for_each(call) {
-		if (!call->name || !call->regfunc)
-			continue;
-
-		if (!strcmp(call->system, system->name))
-			filter_free_preds(call);
-	}
-}
-
-static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred)
-{
-	int i;
-
-	if (call->preds && !pred->compound)
-		filter_free_preds(call);
-
-	if (!call->preds) {
-		call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-				      GFP_KERNEL);
-		if (!call->preds)
+	*dest = *src;
+	if (src->field_name) {
+		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
+		if (!dest->field_name)
 			return -ENOMEM;
 	}
-
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!call->preds[i]) {
-			call->preds[i] = pred;
-			return 0;
-		}
-	}
-
-	return -ENOSPC;
-}
-
-static int is_string_field(const char *type)
-{
-	if (strchr(type, '[') && strstr(type, "char"))
-		return 1;
+	dest->fn = fn;
 
 	return 0;
 }
 
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+static void filter_disable_preds(struct ftrace_event_call *call)
 {
-	struct ftrace_event_field *field;
-
-	field = find_event_field(call, pred->field_name);
-	if (!field)
-		return -EINVAL;
-
-	pred->offset = field->offset;
-
-	if (is_string_field(field->type)) {
-		if (!pred->str_val)
-			return -EINVAL;
-		pred->fn = filter_pred_string;
-		pred->str_len = field->size;
-		return __filter_add_pred(call, pred);
-	} else {
-		if (pred->str_val)
-			return -EINVAL;
-	}
-
-	switch (field->size) {
-	case 8:
-		pred->fn = filter_pred_64;
-		break;
-	case 4:
-		pred->fn = filter_pred_32;
-		break;
-	case 2:
-		pred->fn = filter_pred_16;
-		break;
-	case 1:
-		pred->fn = filter_pred_8;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return __filter_add_pred(call, pred);
-}
-
-static struct filter_pred *copy_pred(struct filter_pred *pred)
-{
-	struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
-	if (!new_pred)
-		return NULL;
-
-	memcpy(new_pred, pred, sizeof(*pred));
-
-	if (pred->field_name) {
-		new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-		if (!new_pred->field_name) {
-			kfree(new_pred);
-			return NULL;
-		}
-	}
-
-	if (pred->str_val) {
-		new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
-		if (!new_pred->str_val) {
-			filter_free_pred(new_pred);
-			return NULL;
-		}
-	}
-
-	return new_pred;
-}
-
-int filter_add_subsystem_pred(struct event_subsystem *system,
-			      struct filter_pred *pred)
-{
-	struct ftrace_event_call *call = __start_ftrace_events;
-	struct filter_pred *event_pred;
+	struct event_filter *filter = call->filter;
 	int i;
 
-	if (system->preds && !pred->compound)
-		filter_free_subsystem_preds(system);
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
-	if (!system->preds) {
-		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-					GFP_KERNEL);
-		if (!system->preds)
-			return -ENOMEM;
-	}
+	for (i = 0; i < MAX_FILTER_PRED; i++)
+		filter->preds[i]->fn = filter_pred_none;
+}
+
+void destroy_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter = call->filter;
+	int i;
 
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!system->preds[i]) {
-			system->preds[i] = pred;
-			break;
-		}
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
 	}
+	kfree(filter->preds);
+	kfree(filter);
+	call->filter = NULL;
+}
 
-	if (i == MAX_FILTER_PRED)
-		return -ENOSPC;
+int init_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter;
+	struct filter_pred *pred;
+	int i;
 
-	events_for_each(call) {
-		int err;
+	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!call->filter)
+		return -ENOMEM;
 
-		if (!call->name || !call->regfunc)
-			continue;
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
-		if (strcmp(call->system, system->name))
-			continue;
+	filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+	if (!filter->preds)
+		goto oom;
 
-		if (!find_event_field(call, pred->field_name))
-			continue;
-
-		event_pred = copy_pred(pred);
-		if (!event_pred)
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+		if (!pred)
 			goto oom;
-
-		err = filter_add_pred(call, event_pred);
-		if (err)
-			filter_free_pred(event_pred);
-		if (err == -ENOMEM)
-			goto oom;
+		pred->fn = filter_pred_none;
+		filter->preds[i] = pred;
 	}
 
 	return 0;
 
 oom:
-	system->preds[i] = NULL;
+	destroy_preds(call);
+
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(init_preds);
 
-int filter_parse(char **pbuf, struct filter_pred *pred)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
-	char *tmp, *tok, *val_str = NULL;
-	int tok_n = 0;
+	struct event_filter *filter = system->filter;
+	struct ftrace_event_call *call;
+	int i;
 
-	/* field ==/!= number, or/and field ==/!= number, number */
-	while ((tok = strsep(pbuf, " \n"))) {
-		if (tok_n == 0) {
-			if (!strcmp(tok, "0")) {
-				pred->clear = 1;
-				return 0;
-			} else if (!strcmp(tok, "&&")) {
-				pred->or = 0;
-				pred->compound = 1;
-			} else if (!strcmp(tok, "||")) {
-				pred->or = 1;
-				pred->compound = 1;
-			} else
-				pred->field_name = tok;
-			tok_n = 1;
-			continue;
-		}
-		if (tok_n == 1) {
-			if (!pred->field_name)
-				pred->field_name = tok;
-			else if (!strcmp(tok, "!="))
-				pred->not = 1;
-			else if (!strcmp(tok, "=="))
-				pred->not = 0;
-			else {
-				pred->field_name = NULL;
-				return -EINVAL;
-			}
-			tok_n = 2;
-			continue;
-		}
-		if (tok_n == 2) {
-			if (pred->compound) {
-				if (!strcmp(tok, "!="))
-					pred->not = 1;
-				else if (!strcmp(tok, "=="))
-					pred->not = 0;
-				else {
-					pred->field_name = NULL;
-					return -EINVAL;
-				}
-			} else {
-				val_str = tok;
-				break; /* done */
-			}
-			tok_n = 3;
-			continue;
-		}
-		if (tok_n == 3) {
-			val_str = tok;
-			break; /* done */
-		}
+	if (filter->n_preds) {
+		for (i = 0; i < filter->n_preds; i++)
+			filter_free_pred(filter->preds[i]);
+		kfree(filter->preds);
+		filter->preds = NULL;
+		filter->n_preds = 0;
 	}
 
-	if (!val_str) {
-		pred->field_name = NULL;
-		return -EINVAL;
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!call->define_fields)
+			continue;
+
+		if (!strcmp(call->system, system->name)) {
+			filter_disable_preds(call);
+			remove_filter_string(call->filter);
+		}
+	}
+	mutex_unlock(&event_mutex);
+}
+
+static int filter_add_pred_fn(struct filter_parse_state *ps,
+			      struct ftrace_event_call *call,
+			      struct filter_pred *pred,
+			      filter_pred_fn_t fn)
+{
+	struct event_filter *filter = call->filter;
+	int idx, err;
+
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+		return -ENOSPC;
 	}
 
-	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-	if (!pred->field_name)
-		return -ENOMEM;
+	idx = filter->n_preds;
+	filter_clear_pred(filter->preds[idx]);
+	err = filter_set_pred(filter->preds[idx], pred, fn);
+	if (err)
+		return err;
 
-	pred->val = simple_strtoull(val_str, &tmp, 0);
-	if (tmp == val_str) {
-		pred->str_val = kstrdup(val_str, GFP_KERNEL);
-		if (!pred->str_val)
-			return -ENOMEM;
-	} else if (*tmp != '\0')
-		return -EINVAL;
+	filter->n_preds++;
+	call->filter_active = 1;
 
 	return 0;
 }
 
+enum {
+	FILTER_STATIC_STRING = 1,
+	FILTER_DYN_STRING
+};
+
+static int is_string_field(const char *type)
+{
+	if (strstr(type, "__data_loc") && strstr(type, "char"))
+		return FILTER_DYN_STRING;
+
+	if (strchr(type, '[') && strstr(type, "char"))
+		return FILTER_STATIC_STRING;
+
+	return 0;
+}
+
+static int is_legal_op(struct ftrace_event_field *field, int op)
+{
+	if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
+		return 0;
+
+	return 1;
+}
+
+static filter_pred_fn_t select_comparison_fn(int op, int field_size,
+					     int field_is_signed)
+{
+	filter_pred_fn_t fn = NULL;
+
+	switch (field_size) {
+	case 8:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_64;
+		else if (field_is_signed)
+			fn = filter_pred_s64;
+		else
+			fn = filter_pred_u64;
+		break;
+	case 4:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_32;
+		else if (field_is_signed)
+			fn = filter_pred_s32;
+		else
+			fn = filter_pred_u32;
+		break;
+	case 2:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_16;
+		else if (field_is_signed)
+			fn = filter_pred_s16;
+		else
+			fn = filter_pred_u16;
+		break;
+	case 1:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_8;
+		else if (field_is_signed)
+			fn = filter_pred_s8;
+		else
+			fn = filter_pred_u8;
+		break;
+	}
+
+	return fn;
+}
+
+static int filter_add_pred(struct filter_parse_state *ps,
+			   struct ftrace_event_call *call,
+			   struct filter_pred *pred)
+{
+	struct ftrace_event_field *field;
+	filter_pred_fn_t fn;
+	unsigned long long val;
+	int string_type;
+
+	pred->fn = filter_pred_none;
+
+	if (pred->op == OP_AND) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_and);
+	} else if (pred->op == OP_OR) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_or);
+	}
+
+	field = find_event_field(call, pred->field_name);
+	if (!field) {
+		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
+		return -EINVAL;
+	}
+
+	pred->offset = field->offset;
+
+	if (!is_legal_op(field, pred->op)) {
+		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
+		return -EINVAL;
+	}
+
+	string_type = is_string_field(field->type);
+	if (string_type) {
+		if (string_type == FILTER_STATIC_STRING)
+			fn = filter_pred_string;
+		else
+			fn = filter_pred_strloc;
+		pred->str_len = field->size;
+		if (pred->op == OP_NE)
+			pred->not = 1;
+		return filter_add_pred_fn(ps, call, pred, fn);
+	} else {
+		if (strict_strtoull(pred->str_val, 0, &val)) {
+			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
+			return -EINVAL;
+		}
+		pred->val = val;
+	}
+
+	fn = select_comparison_fn(pred->op, field->size, field->is_signed);
+	if (!fn) {
+		parse_error(ps, FILT_ERR_INVALID_OP, 0);
+		return -EINVAL;
+	}
+
+	if (pred->op == OP_NE)
+		pred->not = 1;
+
+	return filter_add_pred_fn(ps, call, pred, fn);
+}
+
+static int filter_add_subsystem_pred(struct filter_parse_state *ps,
+				     struct event_subsystem *system,
+				     struct filter_pred *pred,
+				     char *filter_string)
+{
+	struct event_filter *filter = system->filter;
+	struct ftrace_event_call *call;
+	int err = 0;
+
+	if (!filter->preds) {
+		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+					GFP_KERNEL);
+
+		if (!filter->preds)
+			return -ENOMEM;
+	}
+
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+		return -ENOSPC;
+	}
+
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		if (!call->define_fields)
+			continue;
+
+		if (strcmp(call->system, system->name))
+			continue;
+
+		err = filter_add_pred(ps, call, pred);
+		if (err) {
+			mutex_unlock(&event_mutex);
+			filter_free_subsystem_preds(system);
+			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+			goto out;
+		}
+		replace_filter_string(call->filter, filter_string);
+	}
+	mutex_unlock(&event_mutex);
+out:
+	return err;
+}
+
+static void parse_init(struct filter_parse_state *ps,
+		       struct filter_op *ops,
+		       char *infix_string)
+{
+	memset(ps, '\0', sizeof(*ps));
+
+	ps->infix.string = infix_string;
+	ps->infix.cnt = strlen(infix_string);
+	ps->ops = ops;
+
+	INIT_LIST_HEAD(&ps->opstack);
+	INIT_LIST_HEAD(&ps->postfix);
+}
+
+static char infix_next(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+
+	return ps->infix.string[ps->infix.tail++];
+}
+
+static char infix_peek(struct filter_parse_state *ps)
+{
+	if (ps->infix.tail == strlen(ps->infix.string))
+		return 0;
+
+	return ps->infix.string[ps->infix.tail];
+}
+
+static void infix_advance(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+	ps->infix.tail++;
+}
+
+static inline int is_precedence_lower(struct filter_parse_state *ps,
+				      int a, int b)
+{
+	return ps->ops[a].precedence < ps->ops[b].precedence;
+}
+
+static inline int is_op_char(struct filter_parse_state *ps, char c)
+{
+	int i;
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (ps->ops[i].string[0] == c)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int infix_get_op(struct filter_parse_state *ps, char firstc)
+{
+	char nextc = infix_peek(ps);
+	char opstr[3];
+	int i;
+
+	opstr[0] = firstc;
+	opstr[1] = nextc;
+	opstr[2] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string)) {
+			infix_advance(ps);
+			return ps->ops[i].id;
+		}
+	}
+
+	opstr[1] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string))
+			return ps->ops[i].id;
+	}
+
+	return OP_NONE;
+}
+
+static inline void clear_operand_string(struct filter_parse_state *ps)
+{
+	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
+	ps->operand.tail = 0;
+}
+
+static inline int append_operand_char(struct filter_parse_state *ps, char c)
+{
+	if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
+		return -EINVAL;
+
+	ps->operand.string[ps->operand.tail++] = c;
+
+	return 0;
+}
+
+static int filter_opstack_push(struct filter_parse_state *ps, int op)
+{
+	struct opstack_op *opstack_op;
+
+	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
+	if (!opstack_op)
+		return -ENOMEM;
+
+	opstack_op->op = op;
+	list_add(&opstack_op->list, &ps->opstack);
+
+	return 0;
+}
+
+static int filter_opstack_empty(struct filter_parse_state *ps)
+{
+	return list_empty(&ps->opstack);
+}
+
+static int filter_opstack_top(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+
+	return opstack_op->op;
+}
+
+static int filter_opstack_pop(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+	int op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+	op = opstack_op->op;
+	list_del(&opstack_op->list);
+
+	kfree(opstack_op);
+
+	return op;
+}
+
+static void filter_opstack_clear(struct filter_parse_state *ps)
+{
+	while (!filter_opstack_empty(ps))
+		filter_opstack_pop(ps);
+}
+
+static char *curr_operand(struct filter_parse_state *ps)
+{
+	return ps->operand.string;
+}
+
+static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = OP_NONE;
+	elt->operand = kstrdup(operand, GFP_KERNEL);
+	if (!elt->operand) {
+		kfree(elt);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static int postfix_append_op(struct filter_parse_state *ps, int op)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = op;
+	elt->operand = NULL;
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static void postfix_clear(struct filter_parse_state *ps)
+{
+	struct postfix_elt *elt;
+
+	while (!list_empty(&ps->postfix)) {
+		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
+		kfree(elt->operand);
+		list_del(&elt->list);
+	}
+}
+
+static int filter_parse(struct filter_parse_state *ps)
+{
+	int in_string = 0;
+	int op, top_op;
+	char ch;
+
+	while ((ch = infix_next(ps))) {
+		if (ch == '"') {
+			in_string ^= 1;
+			continue;
+		}
+
+		if (in_string)
+			goto parse_operand;
+
+		if (isspace(ch))
+			continue;
+
+		if (is_op_char(ps, ch)) {
+			op = infix_get_op(ps, ch);
+			if (op == OP_NONE) {
+				parse_error(ps, FILT_ERR_INVALID_OP, 0);
+				return -EINVAL;
+			}
+
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			while (!filter_opstack_empty(ps)) {
+				top_op = filter_opstack_top(ps);
+				if (!is_precedence_lower(ps, top_op, op)) {
+					top_op = filter_opstack_pop(ps);
+					postfix_append_op(ps, top_op);
+					continue;
+				}
+				break;
+			}
+
+			filter_opstack_push(ps, op);
+			continue;
+		}
+
+		if (ch == '(') {
+			filter_opstack_push(ps, OP_OPEN_PAREN);
+			continue;
+		}
+
+		if (ch == ')') {
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			top_op = filter_opstack_pop(ps);
+			while (top_op != OP_NONE) {
+				if (top_op == OP_OPEN_PAREN)
+					break;
+				postfix_append_op(ps, top_op);
+				top_op = filter_opstack_pop(ps);
+			}
+			if (top_op == OP_NONE) {
+				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+parse_operand:
+		if (append_operand_char(ps, ch)) {
+			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
+			return -EINVAL;
+		}
+	}
+
+	if (strlen(curr_operand(ps)))
+		postfix_append_operand(ps, curr_operand(ps));
+
+	while (!filter_opstack_empty(ps)) {
+		top_op = filter_opstack_pop(ps);
+		if (top_op == OP_NONE)
+			break;
+		if (top_op == OP_OPEN_PAREN) {
+			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+			return -EINVAL;
+		}
+		postfix_append_op(ps, top_op);
+	}
+
+	return 0;
+}
+
+static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->field_name = kstrdup(operand1, GFP_KERNEL);
+	if (!pred->field_name) {
+		kfree(pred);
+		return NULL;
+	}
+
+	strcpy(pred->str_val, operand2);
+	pred->str_len = strlen(operand2);
+
+	pred->op = op;
+
+	return pred;
+}
+
+static struct filter_pred *create_logical_pred(int op)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->op = op;
+
+	return pred;
+}
+
+static int check_preds(struct filter_parse_state *ps)
+{
+	int n_normal_preds = 0, n_logical_preds = 0;
+	struct postfix_elt *elt;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE)
+			continue;
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			n_logical_preds++;
+			continue;
+		}
+		n_normal_preds++;
+	}
+
+	if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
+		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int replace_preds(struct event_subsystem *system,
+			 struct ftrace_event_call *call,
+			 struct filter_parse_state *ps,
+			 char *filter_string)
+{
+	char *operand1 = NULL, *operand2 = NULL;
+	struct filter_pred *pred;
+	struct postfix_elt *elt;
+	int err;
+
+	err = check_preds(ps);
+	if (err)
+		return err;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE) {
+			if (!operand1)
+				operand1 = elt->operand;
+			else if (!operand2)
+				operand2 = elt->operand;
+			else {
+				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			pred = create_logical_pred(elt->op);
+			if (call) {
+				err = filter_add_pred(ps, call, pred);
+				filter_free_pred(pred);
+			} else
+				err = filter_add_subsystem_pred(ps, system,
+							pred, filter_string);
+			if (err)
+				return err;
+
+			operand1 = operand2 = NULL;
+			continue;
+		}
+
+		if (!operand1 || !operand2) {
+			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+			return -EINVAL;
+		}
+
+		pred = create_pred(elt->op, operand1, operand2);
+		if (call) {
+			err = filter_add_pred(ps, call, pred);
+			filter_free_pred(pred);
+		} else
+			err = filter_add_subsystem_pred(ps, system, pred,
+							filter_string);
+		if (err)
+			return err;
+
+		operand1 = operand2 = NULL;
+	}
+
+	return 0;
+}
+
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_disable_preds(call);
+		remove_filter_string(call->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto out_unlock;
+
+	filter_disable_preds(call);
+	replace_filter_string(call->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, call->filter);
+		goto out;
+	}
+
+	err = replace_preds(NULL, call, ps, filter_string);
+	if (err)
+		append_filter_err(ps, call->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+out_unlock:
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
+
+int apply_subsystem_event_filter(struct event_subsystem *system,
+				 char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_free_subsystem_preds(system);
+		remove_filter_string(system->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto out_unlock;
+
+	filter_free_subsystem_preds(system);
+	replace_filter_string(system->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, system->filter);
+		goto out;
+	}
+
+	err = replace_preds(system, NULL, ps, filter_string);
+	if (err)
+		append_filter_err(ps, system->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+out_unlock:
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
 
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 38985f9..0000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Stage 1 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * struct ftrace_raw_<call> {
- *	struct trace_entry		ent;
- *	<type>				<item>;
- *	<type2>				<item2>[<len>];
- *	[...]
- * };
- *
- * The <type> <item> is created by the __field(type, item) macro or
- * the __array(type2, item2, len) macro.
- * We simply do "type item;", and that will create the fields
- * in the structure.
- */
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
-#undef __array
-#define __array(type, item, len)	type	item[len];
-
-#undef __field
-#define __field(type, item)		type	item;
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-	};							\
-	static struct ftrace_event_call event_##name
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index d363c66..0000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Stage 2 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * enum print_line_t
- * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
- * {
- *	struct trace_seq *s = &iter->seq;
- *	struct ftrace_raw_<call> *field; <-- defined in stage 1
- *	struct trace_entry *entry;
- *	int ret;
- *
- *	entry = iter->ent;
- *
- *	if (entry->type != event_<call>.id) {
- *		WARN_ON_ONCE(1);
- *		return TRACE_TYPE_UNHANDLED;
- *	}
- *
- *	field = (typeof(field))entry;
- *
- *	ret = trace_seq_printf(s, <TP_printk> "\n");
- *	if (!ret)
- *		return TRACE_TYPE_PARTIAL_LINE;
- *
- *	return TRACE_TYPE_HANDLED;
- * }
- *
- * This is the method used to print the raw event to the trace
- * output format. Note, this is not needed if the data is read
- * in binary.
- */
-
-#undef __entry
-#define __entry field
-
-#undef TP_printk
-#define TP_printk(fmt, args...) fmt "\n", args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-enum print_line_t							\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
-{									\
-	struct trace_seq *s = &iter->seq;				\
-	struct ftrace_raw_##call *field;				\
-	struct trace_entry *entry;					\
-	int ret;							\
-									\
-	entry = iter->ent;						\
-									\
-	if (entry->type != event_##call.id) {				\
-		WARN_ON_ONCE(1);					\
-		return TRACE_TYPE_UNHANDLED;				\
-	}								\
-									\
-	field = (typeof(field))entry;					\
-									\
-	ret = trace_seq_printf(s, #call ": " print);			\
-	if (!ret)							\
-		return TRACE_TYPE_PARTIAL_LINE;				\
-									\
-	return TRACE_TYPE_HANDLED;					\
-}
-	
-#include <trace/trace_event_types.h>
-
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_event_types.h>
-
-#undef __field
-#define __field(type, item)						\
-	ret = trace_define_field(event_call, #type, #item,		\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef __array
-#define __array(type, item, len)					\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#define __common_field(type, item)					\
-	ret = trace_define_field(event_call, #type, "common_" #item,	\
-				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
-	if (ret)							\
-		return ret;
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-int									\
-ftrace_define_fields_##call(void)					\
-{									\
-	struct ftrace_raw_##call field;					\
-	struct ftrace_event_call *event_call = &event_##call;		\
-	int ret;							\
-									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
-									\
-	tstruct;							\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 9d2fa78..0000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Stage 3 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * static void ftrace_event_<call>(proto)
- * {
- *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
- * }
- *
- * static int ftrace_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to  <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_event_<call>);
- * }
- *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
- *
- * For those macros defined with TRACE_EVENT:
- *
- * static struct ftrace_event_call event_<call>;
- *
- * static void ftrace_raw_event_<call>(proto)
- * {
- *	struct ring_buffer_event *event;
- *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *	unsigned long irq_flags;
- *	int pc;
- *
- *	local_save_flags(irq_flags);
- *	pc = preempt_count();
- *
- *	event = trace_current_buffer_lock_reserve(event_<call>.id,
- *				  sizeof(struct ftrace_raw_<call>),
- *				  irq_flags, pc);
- *	if (!event)
- *		return;
- *	entry	= ring_buffer_event_data(event);
- *
- *	<assign>;  <-- Here we assign the entries by the __field and
- *			__array macros.
- *
- *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
- * }
- *
- * static int ftrace_raw_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_raw_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static struct trace_event ftrace_event_type_<call> = {
- *	.trace			= ftrace_raw_output_<call>, <-- stage 2
- * };
- *
- * static int ftrace_raw_init_event_<call>(void)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- *	.show_format		= ftrace_format_<call>,
- * }
- *
- */
-
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
-#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)				\
-static void ftrace_profile_##call(proto)				\
-{									\
-	extern void perf_tpcounter_event(int);				\
-	perf_tpcounter_event(event_##call.id);				\
-}									\
-									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
-{									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&call->profile_count))			\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
-}									\
-									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
-{									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
-		unregister_trace_##call(ftrace_profile_##call);		\
-}
-
-#define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = ftrace_profile_enable_##call,			\
-	.profile_disable = ftrace_profile_disable_##call,
-
-#else
-#define _TRACE_PROFILE(call, proto, args)
-#define _TRACE_PROFILE_INIT(call)
-#endif
-
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#undef __entry
-#define __entry entry
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static void ftrace_raw_event_##call(proto)				\
-{									\
-	struct ftrace_event_call *call = &event_##call;			\
-	struct ring_buffer_event *event;				\
-	struct ftrace_raw_##call *entry;				\
-	unsigned long irq_flags;					\
-	int pc;								\
-									\
-	local_save_flags(irq_flags);					\
-	pc = preempt_count();						\
-									\
-	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
-	if (!event)							\
-		return;							\
-	entry	= ring_buffer_event_data(event);			\
-									\
-	assign;								\
-									\
-	if (call->preds && !filter_match_preds(call, entry))		\
-		ring_buffer_event_discard(event);			\
-									\
-	trace_nowake_buffer_unlock_commit(event, irq_flags, pc);	\
-									\
-}									\
-									\
-static int ftrace_raw_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_raw_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_raw_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_raw_event_##call);		\
-}									\
-									\
-static struct trace_event ftrace_event_type_##call = {			\
-	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}									\
-									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
-	.regfunc		= ftrace_raw_reg_event_##call,		\
-	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#include <trace/trace_event_types.h>
-
-#undef _TRACE_PROFILE
-#undef _TRACE_PROFILE_INIT
-
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 07a22c3..d06cf89 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -19,8 +19,12 @@
 #undef TRACE_STRUCT
 #define TRACE_STRUCT(args...) args
 
+extern void __bad_type_size(void);
+
 #undef TRACE_FIELD
 #define TRACE_FIELD(type, item, assign)					\
+	if (sizeof(type) != sizeof(field.item))				\
+		__bad_type_size();					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
@@ -30,7 +34,7 @@
 
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd)			\
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)			\
 	ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
 	if (!ret)							\
 		return 0;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
 
 #undef TP_RAW_FMT
 #define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@
 	return ret;							\
 }
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct args field;						\
+	int ret;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);		\
+									\
+	return ret;							\
+}
+
 #include "trace_event_types.h"
 
 #undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@
 #define TRACE_FIELD(type, item, assign)\
 	entry->item = assign;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
+
 #undef TP_CMD
 #define TP_CMD(cmd...)	cmd
 
@@ -85,18 +112,95 @@
 #define TRACE_ENTRY	entry
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)	\
 	cmd;
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int ftrace_define_fields_##call(void);					\
+static int ftrace_raw_init_event_##call(void);				\
 									\
-static struct ftrace_event_call __used					\
+struct ftrace_event_call __used						\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.id			= proto,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+};									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+									\
+struct ftrace_event_call __used						\
 __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.id			= proto,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.show_format		= ftrace_format_##call,			\
+};
+
+#include "trace_event_types.h"
+
+#undef TRACE_FIELD
+#define TRACE_FIELD(type, item, assign)					\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed_type(type));	\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SPECIAL
+#define TRACE_FIELD_SPECIAL(type, item, len, cmd)			\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed);	\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_ZERO_CHAR
+#define TRACE_FIELD_ZERO_CHAR(item)
+
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	struct args field;						\
+	int ret;							\
+									\
+	__common_field(unsigned char, type, 0);				\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
 }
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)
+
 #include "trace_event_types.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e..8b59241 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@
 	if (!current->ret_stack)
 		return -EBUSY;
 
+	/*
+	 * We must make sure the ret_stack is tested before we read
+	 * anything else.
+	 */
+	smp_rmb();
+
 	/* The return trace stack is full */
 	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 		atomic_inc(&current->trace_overrun);
@@ -78,13 +84,14 @@
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
+	current->ret_stack[index].subtime = 0;
 	*depth = index;
 
 	return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-void
+static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
 	int index;
@@ -104,9 +111,6 @@
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
 	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
 }
 
 /*
@@ -121,6 +125,8 @@
 	ftrace_pop_return_trace(&trace, &ret);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
+	barrier();
+	current->curr_ret_stack--;
 
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
@@ -426,8 +432,8 @@
 	return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
 	unsigned long nsecs_rem = do_div(duration, 1000);
 	/* log10(ULONG_MAX) + '\0' */
@@ -464,12 +470,23 @@
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_print_graph_duration(duration, s);
+	if (ret != TRACE_TYPE_HANDLED)
+		return ret;
 
 	ret = trace_seq_printf(s, "|  ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
 
+	return TRACE_TYPE_HANDLED;
 }
 
 /* Case of a leaf function on its call entry */
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c..ca7d7c4 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,10 +1,9 @@
 /*
- * h/w branch tracer for x86 based on bts
+ * h/w branch tracer for x86 based on BTS
  *
  * Copyright (C) 2008-2009 Intel Corporation.
  * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
  */
-#include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
@@ -15,110 +14,119 @@
 
 #include <asm/ds.h>
 
-#include "trace.h"
 #include "trace_output.h"
+#include "trace.h"
 
 
-#define SIZEOF_BTS (1 << 13)
+#define BTS_BUFFER_SIZE (1 << 13)
 
-/*
- * The tracer lock protects the below per-cpu tracer array.
- * It needs to be held to:
- * - start tracing on all cpus
- * - stop tracing on all cpus
- * - start tracing on a single hotplug cpu
- * - stop tracing on a single hotplug cpu
- * - read the trace from all cpus
- * - read the trace from a single cpu
- */
-static DEFINE_SPINLOCK(bts_tracer_lock);
 static DEFINE_PER_CPU(struct bts_tracer *, tracer);
-static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
+static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
 
 #define this_tracer per_cpu(tracer, smp_processor_id())
-#define this_buffer per_cpu(buffer, smp_processor_id())
 
-static int __read_mostly trace_hw_branches_enabled;
+static int trace_hw_branches_enabled __read_mostly;
+static int trace_hw_branches_suspended __read_mostly;
 static struct trace_array *hw_branch_trace __read_mostly;
 
 
-/*
- * Start tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_start_cpu(void *arg)
+static void bts_trace_init_cpu(int cpu)
 {
-	if (this_tracer)
-		ds_release_bts(this_tracer);
+	per_cpu(tracer, cpu) =
+		ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
 
-	this_tracer =
-		ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
-			       /* ovfl = */ NULL, /* th = */ (size_t)-1,
-			       BTS_KERNEL);
-	if (IS_ERR(this_tracer)) {
-		this_tracer = NULL;
-		return;
+	if (IS_ERR(per_cpu(tracer, cpu)))
+		per_cpu(tracer, cpu) = NULL;
+}
+
+static int bts_trace_init(struct trace_array *tr)
+{
+	int cpu;
+
+	hw_branch_trace = tr;
+	trace_hw_branches_enabled = 0;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		bts_trace_init_cpu(cpu);
+
+		if (likely(per_cpu(tracer, cpu)))
+			trace_hw_branches_enabled = 1;
 	}
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
+
+	/* If we could not enable tracing on a single cpu, we fail. */
+	return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
+}
+
+static void bts_trace_reset(struct trace_array *tr)
+{
+	int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
+	}
+	trace_hw_branches_enabled = 0;
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
 }
 
 static void bts_trace_start(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	on_each_cpu(bts_trace_start_cpu, NULL, 1);
-	trace_hw_branches_enabled = 1;
-
-	spin_unlock(&bts_tracer_lock);
-}
-
-/*
- * Stop tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_stop_cpu(void *arg)
-{
-	if (this_tracer) {
-		ds_release_bts(this_tracer);
-		this_tracer = NULL;
-	}
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
+	trace_hw_branches_suspended = 0;
+	put_online_cpus();
 }
 
 static void bts_trace_stop(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	trace_hw_branches_enabled = 0;
-	on_each_cpu(bts_trace_stop_cpu, NULL, 1);
-
-	spin_unlock(&bts_tracer_lock);
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
+	trace_hw_branches_suspended = 1;
+	put_online_cpus();
 }
 
 static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
 				     unsigned long action, void *hcpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
-
-	spin_lock(&bts_tracer_lock);
-
-	if (!trace_hw_branches_enabled)
-		goto out;
+	int cpu = (long)hcpu;
 
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+		/* The notification is sent with interrupts enabled. */
+		if (trace_hw_branches_enabled) {
+			bts_trace_init_cpu(cpu);
+
+			if (trace_hw_branches_suspended &&
+			    likely(per_cpu(tracer, cpu)))
+				ds_suspend_bts(per_cpu(tracer, cpu));
+		}
 		break;
+
 	case CPU_DOWN_PREPARE:
-		smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
-		break;
+		/* The notification is sent with interrupts enabled. */
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
 	}
 
- out:
-	spin_unlock(&bts_tracer_lock);
 	return NOTIFY_DONE;
 }
 
@@ -126,20 +134,6 @@
 	.notifier_call = bts_hotcpu_handler
 };
 
-static int bts_trace_init(struct trace_array *tr)
-{
-	hw_branch_trace = tr;
-
-	bts_trace_start(tr);
-
-	return 0;
-}
-
-static void bts_trace_reset(struct trace_array *tr)
-{
-	bts_trace_stop(tr);
-}
-
 static void bts_trace_print_header(struct seq_file *m)
 {
 	seq_puts(m, "# CPU#        TO  <-  FROM\n");
@@ -147,10 +141,10 @@
 
 static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 {
+	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *seq = &iter->seq;
 	struct hw_branch_entry *it;
-	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 
 	trace_assign_type(it, entry);
 
@@ -168,6 +162,7 @@
 
 void trace_hw_branch(u64 from, u64 to)
 {
+	struct ftrace_event_call *call = &event_hw_branch;
 	struct trace_array *tr = hw_branch_trace;
 	struct ring_buffer_event *event;
 	struct hw_branch_entry *entry;
@@ -194,7 +189,8 @@
 	entry->ent.type = TRACE_HW_BRANCHES;
 	entry->from = from;
 	entry->to   = to;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
@@ -224,11 +220,11 @@
 /*
  * Collect the trace on the current cpu and write it into the ftrace buffer.
  *
- * pre: bts_tracer_lock must be locked
+ * pre: tracing must be suspended on the current cpu
  */
 static void trace_bts_cpu(void *arg)
 {
-	struct trace_array *tr = (struct trace_array *) arg;
+	struct trace_array *tr = (struct trace_array *)arg;
 	const struct bts_trace *trace;
 	unsigned char *at;
 
@@ -241,10 +237,9 @@
 	if (unlikely(!this_tracer))
 		return;
 
-	ds_suspend_bts(this_tracer);
 	trace = ds_read_bts(this_tracer);
 	if (!trace)
-		goto out;
+		return;
 
 	for (at = trace->ds.top; (void *)at < trace->ds.end;
 	     at += trace->ds.size)
@@ -253,18 +248,27 @@
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     at += trace->ds.size)
 		trace_bts_at(trace, at);
-
-out:
-	ds_resume_bts(this_tracer);
 }
 
 static void trace_bts_prepare(struct trace_iterator *iter)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
+	/*
+	 * We need to collect the trace on the respective cpu since ftrace
+	 * implicitly adds the record for the current cpu.
+	 * Once that is more flexible, we could collect the data from any cpu.
+	 */
 	on_each_cpu(trace_bts_cpu, iter->tr, 1);
 
-	spin_unlock(&bts_tracer_lock);
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
+	put_online_cpus();
 }
 
 static void trace_bts_close(struct trace_iterator *iter)
@@ -274,11 +278,11 @@
 
 void trace_hw_branch_oops(void)
 {
-	spin_lock(&bts_tracer_lock);
-
-	trace_bts_cpu(hw_branch_trace);
-
-	spin_unlock(&bts_tracer_lock);
+	if (this_tracer) {
+		ds_suspend_bts_noirq(this_tracer);
+		trace_bts_cpu(hw_branch_trace);
+		ds_resume_bts_noirq(this_tracer);
+	}
 }
 
 struct tracer bts_tracer __read_mostly =
@@ -291,7 +295,10 @@
 	.start		= bts_trace_start,
 	.stop		= bts_trace_stop,
 	.open		= trace_bts_prepare,
-	.close		= trace_bts_close
+	.close		= trace_bts_close,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_hw_branches,
+#endif /* CONFIG_FTRACE_SELFTEST */
 };
 
 __init static int init_bts_trace(void)
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 8e37fcd..d53b45e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,8 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <linux/time.h>
+
 #include <asm/atomic.h>
 
 #include "trace.h"
@@ -174,7 +176,7 @@
 	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
@@ -221,7 +223,7 @@
 	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 64b54a5..7938f3a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,11 +14,25 @@
 /* must be a power of 2 */
 #define EVENT_HASHSIZE	128
 
-static DEFINE_MUTEX(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
+void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+	s->buffer[len] = 0;
+	seq_puts(m, s->buffer);
+
+	trace_seq_init(s);
+}
+
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -84,6 +98,39 @@
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(trace_seq_printf);
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
 
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
@@ -201,6 +248,67 @@
 	return 0;
 }
 
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+		       unsigned long flags,
+		       const struct trace_print_flags *flag_array)
+{
+	unsigned long mask;
+	const char *str;
+	const char *ret = p->buffer + p->len;
+	int i;
+
+	for (i = 0;  flag_array[i].name && flags; i++) {
+
+		mask = flag_array[i].mask;
+		if ((flags & mask) != mask)
+			continue;
+
+		str = flag_array[i].name;
+		flags &= ~mask;
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_puts(p, str);
+	}
+
+	/* check for left over flags */
+	if (flags) {
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_printf(p, "0x%lx", flags);
+	}
+
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_flags_seq);
+
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+			 const struct trace_print_flags *symbol_array)
+{
+	int i;
+	const char *ret = p->buffer + p->len;
+
+	for (i = 0;  symbol_array[i].name; i++) {
+
+		if (val != symbol_array[i].mask)
+			continue;
+
+		trace_seq_puts(p, symbol_array[i].name);
+		break;
+	}
+
+	if (!p->len)
+		trace_seq_printf(p, "0x%lx", val);
+		
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
@@ -311,17 +419,20 @@
 
 		if (ip == ULONG_MAX || !ret)
 			break;
-		if (i && ret)
-			ret = trace_seq_puts(s, " <- ");
+		if (ret)
+			ret = trace_seq_puts(s, " => ");
 		if (!ip) {
 			if (ret)
 				ret = trace_seq_puts(s, "??");
+			if (ret)
+				ret = trace_seq_puts(s, "\n");
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
+		ret = trace_seq_puts(s, "\n");
 	}
 
 	if (mm)
@@ -455,6 +566,7 @@
  * @type: the type of event to look for
  *
  * Returns an event of type @type otherwise NULL
+ * Called with trace_event_read_lock() held.
  */
 struct trace_event *ftrace_find_event(int type)
 {
@@ -464,7 +576,7 @@
 
 	key = type & (EVENT_HASHSIZE - 1);
 
-	hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+	hlist_for_each_entry(event, n, &event_hash[key], node) {
 		if (event->type == type)
 			return event;
 	}
@@ -472,6 +584,46 @@
 	return NULL;
 }
 
+static LIST_HEAD(ftrace_event_list);
+
+static int trace_search_list(struct list_head **list)
+{
+	struct trace_event *e;
+	int last = __TRACE_LAST_TYPE;
+
+	if (list_empty(&ftrace_event_list)) {
+		*list = &ftrace_event_list;
+		return last + 1;
+	}
+
+	/*
+	 * We used up all possible max events,
+	 * lets see if somebody freed one.
+	 */
+	list_for_each_entry(e, &ftrace_event_list, list) {
+		if (e->type != last + 1)
+			break;
+		last++;
+	}
+
+	/* Did we used up all 65 thousand events??? */
+	if ((last + 1) > FTRACE_MAX_EVENT)
+		return 0;
+
+	*list = &e->list;
+	return last + 1;
+}
+
+void trace_event_read_lock(void)
+{
+	down_read(&trace_event_mutex);
+}
+
+void trace_event_read_unlock(void)
+{
+	up_read(&trace_event_mutex);
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -492,22 +644,42 @@
 	unsigned key;
 	int ret = 0;
 
-	mutex_lock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
 
-	if (!event) {
-		ret = next_event_type++;
+	if (WARN_ON(!event))
 		goto out;
-	}
 
-	if (!event->type)
-		event->type = next_event_type++;
-	else if (event->type > __TRACE_LAST_TYPE) {
+	INIT_LIST_HEAD(&event->list);
+
+	if (!event->type) {
+		struct list_head *list = NULL;
+
+		if (next_event_type > FTRACE_MAX_EVENT) {
+
+			event->type = trace_search_list(&list);
+			if (!event->type)
+				goto out;
+
+		} else {
+			
+			event->type = next_event_type++;
+			list = &ftrace_event_list;
+		}
+
+		if (WARN_ON(ftrace_find_event(event->type)))
+			goto out;
+
+		list_add_tail(&event->list, list);
+
+	} else if (event->type > __TRACE_LAST_TYPE) {
 		printk(KERN_WARNING "Need to add type to trace.h\n");
 		WARN_ON(1);
-	}
-
-	if (ftrace_find_event(event->type))
 		goto out;
+	} else {
+		/* Is this event already used */
+		if (ftrace_find_event(event->type))
+			goto out;
+	}
 
 	if (event->trace == NULL)
 		event->trace = trace_nop_print;
@@ -520,14 +692,25 @@
 
 	key = event->type & (EVENT_HASHSIZE - 1);
 
-	hlist_add_head_rcu(&event->node, &event_hash[key]);
+	hlist_add_head(&event->node, &event_hash[key]);
 
 	ret = event->type;
  out:
-	mutex_unlock(&trace_event_mutex);
+	up_write(&trace_event_mutex);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_ftrace_event);
+
+/*
+ * Used by module code with the trace_event_mutex held for write.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+	hlist_del(&event->node);
+	list_del(&event->list);
+	return 0;
+}
 
 /**
  * unregister_ftrace_event - remove a no longer used event
@@ -535,12 +718,13 @@
  */
 int unregister_ftrace_event(struct trace_event *event)
 {
-	mutex_lock(&trace_event_mutex);
-	hlist_del(&event->node);
-	mutex_unlock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
+	__unregister_ftrace_event(event);
+	up_write(&trace_event_mutex);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(unregister_ftrace_event);
 
 /*
  * Standard events
@@ -833,14 +1017,16 @@
 
 	trace_assign_type(field, iter->ent);
 
+	if (!trace_seq_puts(s, "<stack trace>\n"))
+		goto partial;
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		if (i) {
-			if (!trace_seq_puts(s, " <= "))
-				goto partial;
+		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
+			break;
+		if (!trace_seq_puts(s, " => "))
+			goto partial;
 
-			if (!seq_print_ip_sym(s, field->caller[i], flags))
-				goto partial;
-		}
+		if (!seq_print_ip_sym(s, field->caller[i], flags))
+			goto partial;
 		if (!trace_seq_puts(s, "\n"))
 			goto partial;
 	}
@@ -868,10 +1054,10 @@
 
 	trace_assign_type(field, iter->ent);
 
-	if (!seq_print_userip_objs(field, s, flags))
+	if (!trace_seq_puts(s, "<user stack trace>\n"))
 		goto partial;
 
-	if (!trace_seq_putc(s, '\n'))
+	if (!seq_print_userip_objs(field, s, flags))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index e0bde39..d38bec4 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,41 +1,17 @@
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
+#include <linux/trace_seq.h>
 #include "trace.h"
 
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-					      int flags);
-
-struct trace_event {
-	struct hlist_node	node;
-	int			type;
-	trace_print_func	trace;
-	trace_print_func	raw;
-	trace_print_func	hex;
-	trace_print_func	binary;
-};
-
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
-extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
-				size_t len);
-extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
-extern int trace_seq_path(struct trace_seq *s, struct path *path);
 extern int seq_print_userip_objs(const struct userstack_entry *entry,
 				 struct trace_seq *s, unsigned long sym_flags);
 extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -44,13 +20,17 @@
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
+extern void trace_event_read_lock(void);
+extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
 
+/* used by module unregistering */
+extern int __unregister_ftrace_event(struct trace_event *event);
+extern struct rw_semaphore trace_event_mutex;
+
 #define MAX_MEMHEX_BYTES	8
 #define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 1184397..8a30d98 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -36,6 +36,7 @@
 
 static void probe_power_end(struct power_trace *it)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -54,7 +55,8 @@
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
@@ -62,6 +64,7 @@
 static void probe_power_mark(struct power_trace *it, unsigned int type,
 				unsigned int level)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -84,7 +87,8 @@
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index eb81556..9bece96 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -245,17 +245,13 @@
 static __init int init_trace_printk_function_export(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
 		return 0;
 
-	entry = debugfs_create_file("printk_formats", 0444, d_tracer,
+	trace_create_file("printk_formats", 0444, d_tracer,
 				    NULL, &ftrace_formats_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'printk_formats' entry\n");
 
 	return 0;
 }
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9117cea..a98106d 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -29,13 +29,13 @@
 	int cpu;
 	int pc;
 
-	if (!sched_ref || sched_stopped)
+	if (unlikely(!sched_ref))
 		return;
 
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
 
-	if (!tracer_enabled)
+	if (!tracer_enabled || sched_stopped)
 		return;
 
 	pc = preempt_count();
@@ -56,15 +56,15 @@
 	unsigned long flags;
 	int cpu, pc;
 
-	if (!likely(tracer_enabled))
+	if (unlikely(!sched_ref))
+		return;
+
+	tracing_record_cmdline(current);
+
+	if (!tracer_enabled || sched_stopped)
 		return;
 
 	pc = preempt_count();
-	tracing_record_cmdline(current);
-
-	if (sched_stopped)
-		return;
-
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f..eacb272 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -138,9 +138,6 @@
 
 	pc = preempt_count();
 
-	/* The task we are waiting for is waking up */
-	data = wakeup_trace->data[wakeup_cpu];
-
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
+	/* The task we are waiting for is waking up */
+	data = wakeup_trace->data[wakeup_cpu];
+
 	trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 08f4eb2..00dd648 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@
 	case TRACE_BRANCH:
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
+	case TRACE_HW_BRANCHES:
 		return 1;
 	}
 	return 0;
@@ -188,6 +189,7 @@
 #else
 # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
 #endif /* CONFIG_DYNAMIC_FTRACE */
+
 /*
  * Simple verification test of ftrace function tracer.
  * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -749,3 +751,59 @@
 	return ret;
 }
 #endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_HW_BRANCH_TRACER
+int
+trace_selftest_startup_hw_branches(struct tracer *trace,
+				   struct trace_array *tr)
+{
+	struct trace_iterator *iter;
+	struct tracer tracer;
+	unsigned long count;
+	int ret;
+
+	if (!trace->open) {
+		printk(KERN_CONT "missing open function...");
+		return -1;
+	}
+
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	/*
+	 * The hw-branch tracer needs to collect the trace from the various
+	 * cpu trace buffers - before tracing is stopped.
+	 */
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
+	memcpy(&tracer, trace, sizeof(tracer));
+
+	iter->trace = &tracer;
+	iter->tr = tr;
+	iter->pos = -1;
+	mutex_init(&iter->mutex);
+
+	trace->open(iter);
+
+	mutex_destroy(&iter->mutex);
+	kfree(iter);
+
+	tracing_stop();
+
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	if (!ret && !count) {
+		printk(KERN_CONT "no entries found..");
+		ret = -1;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_HW_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c750f65..2d7aebd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@
 		seq_printf(m, "        Depth    Size   Location"
 			   "    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries);
+			   max_stack_trace.nr_entries - 1);
 
 		if (!stack_tracer_enabled && !max_stack_size)
 			print_disabled(m);
@@ -352,19 +352,14 @@
 static __init int stack_trace_init(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
-				    &max_stack_size, &stack_max_size_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+	trace_create_file("stack_max_size", 0644, d_tracer,
+			&max_stack_size, &stack_max_size_fops);
 
-	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
-				    NULL, &stack_trace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+	trace_create_file("stack_trace", 0444, d_tracer,
+			NULL, &stack_trace_fops);
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index acdebd7..c006437 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -1,7 +1,7 @@
 /*
  * Infrastructure for statistic tracing (histogram output).
  *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
  *
  * Based on the code from trace_branch.c which is
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
@@ -10,22 +10,27 @@
 
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/debugfs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
 
-/* List of stat entries from a tracer */
-struct trace_stat_list {
-	struct list_head	list;
+/*
+ * List of stat red-black nodes from a tracer
+ * We use a such tree to sort quickly the stat
+ * entries from the tracer.
+ */
+struct stat_node {
+	struct rb_node		node;
 	void			*stat;
 };
 
 /* A stat session is the stats output in one file */
-struct tracer_stat_session {
+struct stat_session {
 	struct list_head	session_list;
 	struct tracer_stat	*ts;
-	struct list_head	stat_list;
+	struct rb_root		stat_root;
 	struct mutex		stat_mutex;
 	struct dentry		*file;
 };
@@ -37,18 +42,48 @@
 /* The root directory for all stat files */
 static struct dentry		*stat_dir;
 
-
-static void reset_stat_session(struct tracer_stat_session *session)
+/*
+ * Iterate through the rbtree using a post order traversal path
+ * to release the next node.
+ * It won't necessary release one at each iteration
+ * but it will at least advance closer to the next one
+ * to be released.
+ */
+static struct rb_node *release_next(struct rb_node *node)
 {
-	struct trace_stat_list *node, *next;
+	struct stat_node *snode;
+	struct rb_node *parent = rb_parent(node);
 
-	list_for_each_entry_safe(node, next, &session->stat_list, list)
-		kfree(node);
+	if (node->rb_left)
+		return node->rb_left;
+	else if (node->rb_right)
+		return node->rb_right;
+	else {
+		if (!parent)
+			;
+		else if (parent->rb_left == node)
+			parent->rb_left = NULL;
+		else
+			parent->rb_right = NULL;
 
-	INIT_LIST_HEAD(&session->stat_list);
+		snode = container_of(node, struct stat_node, node);
+		kfree(snode);
+
+		return parent;
+	}
 }
 
-static void destroy_session(struct tracer_stat_session *session)
+static void reset_stat_session(struct stat_session *session)
+{
+	struct rb_node *node = session->stat_root.rb_node;
+
+	while (node)
+		node = release_next(node);
+
+	session->stat_root = RB_ROOT;
+}
+
+static void destroy_session(struct stat_session *session)
 {
 	debugfs_remove(session->file);
 	reset_stat_session(session);
@@ -56,25 +91,60 @@
 	kfree(session);
 }
 
-/*
- * For tracers that don't provide a stat_cmp callback.
- * This one will force an immediate insertion on tail of
- * the list.
- */
-static int dummy_cmp(void *p1, void *p2)
+typedef int (*cmp_stat_t)(void *, void *);
+
+static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
 {
-	return 1;
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct stat_node *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->stat = stat;
+
+	/*
+	 * Figure out where to put new node
+	 * This is a descendent sorting
+	 */
+	while (*new) {
+		struct stat_node *this;
+		int result;
+
+		this = container_of(*new, struct stat_node, node);
+		result = cmp(data->stat, this->stat);
+
+		parent = *new;
+		if (result >= 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+	return 0;
 }
 
 /*
- * Initialize the stat list at each trace_stat file opening.
+ * For tracers that don't provide a stat_cmp callback.
+ * This one will force an insertion as right-most node
+ * in the rbtree.
+ */
+static int dummy_cmp(void *p1, void *p2)
+{
+	return -1;
+}
+
+/*
+ * Initialize the stat rbtree at each trace_stat file opening.
  * All of these copies and sorting are required on all opening
  * since the stats could have changed between two file sessions.
  */
-static int stat_seq_init(struct tracer_stat_session *session)
+static int stat_seq_init(struct stat_session *session)
 {
-	struct trace_stat_list *iter_entry, *new_entry;
 	struct tracer_stat *ts = session->ts;
+	struct rb_root *root = &session->stat_root;
 	void *stat;
 	int ret = 0;
 	int i;
@@ -85,29 +155,16 @@
 	if (!ts->stat_cmp)
 		ts->stat_cmp = dummy_cmp;
 
-	stat = ts->stat_start();
+	stat = ts->stat_start(ts);
 	if (!stat)
 		goto exit;
 
-	/*
-	 * The first entry. Actually this is the second, but the first
-	 * one (the stat_list head) is pointless.
-	 */
-	new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-	if (!new_entry) {
-		ret = -ENOMEM;
+	ret = insert_stat(root, stat, ts->stat_cmp);
+	if (ret)
 		goto exit;
-	}
-
-	INIT_LIST_HEAD(&new_entry->list);
-
-	list_add(&new_entry->list, &session->stat_list);
-
-	new_entry->stat = stat;
 
 	/*
-	 * Iterate over the tracer stat entries and store them in a sorted
-	 * list.
+	 * Iterate over the tracer stat entries and store them in an rbtree.
 	 */
 	for (i = 1; ; i++) {
 		stat = ts->stat_next(stat, i);
@@ -116,36 +173,16 @@
 		if (!stat)
 			break;
 
-		new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-		if (!new_entry) {
-			ret = -ENOMEM;
-			goto exit_free_list;
-		}
-
-		INIT_LIST_HEAD(&new_entry->list);
-		new_entry->stat = stat;
-
-		list_for_each_entry_reverse(iter_entry, &session->stat_list,
-				list) {
-
-			/* Insertion with a descendent sorting */
-			if (ts->stat_cmp(iter_entry->stat,
-					new_entry->stat) >= 0) {
-
-				list_add(&new_entry->list, &iter_entry->list);
-				break;
-			}
-		}
-
-		/* The current larger value */
-		if (list_empty(&new_entry->list))
-			list_add(&new_entry->list, &session->stat_list);
+		ret = insert_stat(root, stat, ts->stat_cmp);
+		if (ret)
+			goto exit_free_rbtree;
 	}
+
 exit:
 	mutex_unlock(&session->stat_mutex);
 	return ret;
 
-exit_free_list:
+exit_free_rbtree:
 	reset_stat_session(session);
 	mutex_unlock(&session->stat_mutex);
 	return ret;
@@ -154,38 +191,51 @@
 
 static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
+	struct rb_node *node;
+	int i;
 
-	/* Prevent from tracer switch or stat_list modification */
+	/* Prevent from tracer switch or rbtree modification */
 	mutex_lock(&session->stat_mutex);
 
 	/* If we are in the beginning of the file, print the headers */
-	if (!*pos && session->ts->stat_headers)
+	if (!*pos && session->ts->stat_headers) {
+		(*pos)++;
 		return SEQ_START_TOKEN;
+	}
 
-	return seq_list_start(&session->stat_list, *pos);
+	node = rb_first(&session->stat_root);
+	for (i = 0; node && i < *pos; i++)
+		node = rb_next(node);
+
+	(*pos)++;
+
+	return node;
 }
 
 static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
+	struct rb_node *node = p;
+
+	(*pos)++;
 
 	if (p == SEQ_START_TOKEN)
-		return seq_list_start(&session->stat_list, *pos);
+		return rb_first(&session->stat_root);
 
-	return seq_list_next(p, &session->stat_list, pos);
+	return rb_next(node);
 }
 
 static void stat_seq_stop(struct seq_file *s, void *p)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 	mutex_unlock(&session->stat_mutex);
 }
 
 static int stat_seq_show(struct seq_file *s, void *v)
 {
-	struct tracer_stat_session *session = s->private;
-	struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+	struct stat_session *session = s->private;
+	struct stat_node *l = container_of(v, struct stat_node, node);
 
 	if (v == SEQ_START_TOKEN)
 		return session->ts->stat_headers(s);
@@ -205,7 +255,7 @@
 {
 	int ret;
 
-	struct tracer_stat_session *session = inode->i_private;
+	struct stat_session *session = inode->i_private;
 
 	ret = seq_open(file, &trace_stat_seq_ops);
 	if (!ret) {
@@ -218,11 +268,11 @@
 }
 
 /*
- * Avoid consuming memory with our now useless list.
+ * Avoid consuming memory with our now useless rbtree.
  */
 static int tracing_stat_release(struct inode *i, struct file *f)
 {
-	struct tracer_stat_session *session = i->i_private;
+	struct stat_session *session = i->i_private;
 
 	mutex_lock(&session->stat_mutex);
 	reset_stat_session(session);
@@ -251,7 +301,7 @@
 	return 0;
 }
 
-static int init_stat_file(struct tracer_stat_session *session)
+static int init_stat_file(struct stat_session *session)
 {
 	if (!stat_dir && tracing_stat_init())
 		return -ENODEV;
@@ -266,7 +316,7 @@
 
 int register_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *session, *node, *tmp;
+	struct stat_session *session, *node;
 	int ret;
 
 	if (!trace)
@@ -277,7 +327,7 @@
 
 	/* Already registered? */
 	mutex_lock(&all_stat_sessions_mutex);
-	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+	list_for_each_entry(node, &all_stat_sessions, session_list) {
 		if (node->ts == trace) {
 			mutex_unlock(&all_stat_sessions_mutex);
 			return -EINVAL;
@@ -286,15 +336,13 @@
 	mutex_unlock(&all_stat_sessions_mutex);
 
 	/* Init the session */
-	session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
 
 	session->ts = trace;
 	INIT_LIST_HEAD(&session->session_list);
-	INIT_LIST_HEAD(&session->stat_list);
 	mutex_init(&session->stat_mutex);
-	session->file = NULL;
 
 	ret = init_stat_file(session);
 	if (ret) {
@@ -312,7 +360,7 @@
 
 void unregister_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *node, *tmp;
+	struct stat_session *node, *tmp;
 
 	mutex_lock(&all_stat_sessions_mutex);
 	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 202274c..f3546a2 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -12,7 +12,7 @@
 	/* The name of your stat file */
 	const char		*name;
 	/* Iteration over statistic entries */
-	void			*(*stat_start)(void);
+	void			*(*stat_start)(struct tracer_stat *trace);
 	void			*(*stat_next)(void *prev, int idx);
 	/* Compare two entries for stats sorting */
 	int			(*stat_cmp)(void *p1, void *p2);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 91fd19c..e04b76c 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -321,11 +321,7 @@
 
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("sysprof_sample_period", 0644,
+	trace_create_file("sysprof_sample_period", 0644,
 			d_tracer, NULL, &sysprof_sample_fops);
-	if (entry)
-		return;
-	pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
 }
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 797201e..97fcea4 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
@@ -16,8 +16,6 @@
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head            list;
-/* Useful to know if we print the cpu headers */
-	bool		            first_entry;
 	int		            cpu;
 	pid_t			    pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
@@ -47,12 +45,11 @@
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
 			goto found;
@@ -69,12 +66,11 @@
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
 			goto found;
@@ -105,8 +101,6 @@
 	cws->pid = wq_thread->pid;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (list_empty(&workqueue_cpu_stat(cpu)->list))
-		cws->first_entry = true;
 	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
@@ -152,7 +146,7 @@
 	return ret;
 }
 
-static void *workqueue_stat_start(void)
+static void *workqueue_stat_start(struct tracer_stat *trace)
 {
 	int cpu;
 	void *ret = NULL;
@@ -191,16 +185,9 @@
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
 	struct cpu_workqueue_stats *cws = p;
-	unsigned long flags;
-	int cpu = cws->cpu;
 	struct pid *pid;
 	struct task_struct *tsk;
 
-	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
-		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
 	pid = find_get_pid(cws->pid);
 	if (pid) {
 		tsk = get_pid_task(pid, PIDTYPE_PID);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a..0668795 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@
 	return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7..9960be0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -10,6 +10,9 @@
 config BITREVERSE
 	tristate
 
+config RATIONAL
+	boolean
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e4..1f6edef 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@
 endif
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 0000000..b3c099b
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,62 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers
+ */
+
+#include <linux/rational.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ *		(1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator)
+{
+	unsigned long n, d, n0, d0, n1, d1;
+	n = given_numerator;
+	d = given_denominator;
+	n0 = d1 = 0;
+	n1 = d0 = 1;
+	for (;;) {
+		unsigned long t, a;
+		if ((n1 > max_numerator) || (d1 > max_denominator)) {
+			n1 = n0;
+			d1 = d0;
+			break;
+		}
+		if (d == 0)
+			break;
+		t = d;
+		a = n / d;
+		d = n % d;
+		n = t;
+		t = n0 + a * n1;
+		n0 = n1;
+		n1 = t;
+		t = d0 + a * d1;
+		d0 = d1;
+		d1 = t;
+	}
+	*best_numerator = n1;
+	*best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272..65f5e17 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,16 +14,15 @@
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/block.h>
+
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
 static mempool_t *page_pool, *isa_page_pool;
 
-DEFINE_TRACE(block_bio_bounce);
-
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e05..ac13043 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -629,52 +629,43 @@
 	free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+			  size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
+	unsigned long lim, vm, pgsz;
+	int error = -ENOMEM;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->total_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->total_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->locked_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto out;
+	mm->total_vm  += pgsz;
+	mm->locked_vm += pgsz;
 
-	current->mm->total_vm  += pgsz;
-	current->mm->locked_vm += pgsz;
-
+	error = 0;
  out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
+	up_write(&mm->mmap_sem);
+	return error;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	current->mm->total_vm  -= pgsz;
-	current->mm->locked_vm -= pgsz;
+	mm->total_vm  -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	up_write(&current->mm->mmap_sem);
-}
-
-void free_locked_buffer(void *buffer, size_t size)
-{
-	release_locked_buffer(buffer, size);
-
-	kfree(buffer);
+	up_write(&mm->mmap_sem);
 }
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00..f85831d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,7 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
-#include	<trace/kmemtrace.h>
+#include	<linux/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index f92e66d..9b1737b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,7 +66,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 65ffda59..5e805a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
diff --git a/mm/util.c b/mm/util.c
index 55bef16..abc65aa 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -255,13 +257,6 @@
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3..b75b6ce 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb456..499a67e 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e505b53..c2e4fb8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
diff --git a/samples/Kconfig b/samples/Kconfig
index 4b02f5a..b75d28c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -19,6 +19,12 @@
 	help
 	  This build tracepoints example modules.
 
+config SAMPLE_TRACE_EVENTS
+	tristate "Build trace_events examples -- loadable modules only"
+	depends on EVENT_TRACING && m
+	help
+	  This build trace event example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 10eaca8..13e4b47 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 0000000..0d428dc
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,6 @@
+# builds the trace events example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+CFLAGS_trace-events-sample.o := -I$(src)
+
+obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 0000000..aabc4e9
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,52 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+
+/*
+ * Any file that uses trace points, must include the header.
+ * But only one file, must include the header by defining
+ * CREATE_TRACE_POINTS first.  This will make the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace-events-sample.h"
+
+
+static void simple_thread_func(int cnt)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(HZ);
+	trace_foo_bar("hello", cnt);
+}
+
+static int simple_thread(void *arg)
+{
+	int cnt = 0;
+
+	while (!kthread_should_stop())
+		simple_thread_func(cnt++);
+
+	return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+static int __init trace_event_init(void)
+{
+	simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
+	if (IS_ERR(simple_tsk))
+		return -1;
+
+	return 0;
+}
+
+static void __exit trace_event_exit(void)
+{
+	kthread_stop(simple_tsk);
+}
+
+module_init(trace_event_init);
+module_exit(trace_event_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-events-sample");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 0000000..128a897
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,129 @@
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ *  || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENT_SAMPLE_H
+
+/*
+ * All trace headers should include tracepoint.h, until we finally
+ * make it into a standard header.
+ */
+#include <linux/tracepoint.h>
+
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /debugfs/tracing/events/<system>
+ *
+ * The define_trace.h belowe will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ *
+ * If you want a different system than file name, you can override
+ * the header name by defining TRACE_INCLUDE_FILE
+ *
+ * If this file was called, goofy.h, then we would define:
+ *
+ * #define TRACE_INCLUDE_FILE goofy
+ *
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sample
+
+/*
+ * The TRACE_EVENT macro is broken up into 5 parts.
+ *
+ * name: name of the trace point. This is also how to enable the tracepoint.
+ *   A function called trace_foo_bar() will be created.
+ *
+ * proto: the prototype of the function trace_foo_bar()
+ *   Here it is trace_foo_bar(char *foo, int bar).
+ *
+ * args:  must match the arguments in the prototype.
+ *    Here it is simply "foo, bar".
+ *
+ * struct:  This defines the way the data will be stored in the ring buffer.
+ *    There are currently two types of elements. __field and __array.
+ *    a __field is broken up into (type, name). Where type can be any
+ *    type but an array.
+ *    For an array. there are three fields. (type, name, size). The
+ *    type of elements in the array, the name of the field and the size
+ *    of the array.
+ *
+ *    __array( char, foo, 10) is the same as saying   char foo[10].
+ *
+ * fast_assign: This is a C like function that is used to store the items
+ *    into the ring buffer.
+ *
+ * printk: This is a way to print out the data in pretty print. This is
+ *    useful if the system crashes and you are logging via a serial line,
+ *    the data can be printed to the console using this "printk" method.
+ *
+ * Note, that for both the assign and the printk, __entry is the handler
+ * to the data structure in the ring buffer, and is defined by the
+ * TP_STRUCT__entry.
+ */
+TRACE_EVENT(foo_bar,
+
+	TP_PROTO(char *foo, int bar),
+
+	TP_ARGS(foo, bar),
+
+	TP_STRUCT__entry(
+		__array(	char,	foo,    10		)
+		__field(	int,	bar			)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->foo, foo, 10);
+		__entry->bar	= bar;
+	),
+
+	TP_printk("foo %s %d", __entry->foo, __entry->bar)
+);
+#endif
+
+/***** NOTICE! The #if protection ends here. *****/
+
+
+/*
+ * There are several ways I could have done this. If I left out the
+ * TRACE_INCLUDE_PATH, then it would default to the kernel source
+ * include/trace/events directory.
+ *
+ * I could specify a path from the define_trace.h file back to this
+ * file.
+ *
+ * #define TRACE_INCLUDE_PATH ../../samples/trace_events
+ *
+ * But I chose to simply make it use the current directory and then in
+ * the Makefile I added:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+ *
+ * This will make sure the current path is part of the include
+ * structure for our file so that we can find it.
+ *
+ * I could have made only the top level directory the include:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)
+ *
+ * And then let the path to this directory be the TRACE_INCLUDE_PATH:
+ *
+ * #define TRACE_INCLUDE_PATH samples/trace_events
+ *
+ * But then if something defines "samples" or "trace_events" then we
+ * could risk that being converted too, and give us an unexpected
+ * result.
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+/*
+ * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
+ */
+#define TRACE_INCLUDE_FILE trace-events-sample
+#include <trace/define_trace.h>
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3208a3a..acd8c4a 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1828,6 +1828,25 @@
     $state = 0;
 }
 
+sub tracepoint_munge($) {
+	my $file = shift;
+	my $tracepointname = 0;
+	my $tracepointargs = 0;
+
+	if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+		$tracepointargs = $1;
+	}
+	if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+		print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
+			     "$prototype\n";
+	} else {
+		$prototype = "static inline void trace_$tracepointname($tracepointargs)";
+	}
+}
+
 sub syscall_munge() {
 	my $void = 0;
 
@@ -1882,6 +1901,9 @@
 	if ($prototype =~ /SYSCALL_DEFINE/) {
 		syscall_munge();
 	}
+	if ($prototype =~ /TRACE_EVENT/) {
+		tracepoint_munge($file);
+	}
 	dump_function($prototype, $file);
 	reset_state();
     }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 409596e..0fae7da 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -26,7 +26,7 @@
 # which will also be the location of that section after final link.
 # e.g.
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  .globl my_func
 #  my_func:
 #        [...]
@@ -39,7 +39,7 @@
 #        [...]
 #
 # Both relocation offsets for the mcounts in the above example will be
-# offset from .text.sched. If we make another file called tmp.s with:
+# offset from .sched.text. If we make another file called tmp.s with:
 #
 #  .section __mcount_loc
 #  .quad  my_func + 0x5
@@ -51,7 +51,7 @@
 # But this gets hard if my_func is not globl (a static function).
 # In such a case we have:
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  my_func:
 #        [...]
 #        call mcount  (offset: 0x5)
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 7039b14..6bfc7ea 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -85,8 +85,8 @@
 	 * against concurrent list-extension
 	 */
 	rcu_read_lock();
-	qe = list_entry(rcu_dereference(qe->later.next),
-			struct ima_queue_entry, later);
+	qe = list_entry_rcu(qe->later.next,
+			    struct ima_queue_entry, later);
 	rcu_read_unlock();
 	(*pos)++;
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 8d3c2a0..f83a809 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -735,8 +735,8 @@
 		return;
 	}
 
-	m = list_entry(rcu_dereference(smk_netlbladdr_list.next),
-			 struct smk_netlbladdr, list);
+	m = list_entry_rcu(smk_netlbladdr_list.next,
+			   struct smk_netlbladdr, list);
 
 	/* the comparison '>' is a bit hacky, but works */
 	if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -749,8 +749,8 @@
 			list_add_rcu(&new->list, &m->list);
 			return;
 		}
-		m_next = list_entry(rcu_dereference(m->list.next),
-				 struct smk_netlbladdr, list);
+		m_next = list_entry_rcu(m->list.next,
+					struct smk_netlbladdr, list);
 		if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
 			list_add_rcu(&new->list, &m->list);
 			return;