Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6:
  [SCSI] gdth: update deprecated pci_find_device
  [SCSI] gdth: scan for scsi devices
  [SCSI] sym53c416: fix module parameters
  [SCSI] lpfc 8.2.5 : Update lpfc driver version to 8.2.5
  [SCSI] lpfc 8.2.5 : Fix buffer leaks
  [SCSI] lpfc 8.2.5 : Miscellaneous discovery Fixes
  [SCSI] lpfc 8.2.5 : Add MSI-X single message support
  [SCSI] lpfc 8.2.5 : Miscellaneous Fixes
  [SCSI] lpfc 8.2.5 : Correct ndlp referencing issues
  [SCSI] update SG_ALL to avoid causing chaining
  [SCSI] aic94xx: fix ABORT_TASK define conflict
  [SCSI] fas216: Use scsi_eh API for REQUEST_SENSE invocation
  [SCSI] ses: fix memory leaks
  [SCSI] aacraid: informational sysfs value corrections
  [SCSI] mpt fusion: Request I/O resources only when required
  [SCSI] aacraid: ignore adapter reset check polarity
  [SCSI] aacraid: add optional MSI support
  [SCSI] mpt fusion: Avoid racing when mptsas and mptcl module are loaded in parallel
  [SCSI] MegaRAID driver management char device moved to misc
  [SCSI] advansys: fix overrun_buf aligned bug
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 6a0ad47..300e170 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -8,7 +8,7 @@
 
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
-	    procfs-guide.xml writing_usb_driver.xml \
+	    procfs-guide.xml writing_usb_driver.xml networking.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 5eaef87..5e87ad5 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -398,4 +398,24 @@
 
   </chapter>
 
+  <chapter id="splice">
+      <title>splice API</title>
+  <para>
+	splice is a method for moving blocks of data around inside the
+	kernel, without continually transferring them between the kernel
+	and user space.
+  </para>
+!Ffs/splice.c
+  </chapter>
+
+  <chapter id="pipes">
+      <title>pipes API</title>
+  <para>
+	Pipe interfaces are all for in-kernel (builtin image) use.
+	They are not exported for use by modules.
+  </para>
+!Iinclude/linux/pipe_fs_i.h
+!Ffs/pipe.c
+  </chapter>
+
 </book>
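The splice chapter that moves into filesystems.tmpl above is also reachable
from user space via the splice(2) syscall. An illustrative userspace sketch
(not part of this diff; the input path is hypothetical and error handling is
abbreviated) of shuttling a file to stdout through a pipe without copying the
data through a user buffer:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int p[2];
		int fd = open("/tmp/input.dat", O_RDONLY);
		ssize_t n;

		if (fd < 0 || pipe(p) < 0)
			return 1;

		/* file -> pipe, then pipe -> stdout; pages are moved,
		 * not copied through a userspace buffer */
		while ((n = splice(fd, NULL, p[1], NULL, 4096,
				   SPLICE_F_MOVE)) > 0)
			if (splice(p[0], NULL, STDOUT_FILENO, NULL, n,
				   SPLICE_F_MOVE) < 0)
				return 1;

		return n < 0 ? 1 : 0;
	}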
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 059aaf2..f31601e 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -204,65 +204,6 @@
      </sect1>
   </chapter>
 
-  <chapter id="netcore">
-     <title>Linux Networking</title>
-     <sect1><title>Networking Base Types</title>
-!Iinclude/linux/net.h
-     </sect1>
-     <sect1><title>Socket Buffer Functions</title>
-!Iinclude/linux/skbuff.h
-!Iinclude/net/sock.h
-!Enet/socket.c
-!Enet/core/skbuff.c
-!Enet/core/sock.c
-!Enet/core/datagram.c
-!Enet/core/stream.c
-     </sect1>
-     <sect1><title>Socket Filter</title>
-!Enet/core/filter.c
-     </sect1>
-     <sect1><title>Generic Network Statistics</title>
-!Iinclude/linux/gen_stats.h
-!Enet/core/gen_stats.c
-!Enet/core/gen_estimator.c
-     </sect1>
-     <sect1><title>SUN RPC subsystem</title>
-<!-- The !D functionality is not perfect, garbage has to be protected by comments
-!Dnet/sunrpc/sunrpc_syms.c
--->
-!Enet/sunrpc/xdr.c
-!Enet/sunrpc/svcsock.c
-!Enet/sunrpc/sched.c
-     </sect1>
-  </chapter>
-
-  <chapter id="netdev">
-     <title>Network device support</title>
-     <sect1><title>Driver Support</title>
-!Enet/core/dev.c
-!Enet/ethernet/eth.c
-!Enet/sched/sch_generic.c
-!Iinclude/linux/etherdevice.h
-!Iinclude/linux/netdevice.h
-     </sect1>
-     <sect1><title>PHY Support</title>
-!Edrivers/net/phy/phy.c
-!Idrivers/net/phy/phy.c
-!Edrivers/net/phy/phy_device.c
-!Idrivers/net/phy/phy_device.c
-!Edrivers/net/phy/mdio_bus.c
-!Idrivers/net/phy/mdio_bus.c
-     </sect1>
-<!-- FIXME: Removed for now since no structured comments in source
-     <sect1><title>Wireless</title>
-X!Enet/core/wireless.c
-     </sect1>
--->
-     <sect1><title>Synchronous PPP</title>
-!Edrivers/net/wan/syncppp.c
-     </sect1>
-  </chapter>
-
   <chapter id="modload">
      <title>Module Support</title>
      <sect1><title>Module Loading</title>
@@ -508,11 +449,6 @@
 !Edrivers/serial/8250.c
   </chapter>
 
-  <chapter id="z85230">
-     <title>Z85230 Support Library</title>
-!Edrivers/net/wan/z85230.c
-  </chapter>
-
   <chapter id="fbdev">
      <title>Frame Buffer Library</title>
 
@@ -712,24 +648,4 @@
 !Edrivers/i2c/i2c-core.c
   </chapter>
 
-  <chapter id="splice">
-      <title>splice API</title>
-  <para>
-	splice is a method for moving blocks of data around inside the
-	kernel, without continually transferring them between the kernel
-	and user space.
-  </para>
-!Ffs/splice.c
-  </chapter>
-
-  <chapter id="pipes">
-      <title>pipes API</title>
-  <para>
-	Pipe interfaces are all for in-kernel (builtin image) use.
-	They are not exported for use by modules.
-  </para>
-!Iinclude/linux/pipe_fs_i.h
-!Ffs/pipe.c
-  </chapter>
-
 </book>
diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl
new file mode 100644
index 0000000..f24f9e8
--- /dev/null
+++ b/Documentation/DocBook/networking.tmpl
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxNetworking">
+ <bookinfo>
+  <title>Linux Networking and Network Devices APIs</title>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="netcore">
+     <title>Linux Networking</title>
+     <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+     </sect1>
+     <sect1><title>Socket Buffer Functions</title>
+!Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
+!Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
+     </sect1>
+     <sect1><title>Socket Filter</title>
+!Enet/core/filter.c
+     </sect1>
+     <sect1><title>Generic Network Statistics</title>
+!Iinclude/linux/gen_stats.h
+!Enet/core/gen_stats.c
+!Enet/core/gen_estimator.c
+     </sect1>
+     <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svc_xprt.c
+!Enet/sunrpc/xprt.c
+!Enet/sunrpc/sched.c
+!Enet/sunrpc/socklib.c
+!Enet/sunrpc/stats.c
+!Enet/sunrpc/rpc_pipe.c
+!Enet/sunrpc/rpcb_clnt.c
+!Enet/sunrpc/clnt.c
+     </sect1>
+  </chapter>
+
+  <chapter id="netdev">
+     <title>Network device support</title>
+     <sect1><title>Driver Support</title>
+!Enet/core/dev.c
+!Enet/ethernet/eth.c
+!Enet/sched/sch_generic.c
+!Iinclude/linux/etherdevice.h
+!Iinclude/linux/netdevice.h
+     </sect1>
+     <sect1><title>PHY Support</title>
+!Edrivers/net/phy/phy.c
+!Idrivers/net/phy/phy.c
+!Edrivers/net/phy/phy_device.c
+!Idrivers/net/phy/phy_device.c
+!Edrivers/net/phy/mdio_bus.c
+!Idrivers/net/phy/mdio_bus.c
+     </sect1>
+<!-- FIXME: Removed for now since no structured comments in source
+     <sect1><title>Wireless</title>
+X!Enet/core/wireless.c
+     </sect1>
+-->
+     <sect1><title>Synchronous PPP</title>
+!Edrivers/net/wan/syncppp.c
+     </sect1>
+  </chapter>
+
+</book>
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index d0634a5..c64158e 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -25,7 +25,7 @@
 This nmi_callback variable is a global function pointer to the current
 NMI handler.
 
-	fastcall void do_nmi(struct pt_regs * regs, long error_code)
+	void do_nmi(struct pt_regs * regs, long error_code)
 	{
 		int cpu;
 
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index 34e06d2..da10e07 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -20,7 +20,11 @@
 4: ppc64 is a good architecture for cross-compilation checking because it
    tends to use `unsigned long' for 64-bit quantities.
 
-5: Matches kernel coding style(!)
+5: Check your patch for general style as detailed in
+   Documentation/CodingStyle.  Check for trivial violations with the
+   patch style checker prior to submission (scripts/checkpatch.pl).
+   You should be able to justify all violations that remain in
+   your patch.
 
 6: Any new or modified CONFIG options don't muck up the config menu.
 
@@ -79,13 +83,3 @@
 23: Tested after it has been merged into the -mm patchset to make sure
     that it still works with all of the other queued patches and various
     changes in the VM, VFS, and other subsystems.
-
-24: Avoid whitespace damage such as indenting with spaces or whitespace
-    at the end of lines.  You can test this by feeding the patch to
-    "git apply --check --whitespace=error-all"
-
-25: Check your patch for general style as detailed in
-    Documentation/CodingStyle.  Check for trivial violations with the
-    patch style checker prior to submission (scripts/checkpatch.pl).
-    You should be able to justify all violations that remain in
-    your patch.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 30c1017..83f515c2 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -92,9 +92,8 @@
 64 bytes on i386.
 
 Note that the probed function's args may be passed on the stack
-or in registers (e.g., for x86_64 or for an i386 fastcall function).
-The jprobe will work in either case, so long as the handler's
-prototype matches that of the probed function.
+or in registers.  The jprobe will work in either case, so long as the
+handler's prototype matches that of the probed function.
 
 1.3 Return Probes
 
@@ -270,9 +269,9 @@
 The handler should have the same arg list and return type as the probed
 function; and just before it returns, it must call jprobe_return().
 (The handler never actually returns, since jprobe_return() returns
-control to Kprobes.)  If the probed function is declared asmlinkage,
-fastcall, or anything else that affects how args are passed, the
-handler's declaration must match.
+control to Kprobes.)  If the probed function is declared asmlinkage
+or anything else that affects how args are passed, the handler's
+declaration must match.
 
 register_jprobe() returns 0 on success, or a negative errno otherwise.
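For reference, a minimal jprobe in the spirit of the examples in this file
(a sketch, not part of this diff; it assumes the 2.6.25-era do_fork()
prototype), showing the matching-prototype rule described above:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/kprobes.h>

	/* Handler mirrors the probed function's prototype exactly. */
	static long jdo_fork(unsigned long clone_flags,
			     unsigned long stack_start, struct pt_regs *regs,
			     unsigned long stack_size,
			     int __user *parent_tidptr,
			     int __user *child_tidptr)
	{
		printk(KERN_INFO "jprobe: clone_flags=0x%lx\n", clone_flags);
		jprobe_return();	/* hands control back to Kprobes */
		return 0;		/* never reached */
	}

	static struct jprobe my_jprobe = {
		.entry = jdo_fork,
		.kp = { .symbol_name = "do_fork" },
	};

	static int __init my_init(void)
	{
		return register_jprobe(&my_jprobe);
	}

	static void __exit my_exit(void)
	{
		unregister_jprobe(&my_jprobe);
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");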
 
diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt
new file mode 100644
index 0000000..1c6332f
--- /dev/null
+++ b/Documentation/sched-rt-group.txt
@@ -0,0 +1,59 @@
+
+
+Real-Time group scheduling.
+
+The problem space:
+
+In order to schedule multiple groups of realtime tasks each group must
+be assigned a fixed portion of the CPU time available. Without a minimum
+guarantee a realtime group can obviously fall short. A fuzzy upper limit
+is of no use since it cannot be relied upon, which leaves us with just
+the single fixed portion.
+
+CPU time is divided by means of specifying how much time can be spent
+running in a given period. Say a fixed-frame-rate realtime renderer must
+deliver 25 frames a second, which yields a period of 0.04s. Now say
+it will also have to play some music and respond to input, leaving it
+with around 80% for the graphics. We can then give this group a runtime
+of 0.8 * 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s runtime
+limit.
+
+Now if the audio thread needs to refill the DMA buffer every 0.005s, but
+needs only about 3% CPU time to do so, it can make do with a runtime of
+0.03 * 0.005s = 0.00015s.
+
+
+The Interface:
+
+system wide:
+
+/proc/sys/kernel/sched_rt_period_us
+/proc/sys/kernel/sched_rt_runtime_us
+
+CONFIG_FAIR_USER_SCHED
+
+/sys/kernel/uids/<uid>/cpu_rt_runtime_us
+
+or
+
+CONFIG_FAIR_CGROUP_SCHED
+
+/cgroup/<cgroup>/cpu.rt_runtime_us
+
+[ time is specified in us because the interface is s32; this gives an
+  operating range of ~35 minutes down to 1us ]
+
+The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
+
+A runtime of -1 specifies runtime == period, i.e. no limit.
+
+New groups get the period from /proc/sys/kernel/sched_rt_period_us and
+a runtime of 0.
+
+Settings are constrained to:
+
+   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
+
+in order to keep the configuration schedulable.
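An illustrative companion to the cgroup flavour of the interface above (a
sketch, not part of this diff; the "graphics" group name is hypothetical and
CONFIG_FAIR_CGROUP_SCHED is assumed), granting the renderer from the example
its 0.8 * 0.04s = 0.032s of runtime:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/cgroup/graphics/cpu.rt_runtime_us", "w");

		if (!f)
			return 1;
		fprintf(f, "%d\n", 32000);	/* 0.032s in microseconds */
		return fclose(f) ? 1 : 0;
	}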
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index dc8801d..276a7e6 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -29,7 +29,7 @@
 - java-interpreter            [ binfmt_java, obsolete ]
 - kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
-- modprobe                    ==> Documentation/kmod.txt
+- modprobe                    ==> Documentation/debugging-modules.txt
 - msgmax
 - msgmnb
 - msgmni
diff --git a/MAINTAINERS b/MAINTAINERS
index c40f0ae..6680ec4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3561,6 +3561,8 @@
 M:	clameter@sgi.com
 P:	Pekka Enberg
 M:	penberg@cs.helsinki.fi
+P:	Matt Mackall
+M:	mpm@selenic.com
 L:	linux-mm@kvack.org
 S:	Maintained
 
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 1dd50d0..75480ca 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -119,13 +119,8 @@
 	state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); 
 	nticks = delta >> FIX_SHIFT;
 
-	while (nticks > 0) {
-		do_timer(1);
-#ifndef CONFIG_SMP
-		update_process_times(user_mode(get_irq_regs()));
-#endif
-		nticks--;
-	}
+	if (nticks)
+		do_timer(nticks);
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
@@ -141,6 +136,12 @@
 	}
 
 	write_sequnlock(&xtime_lock);
+
+#ifndef CONFIG_SMP
+	while (nticks--)
+		update_process_times(user_mode(get_irq_regs()));
+#endif
+
 	return IRQ_HANDLED;
 }
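This alpha change is the first of several identical reorderings in this merge
(blackfin, frv, m68knommu and sparc follow the same pattern below):
do_timer() stays under xtime_lock, while update_process_times() is deferred
until the seqlock has been dropped, since it needs no xtime state. A
distilled sketch of the resulting handler shape (illustrative, not part of
this diff):

	static irqreturn_t timer_interrupt(int irq, void *dev_id)
	{
		write_seqlock(&xtime_lock);
		do_timer(1);		/* tick accounting under the lock */
		write_sequnlock(&xtime_lock);

	#ifndef CONFIG_SMP
		/* needs no xtime state, so run it outside the seqlock */
		update_process_times(user_mode(get_irq_regs()));
	#endif
		return IRQ_HANDLED;
	}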
 
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index 5bd64e3..9bdc8f9 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -137,9 +137,6 @@
 
 	do_timer(1);
 
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
 	profile_tick(CPU_PROFILING);
 
 	/*
@@ -161,6 +158,11 @@
 			last_rtc_update = xtime.tv_sec - 600;
 	}
 	write_sequnlock(&xtime_lock);
+
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(get_irq_regs()));
+#endif
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 925fb01..69f6a4e 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -63,6 +63,7 @@
 	/* last time the cmos clock got updated */
 	static long last_rtc_update = 0;
 
+	profile_tick(CPU_PROFILING);
 	/*
 	 * Here we are in the timer irq handler. We just have irqs locally
 	 * disabled but we don't know if the timer_bh is running on the other
@@ -73,8 +74,6 @@
 	write_seqlock(&xtime_lock);
 
 	do_timer(1);
-	update_process_times(user_mode(get_irq_regs()));
-	profile_tick(CPU_PROFILING);
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
@@ -99,6 +98,9 @@
 #endif /* CONFIG_HEARTBEAT */
 
 	write_sequnlock(&xtime_lock);
+
+	update_process_times(user_mode(get_irq_regs()));
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index ef7527b..17725a5 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -105,11 +105,9 @@
 	SCHED_TEXT
 	LOCK_TEXT
 #ifdef CONFIG_DEBUG_INFO
-	*(
 	INIT_TEXT
 	EXIT_TEXT
-	.exitcall.exit
-	)
+	*(.exitcall.exit)
 #endif
 	*(.fixup)
 	*(.gnu.warning)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2d4fcd0..dff9edf 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -232,7 +232,14 @@
 
 endchoice
 
+if IA64_HP_SIM
+config HZ
+	default 32
+endif
+
+if !IA64_HP_SIM
 source kernel/Kconfig.hz
+endif
 
 config IA64_BRL_EMU
 	bool
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index 89cdbca..0ccfb2a 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -42,14 +42,12 @@
 	/* last time the cmos clock got updated */
 	static long last_rtc_update=0;
 
+	if (current->pid)
+		profile_tick(CPU_PROFILING);
+
 	write_seqlock(&xtime_lock);
 
 	do_timer(1);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
@@ -67,6 +65,10 @@
 	}
 
 	write_sequnlock(&xtime_lock);
+
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(get_irq_regs()));
+#endif
 	return(IRQ_HANDLED);
 }
 
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 2b1953f..01974f7 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -146,34 +146,28 @@
 	wake_up(&sputrace_wait);
 }
 
-static void spu_context_event(const struct marker *mdata,
-		void *private, const char *format, ...)
+static void spu_context_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
 {
-	struct spu_probe *p = mdata->private;
-	va_list ap;
+	struct spu_probe *p = probe_private;
 	struct spu_context *ctx;
 	struct spu *spu;
 
-	va_start(ap, format);
-	ctx = va_arg(ap, struct spu_context *);
-	spu = va_arg(ap, struct spu *);
+	ctx = va_arg(*args, struct spu_context *);
+	spu = va_arg(*args, struct spu *);
 
 	sputrace_log_item(p->name, ctx, spu);
-	va_end(ap);
 }
 
-static void spu_context_nospu_event(const struct marker *mdata,
-		void *private, const char *format, ...)
+static void spu_context_nospu_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
 {
-	struct spu_probe *p = mdata->private;
-	va_list ap;
+	struct spu_probe *p = probe_private;
 	struct spu_context *ctx;
 
-	va_start(ap, format);
-	ctx = va_arg(ap, struct spu_context *);
+	ctx = va_arg(*args, struct spu_context *);
 
 	sputrace_log_item(p->name, ctx, NULL);
-	va_end(ap);
 }
 
 struct spu_probe spu_probes[] = {
@@ -219,10 +213,6 @@
 		if (error)
 			printk(KERN_INFO "Unable to register probe %s\n",
 					p->name);
-
-		error = marker_arm(p->name);
-		if (error)
-			printk(KERN_INFO "Unable to arm probe %s\n", p->name);
 	}
 
 	return 0;
@@ -238,7 +228,8 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(spu_probes); i++)
-		marker_probe_unregister(spu_probes[i].name);
+		marker_probe_unregister(spu_probes[i].name,
+			spu_probes[i].probe_func, &spu_probes[i]);
 
 	remove_proc_entry("sputrace", NULL);
 	kfree(sputrace_log);
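The sputrace conversion above follows the 2.6.25 marker API rework: probe
handlers now receive (probe_private, call_data, format, va_list *) instead
of a struct marker pointer plus varargs, and the private pointer passed at
registration must be repeated at unregistration. A minimal sketch against a
hypothetical trace_mark(my_event, "value %d", v) site (not part of this
diff):

	#include <linux/kernel.h>
	#include <linux/marker.h>
	#include <linux/module.h>

	static void my_probe(void *probe_private, void *call_data,
			     const char *format, va_list *args)
	{
		/* arguments arrive in the order the format string names them */
		int value = va_arg(*args, int);

		printk(KERN_DEBUG "my_event fired: %d\n", value);
	}

	static int __init my_init(void)
	{
		/* name and format must match the trace_mark() site exactly */
		return marker_probe_register("my_event", "value %d",
					     my_probe, NULL);
	}

	static void __exit my_exit(void)
	{
		marker_probe_unregister("my_event", my_probe, NULL);
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");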
diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c
index 499e07b..71312324 100644
--- a/arch/sh/kernel/timers/timer-cmt.c
+++ b/arch/sh/kernel/timers/timer-cmt.c
@@ -100,16 +100,7 @@
 	timer_status &= ~0x80;
 	ctrl_outw(timer_status, CMT_CMCSR_0);
 
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
 	handle_timer_tick();
-	write_sequnlock(&xtime_lock);
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/sh/kernel/timers/timer-mtu2.c b/arch/sh/kernel/timers/timer-mtu2.c
index b7499a2..463cd08 100644
--- a/arch/sh/kernel/timers/timer-mtu2.c
+++ b/arch/sh/kernel/timers/timer-mtu2.c
@@ -100,9 +100,7 @@
 	ctrl_outb(timer_status, MTU2_TSR_1);
 
 	/* Do timer tick */
-	write_seqlock(&xtime_lock);
 	handle_timer_tick();
-	write_sequnlock(&xtime_lock);
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 4cd5d78..a6a6f98 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -713,10 +713,10 @@
 	write_seqlock(&xtime_lock);	/* Dummy, to show that we remember */
 	pcic_clear_clock_irq();
 	do_timer(1);
+	write_sequnlock(&xtime_lock);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(get_irq_regs()));
 #endif
-	write_sequnlock(&xtime_lock);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c
index 00b393c..cfaf22c 100644
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -128,10 +128,6 @@
 	clear_clock_irq();
 
 	do_timer(1);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-
 
 	/* Determine when to update the Mostek clock. */
 	if (ntp_synced() &&
@@ -145,6 +141,9 @@
 	}
 	write_sequnlock(&xtime_lock);
 
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(get_irq_regs()));
+#endif
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 32dd62b..cbdf9ba 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -384,9 +384,6 @@
 	efi_memory_desc_t *md;
 	void *p;
 
-	if (!(__supported_pte_mask & _PAGE_NX))
-		return;
-
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
@@ -428,9 +425,6 @@
 		else
 			va = efi_ioremap(md->phys_addr, size);
 
-		if (md->attribute & EFI_MEMORY_WB)
-			set_memory_uc(md->virt_addr, size);
-
 		md->virt_addr = (u64) (unsigned long) va;
 
 		if (!va) {
@@ -439,6 +433,9 @@
 			continue;
 		}
 
+		if (!(md->attribute & EFI_MEMORY_WB))
+			set_memory_uc(md->virt_addr, size);
+
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
@@ -476,7 +473,8 @@
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
 	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
-	runtime_code_page_mkexec();
+	if (__supported_pte_mask & _PAGE_NX)
+		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 	memmap.map = NULL;
 }
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 09d5c23..d143a1e 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/efi.h>
+#include <asm/cacheflush.h>
 
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
@@ -43,22 +44,15 @@
 					  unsigned long end,
 					  int executable)
 {
-	pte_t *kpte;
-	unsigned int level;
+	unsigned long num_pages;
 
-	while (start < end) {
-		kpte = lookup_address((unsigned long)__va(start), &level);
-		BUG_ON(!kpte);
-		if (executable)
-			set_pte(kpte, pte_mkexec(*kpte));
-		else
-			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
-					    __supported_pte_mask));
-		if (level == PG_LEVEL_4K)
-			start = (start + PAGE_SIZE) & PAGE_MASK;
-		else
-			start = (start + PMD_SIZE) & PMD_MASK;
-	}
+	start &= PMD_MASK;
+	end = (end + PMD_SIZE - 1) & PMD_MASK;
+	num_pages = (end - start) >> PAGE_SHIFT;
+	if (executable)
+		set_memory_x((unsigned long)__va(start), num_pages);
+	else
+		set_memory_nx((unsigned long)__va(start), num_pages);
 }
 
 static void __init early_runtime_code_mapping_set_exec(int executable)
@@ -74,7 +68,7 @@
 		md = p;
 		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
 			unsigned long end;
-			end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 			early_mapping_set_exec(md->phys_addr, end, executable);
 		}
 	}
@@ -84,8 +78,8 @@
 {
 	unsigned long vaddress;
 
-	local_irq_save(efi_flags);
 	early_runtime_code_mapping_set_exec(1);
+	local_irq_save(efi_flags);
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
 	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -98,9 +92,9 @@
 	 * After the lock is released, the original page table is restored.
 	 */
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
-	early_runtime_code_mapping_set_exec(0);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
+	early_runtime_code_mapping_set_exec(0);
 }
 
 void __init efi_reserve_bootmem(void)
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index ef62b07..8540abe 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -95,7 +95,7 @@
  * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
  * !using_apic_timer decisions in do_timer_interrupt_hook()
  */
-struct clock_event_device pit_clockevent = {
+static struct clock_event_device pit_clockevent = {
 	.name		= "pit",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= init_pit_timer,
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1941482..c47208f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -11,7 +11,7 @@
 static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 {
 	u8 config, rev;
-	u32 word;
+	u16 word;
 
 	/* BIOS may enable hardware IRQ balancing for
 	 * E7520/E7320/E7525(revision ID 0x9 and below)
@@ -26,8 +26,11 @@
 	pci_read_config_byte(dev, 0xf4, &config);
 	pci_write_config_byte(dev, 0xf4, config|0x2);
 
-	/* read xTPR register */
-	raw_pci_read(0, 0, 0x40, 0x4c, 2, &word);
+	/*
+	 * read xTPR register.  We may not have a pci_dev for device 8
+	 * because it might be hidden until the above write.
+	 */
+	pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word);
 
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5818dc2..7fd6ac4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,7 +326,7 @@
 	}
 }
 
-void machine_emergency_restart(void)
+static void native_machine_emergency_restart(void)
 {
 	int i;
 
@@ -376,7 +376,7 @@
 	}
 }
 
-void machine_shutdown(void)
+static void native_machine_shutdown(void)
 {
 	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
@@ -420,7 +420,7 @@
 #endif
 }
 
-void machine_restart(char *__unused)
+static void native_machine_restart(char *__unused)
 {
 	printk("machine restart\n");
 
@@ -429,11 +429,11 @@
 	machine_emergency_restart();
 }
 
-void machine_halt(void)
+static void native_machine_halt(void)
 {
 }
 
-void machine_power_off(void)
+static void native_machine_power_off(void)
 {
 	if (pm_power_off) {
 		if (!reboot_force)
@@ -443,9 +443,35 @@
 }
 
 struct machine_ops machine_ops = {
-	.power_off = machine_power_off,
-	.shutdown = machine_shutdown,
-	.emergency_restart = machine_emergency_restart,
-	.restart = machine_restart,
-	.halt = machine_halt
+	.power_off = native_machine_power_off,
+	.shutdown = native_machine_shutdown,
+	.emergency_restart = native_machine_emergency_restart,
+	.restart = native_machine_restart,
+	.halt = native_machine_halt
 };
+
+void machine_power_off(void)
+{
+	machine_ops.power_off();
+}
+
+void machine_shutdown(void)
+{
+	machine_ops.shutdown();
+}
+
+void machine_emergency_restart(void)
+{
+	machine_ops.emergency_restart();
+}
+
+void machine_restart(char *cmd)
+{
+	machine_ops.restart(cmd);
+}
+
+void machine_halt(void)
+{
+	machine_ops.halt();
+}
+
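The point of the new indirection is that machine_ops is an ordinary writable
struct: another backend can interpose on the reboot path without touching
the common machine_*() entry points. A hypothetical sketch (not part of this
diff) of a paravirt backend doing so:

	static void my_hyp_emergency_restart(void)
	{
		/* issue the hypervisor's reset call here instead of the
		 * native keyboard-controller/ACPI path */
	}

	static int __init my_hyp_setup(void)
	{
		machine_ops.emergency_restart = my_hyp_emergency_restart;
		return 0;
	}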
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a4897a8..9f42d7e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -265,7 +265,9 @@
 
 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	/* Don't assume we're using swapper_pg_dir at this point */
+	pgd_t *base = __va(read_cr3());
+	pgd_t *pgd = &base[pgd_index(addr)];
 	pud_t *pud = pud_offset(pgd, addr);
 	pmd_t *pmd = pmd_offset(pud, addr);
 
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index ed82016..75f1b10 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -40,7 +40,6 @@
 static int print_split(struct split_state *s)
 {
 	long i, expected, missed = 0;
-	int printed = 0;
 	int err = 0;
 
 	s->lpg = s->gpg = s->spg = s->exec = 0;
@@ -53,12 +52,6 @@
 
 		pte = lookup_address(addr, &level);
 		if (!pte) {
-			if (!printed) {
-				dump_pagetable(addr);
-				printk(KERN_INFO "CPA %lx no pte level %d\n",
-					addr, level);
-				printed = 1;
-			}
 			missed++;
 			i++;
 			continue;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 440210a..bd61ed1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -275,8 +275,8 @@
 		break;
 #ifdef CONFIG_X86_64
 	case PG_LEVEL_1G:
-		psize = PMD_PAGE_SIZE;
-		pmask = PMD_PAGE_MASK;
+		psize = PUD_PAGE_SIZE;
+		pmask = PUD_PAGE_MASK;
 		break;
 #endif
 	default:
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index d28dda5..f385a4b 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -7,7 +7,7 @@
 VDSO32-$(CONFIG_COMPAT)		:= y
 
 vdso-install-$(VDSO64-y)	+= vdso.so
-vdso-install-$(VDSO32-y)	+= $(vdso32-y:=.so)
+vdso-install-$(VDSO32-y)	+= $(vdso32-images)
 
 
 # files to link into the vdso
@@ -63,6 +63,8 @@
 vdso32.so-$(CONFIG_COMPAT)	+= syscall
 vdso32.so-$(VDSO32-y)		+= sysenter
 
+vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)
+
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
 
@@ -71,21 +73,21 @@
 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
-targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so)
+targets += $(vdso32-images) $(vdso32-images:=.dbg)
 targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
 
-extra-y	+= $(vdso32.so-y:%=vdso32-%.so)
+extra-y	+= $(vdso32-images)
 
-$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so)
+$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
 
 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
-					 $(obj)/vdso32/vdso32.lds \
-					 $(obj)/vdso32/note.o \
-					 $(obj)/vdso32/%.o
+$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+				 $(obj)/vdso32/vdso32.lds \
+				 $(obj)/vdso32/note.o \
+				 $(obj)/vdso32/%.o
 	$(call if_changed,vdso)
 
 # Make vdso32-*-syms.lds from each image, and then make sure they match.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index de647bc..49e5358 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -798,6 +798,10 @@
 	 * added to the table can be prepared properly for Xen.
 	 */
 	xen_write_cr3(__pa(base));
+
+	/* Unpin initial Xen pagetable */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+			  PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 34b3386..15e6023 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -623,7 +623,7 @@
 
 acpi_status
 acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, u32 reg,
-			       void *value, u32 width)
+			       u32 *value, u32 width)
 {
 	int result, size;
 
@@ -689,7 +689,6 @@
 	acpi_status status;
 	unsigned long temp;
 	acpi_object_type type;
-	u8 tu8;
 
 	acpi_get_parent(chandle, &handle);
 	if (handle != rhandle) {
@@ -704,6 +703,7 @@
 		    acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL,
 					  &temp);
 		if (ACPI_SUCCESS(status)) {
+			u32 val;
 			pci_id->device = ACPI_HIWORD(ACPI_LODWORD(temp));
 			pci_id->function = ACPI_LOWORD(ACPI_LODWORD(temp));
 
@@ -712,24 +712,24 @@
 
 			/* any nicer way to get bus number of bridge ? */
 			status =
-			    acpi_os_read_pci_configuration(pci_id, 0x0e, &tu8,
+			    acpi_os_read_pci_configuration(pci_id, 0x0e, &val,
 							   8);
 			if (ACPI_SUCCESS(status)
-			    && ((tu8 & 0x7f) == 1 || (tu8 & 0x7f) == 2)) {
+			    && ((val & 0x7f) == 1 || (val & 0x7f) == 2)) {
 				status =
 				    acpi_os_read_pci_configuration(pci_id, 0x18,
-								   &tu8, 8);
+								   &val, 8);
 				if (!ACPI_SUCCESS(status)) {
 					/* Certainly broken...  FIX ME */
 					return;
 				}
 				*is_bridge = 1;
-				pci_id->bus = tu8;
+				pci_id->bus = val;
 				status =
 				    acpi_os_read_pci_configuration(pci_id, 0x19,
-								   &tu8, 8);
+								   &val, 8);
 				if (ACPI_SUCCESS(status)) {
-					*bus_number = tu8;
+					*bus_number = val;
 				}
 			} else
 				*is_bridge = 0;
diff --git a/drivers/acpi/wmi.c b/drivers/acpi/wmi.c
index 36b84ab..efacc9f 100644
--- a/drivers/acpi/wmi.c
+++ b/drivers/acpi/wmi.c
@@ -247,7 +247,7 @@
 	block = &wblock->gblock;
 	handle = wblock->handle;
 
-	if (!block->flags & ACPI_WMI_METHOD)
+	if (!(block->flags & ACPI_WMI_METHOD))
 		return AE_BAD_DATA;
 
 	if (block->instance_count < instance)
@@ -673,11 +673,11 @@
 {
 	acpi_status result;
 
+	INIT_LIST_HEAD(&wmi_blocks.list);
+
 	if (acpi_disabled)
 		return -ENODEV;
 
-	INIT_LIST_HEAD(&wmi_blocks.list);
-
 	result = acpi_bus_register_driver(&acpi_wmi_driver);
 
 	if (result < 0) {
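The first wmi.c hunk above fixes the classic logical-not versus bitwise-and
precedence bug: "!block->flags & ACPI_WMI_METHOD" parses as
"(!block->flags) & ACPI_WMI_METHOD", which is 0 whenever flags is nonzero.
A tiny standalone illustration (not part of this diff):

	#include <stdio.h>

	#define ACPI_WMI_METHOD 0x2

	int main(void)
	{
		int flags = 0x1;	/* method bit clear */

		/* (!flags) & 0x2 == 0: the missing-method case is missed */
		printf("broken: %d\n", !flags & ACPI_WMI_METHOD);	/* 0 */
		/* the intended test, as in the fix above */
		printf("fixed:  %d\n", !(flags & ACPI_WMI_METHOD));	/* 1 */
		return 0;
	}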
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3011919..004dae4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3048,6 +3048,8 @@
 static int ata_dev_set_mode(struct ata_device *dev)
 {
 	struct ata_eh_context *ehc = &dev->link->eh_context;
+	const char *dev_err_whine = "";
+	int ign_dev_err = 0;
 	unsigned int err_mask;
 	int rc;
 
@@ -3057,41 +3059,57 @@
 
 	err_mask = ata_dev_set_xfermode(dev);
 
-	/* Old CFA may refuse this command, which is just fine */
-	if (dev->xfer_shift == ATA_SHIFT_PIO && ata_id_is_cfa(dev->id))
-		err_mask &= ~AC_ERR_DEV;
+	if (err_mask & ~AC_ERR_DEV)
+		goto fail;
 
-	/* Some very old devices and some bad newer ones fail any kind of
-	   SET_XFERMODE request but support PIO0-2 timings and no IORDY */
-	if (dev->xfer_shift == ATA_SHIFT_PIO && !ata_id_has_iordy(dev->id) &&
-			dev->pio_mode <= XFER_PIO_2)
-		err_mask &= ~AC_ERR_DEV;
-
-	/* Early MWDMA devices do DMA but don't allow DMA mode setting.
-	   Don't fail an MWDMA0 set IFF the device indicates it is in MWDMA0 */
-	if (dev->xfer_shift == ATA_SHIFT_MWDMA &&
-	    dev->dma_mode == XFER_MW_DMA_0 &&
-	    (dev->id[63] >> 8) & 1)
-		err_mask &= ~AC_ERR_DEV;
-
-	if (err_mask) {
-		ata_dev_printk(dev, KERN_ERR, "failed to set xfermode "
-			       "(err_mask=0x%x)\n", err_mask);
-		return -EIO;
-	}
-
+	/* revalidate */
 	ehc->i.flags |= ATA_EHI_POST_SETMODE;
 	rc = ata_dev_revalidate(dev, ATA_DEV_UNKNOWN, 0);
 	ehc->i.flags &= ~ATA_EHI_POST_SETMODE;
 	if (rc)
 		return rc;
 
+	/* Old CFA may refuse this command, which is just fine */
+	if (dev->xfer_shift == ATA_SHIFT_PIO && ata_id_is_cfa(dev->id))
+		ign_dev_err = 1;
+
+	/* Some very old devices and some bad newer ones fail any kind of
+	   SET_XFERMODE request but support PIO0-2 timings and no IORDY */
+	if (dev->xfer_shift == ATA_SHIFT_PIO && !ata_id_has_iordy(dev->id) &&
+			dev->pio_mode <= XFER_PIO_2)
+		ign_dev_err = 1;
+
+	/* Early MWDMA devices do DMA but don't allow DMA mode setting.
+	   Don't fail an MWDMA0 set IFF the device indicates it is in MWDMA0 */
+	if (dev->xfer_shift == ATA_SHIFT_MWDMA &&
+	    dev->dma_mode == XFER_MW_DMA_0 &&
+	    (dev->id[63] >> 8) & 1)
+		ign_dev_err = 1;
+
+	/* if the device is actually configured correctly, ignore dev err */
+	if (dev->xfer_mode == ata_xfer_mask2mode(ata_id_xfermask(dev->id)))
+		ign_dev_err = 1;
+
+	if (err_mask & AC_ERR_DEV) {
+		if (!ign_dev_err)
+			goto fail;
+		else
+			dev_err_whine = " (device error ignored)";
+	}
+
 	DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n",
 		dev->xfer_shift, (int)dev->xfer_mode);
 
-	ata_dev_printk(dev, KERN_INFO, "configured for %s\n",
-		       ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)));
+	ata_dev_printk(dev, KERN_INFO, "configured for %s%s\n",
+		       ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)),
+		       dev_err_whine);
+
 	return 0;
+
+ fail:
+	ata_dev_printk(dev, KERN_ERR, "failed to set xfermode "
+		       "(err_mask=0x%x)\n", err_mask);
+	return -EIO;
 }
 
 /**
diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c
index 761a666..ea567e2 100644
--- a/drivers/ata/pata_amd.c
+++ b/drivers/ata/pata_amd.c
@@ -772,7 +772,7 @@
 }
 
 MODULE_AUTHOR("Alan Cox");
-MODULE_DESCRIPTION("low-level driver for AMD PATA IDE");
+MODULE_DESCRIPTION("low-level driver for AMD and Nvidia PATA IDE");
 MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, amd);
 MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 333dc15..6c59969 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -127,7 +127,7 @@
 static int opti82c46x;		/* Opti 82c465MV present(pri/sec autodetect) */
 static int qdi;			/* Set to probe QDI controllers */
 static int winbond;		/* Set to probe Winbond controllers,
-					give I/O port if non stdanard */
+					give I/O port if non standard */
 static int autospeed;		/* Chip present which snoops speed changes */
 static int pio_mask = 0x1F;	/* PIO range for autospeed devices */
 static int iordy_mask = 0xFFFFFFFF;	/* Use iordy if available */
diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index 1c1b835..15dd649 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c
@@ -17,6 +17,7 @@
  *	Base + 0x00 IRQ Status
  *	Base + 0x01 IRQ control
  *	Base + 0x02 Chipset control
+ *	Base + 0x03 Unknown
  *	Base + 0x04 VDMA and reset control + wait bits
  *	Base + 0x08 BMIMBA
  *	Base + 0x0C DMA Length
@@ -174,8 +175,12 @@
 	ata_std_ports(&ap->ioaddr);
 
 	iowrite8(0x05, base + 0x01);	/* Enable interrupt lines */
-	iowrite8(0xB3, base + 0x02);	/* Burst, ?? setup */
-	iowrite8(0x00, base + 0x04);	/* WAIT0 ? */
+	iowrite8(0xBE, base + 0x02);	/* Burst, ?? setup */
+	iowrite8(0x01, base + 0x03);	/* Unknown */
+	iowrite8(0x20, base + 0x04);	/* WAIT0 */
+	iowrite8(0x8f, base + 0x05);	/* Unknown */
+	iowrite8(0xa4, base + 0x1c);	/* Unknown */
+	iowrite8(0x83, base + 0x1d);	/* BMDMA control: WAIT0 */
 	/* FIXME: Should we disable them at remove ? */
 	return ata_host_activate(host, dev->irq, ata_interrupt,
 				 IRQF_SHARED, &ninja32_sht);
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 39627ab..d119a68 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -84,6 +84,7 @@
 	VIA_BAD_ID	= 0x100, /* Has wrong vendor ID (0x1107) */
 	VIA_BAD_AST	= 0x200, /* Don't touch Address Setup Timing */
 	VIA_NO_ENABLES	= 0x400, /* Has no enablebits */
+	VIA_SATA_PATA	= 0x800, /* SATA/PATA combined configuration */
 };
 
 /*
@@ -100,7 +101,7 @@
 	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8237s",	PCI_DEVICE_ID_VIA_8237S,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8251",	PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
-	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
+	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA },
 	{ "vt6410",	PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES},
 	{ "vt8237a",	PCI_DEVICE_ID_VIA_8237A,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8237",	PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
@@ -172,6 +173,9 @@
 	if (via_cable_override(pdev))
 		return ATA_CBL_PATA40_SHORT;
 
+	if ((config->flags & VIA_SATA_PATA) && ap->port_no == 0)
+		return ATA_CBL_SATA;
+
 	/* Early chips are 40 wire */
 	if ((config->flags & VIA_UDMA) < VIA_UDMA_66)
 		return ATA_CBL_PATA40;
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 080b836..04b5717 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -1716,14 +1716,16 @@
 	VPRINTK("ENTER, hc%u relevant=0x%08x HC IRQ cause=0x%08x\n",
 		hc, relevant, hc_irq_cause);
 
-	for (port = port0; port < port0 + last_port; port++) {
+	for (port = port0; port < last_port; port++) {
 		struct ata_port *ap = host->ports[port];
-		struct mv_port_priv *pp = ap->private_data;
+		struct mv_port_priv *pp;
 		int have_err_bits, hard_port, shift;
 
 		if ((!ap) || (ap->flags & ATA_FLAG_DISABLED))
 			continue;
 
+		pp = ap->private_data;
+
 		shift = port << 1;		/* (port * 2) */
 		if (port >= MV_PORTS_PER_HC) {
 			shift++;	/* skip bit 8 in the HC Main IRQ reg */
@@ -2879,6 +2881,26 @@
 	return rc;
 }
 
+static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev)
+{
+	hpriv->crqb_pool   = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ,
+							     MV_CRQB_Q_SZ, 0);
+	if (!hpriv->crqb_pool)
+		return -ENOMEM;
+
+	hpriv->crpb_pool   = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ,
+							     MV_CRPB_Q_SZ, 0);
+	if (!hpriv->crpb_pool)
+		return -ENOMEM;
+
+	hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ,
+							     MV_SG_TBL_SZ, 0);
+	if (!hpriv->sg_tbl_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
 /**
  *      mv_platform_probe - handle a positive probe of an soc Marvell
  *      host
@@ -2932,6 +2954,10 @@
 	hpriv->base = ioremap(res->start, res->end - res->start + 1);
 	hpriv->base -= MV_SATAHC0_REG_BASE;
 
+	rc = mv_create_dma_pools(hpriv, &pdev->dev);
+	if (rc)
+		return rc;
+
 	/* initialize adapter */
 	rc = mv_init_host(host, chip_soc);
 	if (rc)
@@ -3068,26 +3094,6 @@
 	       scc_s, (MV_HP_FLAG_MSI & hpriv->hp_flags) ? "MSI" : "INTx");
 }
 
-static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev)
-{
-	hpriv->crqb_pool   = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ,
-							     MV_CRQB_Q_SZ, 0);
-	if (!hpriv->crqb_pool)
-		return -ENOMEM;
-
-	hpriv->crpb_pool   = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ,
-							     MV_CRPB_Q_SZ, 0);
-	if (!hpriv->crpb_pool)
-		return -ENOMEM;
-
-	hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ,
-							     MV_SG_TBL_SZ, 0);
-	if (!hpriv->sg_tbl_pool)
-		return -ENOMEM;
-
-	return 0;
-}
-
 /**
  *      mv_pci_init_one - handle a positive probe of a PCI Marvell host
  *      @pdev: PCI device found
diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 379cbda..9df0810 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -36,7 +36,7 @@
 #include "i830_drm.h"
 #include "i830_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
-#include <linux/pagemap.h>	/* For FASTCALL on unlock_page() */
+#include <linux/pagemap.h>
 #include <linux/delay.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig
index 00b8a84..ffa0efc 100644
--- a/drivers/char/pcmcia/Kconfig
+++ b/drivers/char/pcmcia/Kconfig
@@ -45,7 +45,7 @@
 
 config IPWIRELESS
 	tristate "IPWireless 3G UMTS PCMCIA card support"
-	depends on PCMCIA
+	depends on PCMCIA && NETDEVICES
 	select PPP
 	help
 	  This is a driver for 3G UMTS PCMCIA card from IPWireless company. In
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 043c34a..df752e6 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -378,6 +378,9 @@
 	  would like the kernel to automatically detect and activate
 	  it, say Y here.
 
+config BLK_DEV_IDEDMA_SFF
+	bool
+
 if PCI
 
 comment "PCI IDE chipsets support"
@@ -459,6 +462,7 @@
 config BLK_DEV_IDEDMA_PCI
 	bool
 	select BLK_DEV_IDEPCI
+	select BLK_DEV_IDEDMA_SFF
 
 config BLK_DEV_AEC62XX
 	tristate "AEC62XX chipset support"
@@ -688,23 +692,6 @@
 
 	  If unsure, say N.
 
-config PDC202XX_BURST
-	bool "Special UDMA Feature"
-	depends on BLK_DEV_PDC202XX_OLD
-	help
-	  This option causes the pdc202xx driver to enable UDMA modes on the
-	  PDC202xx even when the PDC202xx BIOS has not done so.
-
-	  It was originally designed for the PDC20246/Ultra33, whose BIOS will
-	  only setup UDMA on the first two PDC20246 cards.  It has also been
-	  used successfully on a PDC20265/Ultra100, allowing use of UDMA modes
-	  when the PDC20265 BIOS has been disabled (for faster boot up).
-
-	  Please read the comments at the top of
-	  <file:drivers/ide/pci/pdc202xx_old.c>.
-
-	  If unsure, say N.
-
 config BLK_DEV_PDC202XX_NEW
 	tristate "PROMISE PDC202{68|69|70|71|75|76|77} support"
 	select BLK_DEV_IDEDMA_PCI
@@ -1016,7 +1003,7 @@
 config BLK_DEV_PALMCHIP_BK3710
 	tristate "Palmchip bk3710 IDE controller support"
 	depends on ARCH_DAVINCI
-	select BLK_DEV_IDEDMA_PCI
+	select BLK_DEV_IDEDMA_SFF
 	help
 	  Say Y here if you want to support the onchip IDE controller on the
 	  TI DaVinci SoC
@@ -1124,7 +1111,8 @@
 endif
 
 config BLK_DEV_IDEDMA
-	def_bool BLK_DEV_IDEDMA_PCI || BLK_DEV_IDEDMA_PMAC || BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
+	def_bool BLK_DEV_IDEDMA_SFF || BLK_DEV_IDEDMA_PMAC || \
+		 BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 
 config IDE_ARCH_OBSOLETE_INIT
 	def_bool ALPHA || (ARM && !ARCH_L7200) || BLACKFIN || X86 || IA64 || M32R || MIPS || PARISC || PPC || (SUPERH64 && BLK_DEV_IDEPCI) || SPARC
diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c
index 0e7574c..161d30c 100644
--- a/drivers/ide/arm/bast-ide.c
+++ b/drivers/ide/arm/bast-ide.c
@@ -21,12 +21,7 @@
 #include <asm/arch/bast-map.h>
 #include <asm/arch/bast-irq.h>
 
-/* list of registered interfaces */
-static ide_hwif_t *ifs[2];
-
-static int __init
-bastide_register(unsigned int base, unsigned int aux, int irq,
-		 ide_hwif_t **hwif)
+static int __init bastide_register(unsigned int base, unsigned int aux, int irq)
 {
 	ide_hwif_t *hwif;
 	hw_regs_t hw;
@@ -76,8 +71,9 @@
 
 	printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n");
 
-	bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0, &ifs[0]);
-	bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1, &ifs[1]);
+	bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0);
+	bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1);
+
 	return 0;
 }
 
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index c306997..8e1f6bd 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -311,15 +311,37 @@
 	palm_bk3710_setpiomode(base, NULL, 0, 600, 0);
 	palm_bk3710_setpiomode(base, NULL, 1, 600, 0);
 }
+
+static u8 __devinit palm_bk3710_cable_detect(ide_hwif_t *hwif)
+{
+	return ATA_CBL_PATA80;
+}
+
+static void __devinit palm_bk3710_init_hwif(ide_hwif_t *hwif)
+{
+	hwif->set_pio_mode = palm_bk3710_set_pio_mode;
+	hwif->set_dma_mode = palm_bk3710_set_dma_mode;
+
+	hwif->cable_detect = palm_bk3710_cable_detect;
+}
+
+static const struct ide_port_info __devinitdata palm_bk3710_port_info = {
+	.init_hwif		= palm_bk3710_init_hwif,
+	.host_flags		= IDE_HFLAG_NO_DMA, /* hack (no PCI) */
+	.pio_mask		= ATA_PIO4,
+	.udma_mask		= ATA_UDMA4,	/* (input clk 99MHz) */
+	.mwdma_mask		= ATA_MWDMA2,
+};
+
 static int __devinit palm_bk3710_probe(struct platform_device *pdev)
 {
-	hw_regs_t ide_ctlr_info;
-	int index = 0;
-	int pribase;
 	struct clk *clkp;
 	struct resource *mem, *irq;
 	ide_hwif_t *hwif;
 	void __iomem *base;
+	int pribase, i;
+	hw_regs_t hw;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	clkp = clk_get(NULL, "IDECLK");
 	if (IS_ERR(clkp))
@@ -330,7 +352,7 @@
 	ide_palm_clk = clk_get_rate(ideclkp)/100000;
 	ide_palm_clk = (10000/ide_palm_clk) + 1;
 	/* Register the IDE interface with Linux ATA Interface */
-	memset(&ide_ctlr_info, 0, sizeof(ide_ctlr_info));
+	memset(&hw, 0, sizeof(hw));
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (mem == NULL) {
@@ -349,32 +371,42 @@
 	palm_bk3710_chipinit(base);
 
 	pribase = mem->start + IDE_PALM_ATA_PRI_REG_OFFSET;
-	for (index = 0; index < IDE_NR_PORTS - 2; index++)
-		ide_ctlr_info.io_ports[index] = pribase + index;
-	ide_ctlr_info.io_ports[IDE_CONTROL_OFFSET] = mem->start +
+	for (i = 0; i < IDE_NR_PORTS - 2; i++)
+		hw.io_ports[i] = pribase + i;
+	hw.io_ports[IDE_CONTROL_OFFSET] = mem->start +
 			IDE_PALM_ATA_PRI_CTL_OFFSET;
-	ide_ctlr_info.irq = irq->start;
-	ide_ctlr_info.chipset = ide_palm3710;
+	hw.irq = irq->start;
+	hw.chipset = ide_palm3710;
 
-	if (ide_register_hw(&ide_ctlr_info, NULL, &hwif) < 0) {
-		printk(KERN_WARNING "Palm Chip BK3710 IDE Register Fail\n");
-		return -ENODEV;
-	}
+	hwif = ide_deprecated_find_port(hw.io_ports[IDE_DATA_OFFSET]);
+	if (hwif == NULL)
+		goto out;
 
-	hwif->set_pio_mode = &palm_bk3710_set_pio_mode;
-	hwif->set_dma_mode = &palm_bk3710_set_dma_mode;
+	i = hwif->index;
+
+	if (hwif->present)
+		ide_unregister(i, 0, 0);
+	else if (!hwif->hold)
+		ide_init_port_data(hwif, i);
+
+	ide_init_port_hw(hwif, &hw);
+
 	hwif->mmio = 1;
 	default_hwif_mmiops(hwif);
-	hwif->cbl = ATA_CBL_PATA80;
-	hwif->ultra_mask = 0x1f;	/* Ultra DMA Mode 4 Max
-						(input clk 99MHz) */
-	hwif->mwdma_mask = 0x7;
-	hwif->drives[0].autotune = 1;
-	hwif->drives[1].autotune = 1;
 
 	ide_setup_dma(hwif, mem->start);
 
+	idx[0] = i;
+
+	ide_device_add(idx, &palm_bk3710_port_info);
+
+	if (!hwif->present)
+		goto out;
+
 	return 0;
+out:
+	printk(KERN_WARNING "Palm Chip BK3710 IDE Register Fail\n");
+	return -ENODEV;
 }
 
 static struct platform_driver platform_bk_driver = {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5e42c19..354c91d 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1555,7 +1555,7 @@
 	if (stat)
 		return stat;
 
-	toc->hdr.toc_length = ntohs (toc->hdr.toc_length);
+	toc->hdr.toc_length = be16_to_cpu(toc->hdr.toc_length);
 
 	if (info->cd_flags & IDE_CD_FLAG_TOCTRACKS_AS_BCD) {
 		toc->hdr.first_track = BCD2BIN(toc->hdr.first_track);
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3c69822..aed8b31c 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -590,20 +590,24 @@
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
-	ide_task_t task;
+	ide_task_t *task = kmalloc(sizeof(*task), GFP_ATOMIC);
 
-	memset(&task, 0, sizeof(task));
+	/* FIXME: map struct ide_taskfile on rq->cmd[] */
+	BUG_ON(task == NULL);
+
+	memset(task, 0, sizeof(*task));
 	if (ide_id_has_flush_cache_ext(drive->id) &&
 	    (drive->capacity64 >= (1UL << 28)))
-		task.tf.command = WIN_FLUSH_CACHE_EXT;
+		task->tf.command = WIN_FLUSH_CACHE_EXT;
 	else
-		task.tf.command = WIN_FLUSH_CACHE;
-	task.tf_flags	= IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE;
-	task.data_phase	= TASKFILE_NO_DATA;
+		task->tf.command = WIN_FLUSH_CACHE;
+	task->tf_flags	 = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE |
+			   IDE_TFLAG_DYN;
+	task->data_phase = TASKFILE_NO_DATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq->cmd_flags |= REQ_SOFTBARRIER;
-	rq->special = &task;
+	rq->special = task;
 }
 
 /*
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a4bb328..d0e7b53 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -198,7 +198,7 @@
 
 EXPORT_SYMBOL_GPL(ide_build_sglist);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	ide_build_dmatable	-	build IDE DMA table
  *
@@ -316,7 +316,7 @@
 
 EXPORT_SYMBOL_GPL(ide_destroy_dmatable);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	config_drive_for_dma	-	attempt to activate IDE DMA
  *	@drive: the drive to place in DMA mode
@@ -424,7 +424,7 @@
 }
 
 EXPORT_SYMBOL_GPL(ide_dma_host_set);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
 
 /**
  *	ide_dma_off_quietly	-	Generic DMA kill
@@ -474,7 +474,7 @@
 	drive->hwif->dma_host_set(drive, 1);
 }
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	ide_dma_setup	-	begin a DMA phase
  *	@drive: target device
@@ -591,7 +591,7 @@
 }
 #else
 static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
 
 int __ide_dma_bad_drive (ide_drive_t *drive)
 {
@@ -840,7 +840,7 @@
 		ide_dma_on(drive);
 }
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 void ide_dma_lost_irq (ide_drive_t *drive)
 {
 	printk("%s: DMA interrupt recovery\n", drive->name);
@@ -1002,4 +1002,4 @@
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_dma);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 3addbe4..7153796 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -361,17 +361,21 @@
 	spin_unlock_irqrestore(&ide_lock, flags);
 
 	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		ide_task_t *args = (ide_task_t *) rq->special;
+		ide_task_t *task = (ide_task_t *)rq->special;
+
 		if (rq->errors == 0)
-			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
-			
-		if (args) {
-			struct ide_taskfile *tf = &args->tf;
+			rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT);
+
+		if (task) {
+			struct ide_taskfile *tf = &task->tf;
 
 			tf->error = err;
 			tf->status = stat;
 
-			ide_tf_read(drive, args);
+			ide_tf_read(drive, task);
+
+			if (task->tf_flags & IDE_TFLAG_DYN)
+				kfree(task);
 		}
 	} else if (blk_pm_request(rq)) {
 		struct request_pm_state *pm = rq->data;
@@ -388,7 +392,8 @@
 	spin_lock_irqsave(&ide_lock, flags);
 	HWGROUP(drive)->rq = NULL;
 	rq->errors = err;
-	if (__blk_end_request(rq, (rq->errors ? -EIO : 0), 0))
+	if (unlikely(__blk_end_request(rq, (rq->errors ? -EIO : 0),
+				       blk_rq_bytes(rq))))
 		BUG();
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c32e759..c419266 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -786,15 +786,11 @@
 {
 	ide_hwgroup_t *hwgroup = HWGROUP(drive);
 
-	if (hwgroup->handler != NULL) {
-		printk(KERN_CRIT "%s: ide_set_handler: handler not null; "
-			"old=%p, new=%p\n",
-			drive->name, hwgroup->handler, handler);
-	}
+	BUG_ON(hwgroup->handler);
 	hwgroup->handler	= handler;
 	hwgroup->expiry		= expiry;
 	hwgroup->timer.expires	= jiffies + timeout;
-	hwgroup->req_gen_timer = hwgroup->req_gen;
+	hwgroup->req_gen_timer	= hwgroup->req_gen;
 	add_timer(&hwgroup->timer);
 }
 
@@ -827,11 +823,9 @@
 			 unsigned timeout, ide_expiry_t *expiry)
 {
 	unsigned long flags;
-	ide_hwgroup_t *hwgroup = HWGROUP(drive);
 	ide_hwif_t *hwif = HWIF(drive);
 
 	spin_lock_irqsave(&ide_lock, flags);
-	BUG_ON(hwgroup->handler);
 	__ide_set_handler(drive, handler, timeout, expiry);
 	hwif->OUTBSYNC(drive, cmd, IDE_COMMAND_REG);
 	/*
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 1ff676cc..29e2c97 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -21,15 +21,6 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
-/*
- *	IDE library routines. These are plug in code that most 
- *	drivers can use but occasionally may be weird enough
- *	to want to do their own thing with
- *
- *	Add common non I/O op stuff here. Make sure it has proper
- *	kernel-doc function headers or your patch will be rejected
- */
-
 static const char *udma_str[] =
 	 { "UDMA/16", "UDMA/25",  "UDMA/33",  "UDMA/44",
 	   "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" };
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 6daea89..4a2cb28 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1051,7 +1051,7 @@
 		int sa = 0;
 #if defined(__mc68000__)
 		sa = IRQF_SHARED;
-#endif /* __mc68000__ || CONFIG_APUS */
+#endif /* __mc68000__ */
 
 		if (IDE_CHIPSET_IS_PCI(hwif->chipset))
 			sa = IRQF_SHARED;
@@ -1355,7 +1355,7 @@
 	hwif->ultra_mask = d->udma_mask;
 
 	/* reset DMA masks only for SFF-style DMA controllers */
-	if ((d->host_flags && IDE_HFLAG_NO_DMA) == 0 && hwif->dma_base == 0)
+	if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0 && hwif->dma_base == 0)
 		hwif->swdma_mask = hwif->mwdma_mask = hwif->ultra_mask = 0;
 
 	if (d->host_flags & IDE_HFLAG_RQSIZE_256)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 49dd2e7..0598ecf 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -466,9 +466,6 @@
 /* 0 = no tape is loaded, so we don't rewind after ejecting */
 #define IDETAPE_MEDIUM_PRESENT		9
 
-/* A define for the READ BUFFER command */
-#define IDETAPE_RETRIEVE_FAULTY_BLOCK	6
-
 /* Some defines for the SPACE command */
 #define IDETAPE_SPACE_OVER_FILEMARK	1
 #define IDETAPE_SPACE_TO_EOD		3
@@ -490,7 +487,6 @@
 	REQ_IDETAPE_PC2		= (1 << 1), /* packet command (second stage) */
 	REQ_IDETAPE_READ	= (1 << 2),
 	REQ_IDETAPE_WRITE	= (1 << 3),
-	REQ_IDETAPE_READ_BUFFER	= (1 << 4),
 };
 
 /* Error codes returned in rq->errors to the higher part of the driver. */
@@ -1523,29 +1519,6 @@
 		set_bit(PC_DMA_RECOMMENDED, &pc->flags);
 }
 
-static void idetape_create_read_buffer_cmd(idetape_tape_t *tape,
-		idetape_pc_t *pc, struct idetape_bh *bh)
-{
-	int size = 32768;
-	struct idetape_bh *p = bh;
-
-	idetape_init_pc(pc);
-	pc->c[0] = READ_BUFFER;
-	pc->c[1] = IDETAPE_RETRIEVE_FAULTY_BLOCK;
-	pc->c[7] = size >> 8;
-	pc->c[8] = size & 0xff;
-	pc->callback = &idetape_pc_callback;
-	pc->bh = bh;
-	atomic_set(&bh->b_count, 0);
-	pc->buffer = NULL;
-	while (p) {
-		atomic_set(&p->b_count, 0);
-		p = p->b_reqnext;
-	}
-	pc->request_transfer = size;
-	pc->buffer_size = size;
-}
-
 static void idetape_create_write_cmd(idetape_tape_t *tape, idetape_pc_t *pc,
 		unsigned int length, struct idetape_bh *bh)
 {
@@ -1655,13 +1628,6 @@
 					 (struct idetape_bh *)rq->special);
 		goto out;
 	}
-	if (rq->cmd[0] & REQ_IDETAPE_READ_BUFFER) {
-		tape->postpone_cnt = 0;
-		pc = idetape_next_pc_storage(drive);
-		idetape_create_read_buffer_cmd(tape, pc,
-				(struct idetape_bh *)rq->special);
-		goto out;
-	}
 	if (rq->cmd[0] & REQ_IDETAPE_PC1) {
 		pc = (idetape_pc_t *) rq->buffer;
 		rq->cmd[0] &= ~(REQ_IDETAPE_PC1);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index ad0e995..4a8952a 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -44,8 +44,6 @@
  *  inspiration from lots of linux users, esp.  hamish@zot.apana.org.au
  */
 
-#define	REVISION	"Revision: 7.00alpha2"
-
 #define _IDE_C			/* Tell ide.h it's really us */
 
 #include <linux/module.h>
@@ -1618,7 +1616,7 @@
 {
 	int ret;
 
-	printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
+	printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n");
 	system_bus_speed = ide_system_bus_speed();
 
 	printk(KERN_INFO "ide: Assuming %dMHz system bus speed "
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index 9d3851d..b7d81090 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -94,7 +94,7 @@
 
 static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
 				     unsigned long ctl, unsigned long irq_port,
-				     ide_ack_intr_t *ack_intr);
+				     ide_ack_intr_t *ack_intr)
 {
 	int i;
 
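The gayle.c fix deletes a stray semicolon at the end of a function header: with the semicolon, the header is a bare declaration and the brace block that follows becomes a file-scope syntax error. Compare:

    /* Broken -- the trailing ';' ends a declaration, orphaning the body:
     *
     *	static int add(int a, int b);
     *	{
     *		return a + b;
     *	}
     */
    static int add(int a, int b)	/* no ';': this header opens a definition */
    {
    	return a + b;
    }

    int main(void)
    {
    	return add(1, 2) == 3 ? 0 : 1;
    }
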
diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index 0be1a82..1c163e4 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -147,11 +147,6 @@
 
 	/* We must not grab the entire device, it has 'ISA' space in its
 	 * BARS too and we will freak out other bits of the kernel
-	 *
-	 * pci_enable_device_bars() is going away. I replaced it with
-	 * IO only enable for now but I'll need confirmation this is
-	 * allright for that device. If not, it will need some kind of
-	 * quirk. --BenH.
 	 */
 	if (pci_enable_device_io(dev)) {
 		printk(KERN_WARNING "%s: Unable to enable 55x0.\n", d->name);
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index da43297..150422e 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -3,26 +3,6 @@
  *  Copyright (C) 2006-2007		MontaVista Software, Inc.
  *  Copyright (C) 2007			Bartlomiej Zolnierkiewicz
  *
- *  Promise Ultra33 cards with BIOS v1.20 through 1.28 will need this
- *  compiled into the kernel if you have more than one card installed.
- *  Note that BIOS v1.29 is reported to fix the problem.  Since this is
- *  safe chipset tuning, including this support is harmless
- *
- *  Promise Ultra66 cards with BIOS v1.11 this
- *  compiled into the kernel if you have more than one card installed.
- *
- *  Promise Ultra100 cards.
- *
- *  The latest chipset code will support the following ::
- *  Three Ultra33 controllers and 12 drives.
- *  8 are UDMA supported and 4 are limited to DMA mode 2 multi-word.
- *  The 8/4 ratio is a BIOS code limit by promise.
- *
- *  UNLESS you enable "CONFIG_PDC202XX_BURST"
- *
- */
-
-/*
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
  *  Author: Frank Tiernan (frankt@promise.com)
  *  Released under terms of General Public License
@@ -344,7 +324,6 @@
 		(primary_mode & 1) ? "MASTER" : "PCI",
 		(secondary_mode & 1) ? "MASTER" : "PCI" );
 
-#ifdef CONFIG_PDC202XX_BURST
 	if (!(udma_speed_flag & 1)) {
 		printk(KERN_INFO "%s: FORCING BURST BIT 0x%02x->0x%02x ",
 			hwif->cds->name, udma_speed_flag,
@@ -352,7 +331,6 @@
 		outb(udma_speed_flag | 1, dmabase | 0x1f);
 		printk("%sACTIVE\n", (inb(dmabase | 0x1f) & 1) ? "" : "IN");
 	}
-#endif /* CONFIG_PDC202XX_BURST */
 
 	ide_setup_dma(hwif, dmabase);
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index edc057f..2928ef2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -124,7 +124,7 @@
 struct mirror {
 	struct mirror_set *ms;
 	atomic_t error_count;
-	uint32_t error_type;
+	unsigned long error_type;
 	struct dm_dev *dev;
 	sector_t offset;
 };
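
Widening error_type from uint32_t to unsigned long matters because, presumably, the field is handed to the kernel's set_bit()/test_bit(), which operate on unsigned-long-sized words; on a 64-bit machine they would touch memory past a 32-bit field. A userspace mimic of that contract:

    #include <stdio.h>

    /* mimic of the kernel bitops contract: memory is addressed in
     * unsigned-long-sized words starting at 'addr' */
    static void set_bit_mimic(unsigned int nr, unsigned long *addr)
    {
    	addr[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
    }

    int main(void)
    {
    	unsigned long error_type = 0;	/* must match the bitop word size */

    	set_bit_mimic(2, &error_type);	/* e.g. a hypothetical error bit */
    	printf("error_type = %#lx\n", error_type);	/* 0x4 */
    	return 0;
    }
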
diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c
index f55b71a..4fb2421 100644
--- a/drivers/memstick/host/tifm_ms.c
+++ b/drivers/memstick/host/tifm_ms.c
@@ -282,7 +282,7 @@
 
 			writel(TIFM_MS_SYS_LATCH
 			       | readl(sock->addr + SOCK_MS_SYSTEM),
-			       sock + SOCK_MS_SYSTEM);
+			       sock->addr + SOCK_MS_SYSTEM);
 			writel(0, sock->addr + SOCK_MS_DATA);
 			dev_dbg(&sock->dev, "writing %x\n", 0);
 
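The tifm_ms fix replaces `sock + SOCK_MS_SYSTEM` with `sock->addr + SOCK_MS_SYSTEM`: adding an offset to the struct pointer scales by sizeof(*sock), producing a wild address instead of the intended byte offset from the mapped register base. The scaling is easy to demonstrate:

    #include <stdio.h>

    struct sock_mimic {		/* stand-in for the device struct */
    	char pad[60];
    	unsigned char *addr;	/* would be the ioremap()ed register base */
    };

    int main(void)
    {
    	struct sock_mimic socks[2];
    	struct sock_mimic *sock = socks;

    	/* arithmetic on the struct pointer moves in whole-struct strides */
    	printf("sock + 1 advances %zu bytes\n",
    	       (size_t)((char *)(sock + 1) - (char *)sock));

    	/* arithmetic on the byte base moves one byte per unit, which is
    	 * what a hardware register offset means */
    	sock->addr = (unsigned char *)socks;
    	printf("sock->addr + 1 advances %zu bytes\n",
    	       (size_t)((sock->addr + 1) - sock->addr));
    	return 0;
    }
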
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 521dc03..75ef9d0 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/bitmap.h>
 #include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
 
 #include "mlx4.h"
 
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 238628d..d76d37b 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1768,7 +1768,7 @@
 }
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int __devinit parport_ECP_supported(struct parport *pb)
+static int parport_ECP_supported(struct parport *pb)
 {
 	int i;
 	int config, configb;
@@ -1992,7 +1992,7 @@
 /* Don't bother probing for modes we know we won't use. */
 static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int __devinit parport_ECP_supported(struct parport *pb) { return 0; }
+static int parport_ECP_supported(struct parport *pb) { return 0; }
 #endif
 static int __devinit parport_EPP_supported(struct parport *pb) { return 0; }
 static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ca6b16f..f1ef49f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -243,10 +243,18 @@
 			.program	= &nlm_program,
 			.version	= host->h_version,
 			.authflavor	= RPC_AUTH_UNIX,
-			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+			.flags		= (RPC_CLNT_CREATE_NOPING |
 					   RPC_CLNT_CREATE_AUTOBIND),
 		};
 
+		/*
+		 * lockd retries server side blocks automatically so we want
+		 * those to be soft RPC calls. Client side calls need to be
+		 * hard RPC tasks.
+		 */
+		if (!host->h_server)
+			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
+
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
 			host->h_rpcclnt = clnt;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 2f4d8fa..fe9bdb4 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -763,11 +763,20 @@
 	dprintk("lockd: GRANTing blocked lock.\n");
 	block->b_granted = 1;
 
-	/* Schedule next grant callback in 30 seconds */
-	nlmsvc_insert_block(block, 30 * HZ);
+	/* keep block on the list, but don't reattempt until the RPC
+	 * completes or the submission fails
+	 */
+	nlmsvc_insert_block(block, NLM_NEVER);
 
-	/* Call the client */
-	nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
+	/* Call the client -- use a soft RPC task since nlmsvc_retry_blocked
+	 * will queue up a new one if this one times out
+	 */
+	error = nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
+				&nlmsvc_grant_ops);
+
+	/* RPC submission failed, wait a bit and retry */
+	if (error < 0)
+		nlmsvc_insert_block(block, 10 * HZ);
 }
 
 /*
@@ -786,6 +795,17 @@
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
 
+	/* if the block is not on a list at this point then it has
+	 * been invalidated. Don't try to requeue it.
+	 *
+	 * FIXME: it's possible that the block is removed from the list
+	 * after this check but before the nlmsvc_insert_block. In that
+	 * case it will be added back. Perhaps we need better locking
+	 * for nlm_blocked?
+	 */
+	if (list_empty(&block->b_list))
+		return;
+
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
 	 * can be done, though. */
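
The svclock.c change reworks the grant-callback retry logic: the block is parked with NLM_NEVER so nothing re-fires while the async RPC is in flight, and a short retry is armed only if submission itself fails; the completion callback handles the in-flight case. A self-contained mimic of that shape, with hypothetical names:

    #include <stdio.h>

    struct block { long timeout; };

    enum { NLM_NEVER = -1, RETRY_SOON = 10 /* seconds */ };

    static void insert_block(struct block *b, long timeout)
    {
    	b->timeout = timeout;	/* (re)queue with this deadline */
    }

    static int async_call(struct block *b)
    {
    	(void)b;
    	return -1;		/* pretend RPC submission failed */
    }

    static void grant_blocked(struct block *b)
    {
    	insert_block(b, NLM_NEVER);		/* parked: no automatic reattempt */
    	if (async_call(b) < 0)
    		insert_block(b, RETRY_SOON);	/* submission failed: retry soon */
    }

    int main(void)
    {
    	struct block b = { 0 };

    	grant_blocked(&b);
    	printf("timeout = %ld\n", b.timeout);	/* 10: retry armed */
    	return 0;
    }
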
diff --git a/fs/pipe.c b/fs/pipe.c
index a07e9a5..3c185b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -171,7 +171,7 @@
  *
  * Description:
  *	This function returns a kernel virtual address mapping for the
- *	passed in @pipe_buffer. If @atomic is set, an atomic map is provided
+ *	pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
  *	and the caller has to be careful not to fault before calling
  *	the unmap function.
  *
@@ -208,15 +208,15 @@
 }
 
 /**
- * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
+ * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
  * @pipe:	the pipe that the buffer belongs to
  * @buf:	the buffer to attempt to steal
  *
  * Description:
- *	This function attempts to steal the @struct page attached to
+ *	This function attempts to steal the &struct page attached to
  *	@buf. If successful, this function returns 0 and returns with
  *	the page locked. The caller may then reuse the page for whatever
- *	he wishes, the typical use is insertion into a different file
+ *	he wishes; the typical use is insertion into a different file
  *	page cache.
  */
 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -238,7 +238,7 @@
 }
 
 /**
- * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
+ * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
  * @pipe:	the pipe that the buffer belongs to
  * @buf:	the buffer to get a reference to
  *
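
The pipe.c hunks are pure kernel-doc notation fixes: `@name` marks a function parameter, while `&struct name` (or `&name`) is the cross-reference form for types, so spellings like `@struct page` and `@pipe_buffer` were malformed. In kernel-doc style:

    /**
     * example_get - get a reference to a &struct pipe_buffer
     * @pipe: the pipe that the buffer belongs to
     * @buf:  the buffer to get a reference to
     *
     * Use @name only for the parameters named above; refer to types
     * with &struct name so the documentation tools can cross-link them.
     */
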
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4e5c22c..376ef3e 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -505,7 +505,7 @@
 	if (warn_count < 5) {
 		warn_count++;
 		printk(KERN_EMERG "smbfs is deprecated and will be removed"
-			"from the 2.6.27 kernel.  Please migrate to cifs\n");
+			" from the 2.6.27 kernel. Please migrate to cifs\n");
 	}
 
 	if (!raw_data)
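
The smbfs warning was a victim of C string-literal concatenation: adjacent literals are joined with no implicit space, so the split message printed "...removedfrom the 2.6.27 kernel". For example:

    #include <stdio.h>

    int main(void)
    {
    	puts("will be removed"
    	     "from the kernel");	/* "will be removedfrom the kernel" */
    	puts("will be removed"
    	     " from the kernel");	/* fixed: the space rides in the literal */
    	return 0;
    }
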
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index d721a1a..f855dcb 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -145,7 +145,7 @@
 {
 	struct logicalVolIntegrityDesc *lvid;
 
-	if (sbi->s_lvid_bh)
+	if (sbi->s_lvid_bh == NULL)
 		return false;
 
 	lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
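
The udf/balloc.c change fixes an inverted guard: the function, evidently a validity check on the logical volume integrity descriptor, returned false exactly when s_lvid_bh *was* available, and would have dereferenced a NULL buffer head otherwise. The corrected early-return shape, reduced to a hypothetical sketch:

    #include <stdbool.h>
    #include <stddef.h>

    static bool lvid_valid(const void *s_lvid_bh)
    {
    	if (s_lvid_bh == NULL)	/* was: if (s_lvid_bh) -- inverted */
    		return false;
    	/* ... examine the descriptor behind the buffer head ... */
    	return true;
    }

    int main(void)
    {
    	return lvid_valid(NULL) ? 1 : 0;	/* 0: absent descriptor rejected */
    }
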
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 4b44e23..8d8643ada 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -43,13 +43,13 @@
 	struct fileIdentDesc *fi = NULL;
 	struct fileIdentDesc cfi;
 	int block, iblock;
-	loff_t nf_pos = filp->f_pos - 1;
+	loff_t nf_pos = (filp->f_pos - 1) << 2;
 	int flen;
 	char fname[UDF_NAME_LEN];
 	char *nameptr;
 	uint16_t liu;
 	uint8_t lfi;
-	loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
+	loff_t size = udf_ext0_offset(dir) + dir->i_size;
 	struct buffer_head *tmp, *bha[16];
 	kernel_lb_addr eloc;
 	uint32_t elen;
@@ -63,13 +63,13 @@
 		return 0;
 
 	if (nf_pos == 0)
-		nf_pos = (udf_ext0_offset(dir) >> 2);
+		nf_pos = udf_ext0_offset(dir);
 
-	fibh.soffset = fibh.eoffset = (nf_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2;
+	fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1);
 	iinfo = UDF_I(dir);
 	if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
 		fibh.sbh = fibh.ebh = NULL;
-	} else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2),
+	} else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits,
 			      &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) {
 		block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
 		if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
@@ -111,7 +111,7 @@
 	}
 
 	while (nf_pos < size) {
-		filp->f_pos = nf_pos + 1;
+		filp->f_pos = (nf_pos >> 2) + 1;
 
 		fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
 					&elen, &offset);
@@ -178,7 +178,7 @@
 		}
 	} /* end while */
 
-	filp->f_pos = nf_pos + 1;
+	filp->f_pos = (nf_pos >> 2) + 1;
 
 	if (fibh.sbh != fibh.ebh)
 		brelse(fibh.ebh);
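
The udf/dir.c hunks appear to move the internal directory cursor (nf_pos) from 4-byte units to plain byte offsets; only f_pos keeps the old encoding (4-byte units, offset by one), converted once on entry and once on exit. The round trip in isolation:

    #include <stdio.h>

    int main(void)
    {
    	long long f_pos = 10;			/* userspace view: 4-byte units + 1 */
    	long long nf_pos = (f_pos - 1) << 2;	/* entry: to byte offset -> 36 */

    	nf_pos += 8;				/* walk two 4-byte units */
    	f_pos = (nf_pos >> 2) + 1;		/* exit: back to f_pos units -> 12 */
    	printf("nf_pos=%lld f_pos=%lld\n", nf_pos, f_pos);
    	return 0;
    }
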
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 35582fe..1f3da5b 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1648,14 +1648,14 @@
 	 * Adjust the inode count and the block count to reflect this inode's
 	 * resource usage.
 	 */
-	be64_add(&dqp->q_core.d_icount, 1);
+	be64_add_cpu(&dqp->q_core.d_icount, 1);
 	dqp->q_res_icount++;
 	if (nblks) {
-		be64_add(&dqp->q_core.d_bcount, nblks);
+		be64_add_cpu(&dqp->q_core.d_bcount, nblks);
 		dqp->q_res_bcount += nblks;
 	}
 	if (rtblks) {
-		be64_add(&dqp->q_core.d_rtbcount, rtblks);
+		be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
 		dqp->q_res_rtbcount += rtblks;
 	}
 
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 7de6874b..f441f83 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -421,13 +421,13 @@
 				       (xfs_qcnt_t) -qtrx->qt_icount_delta);
 #endif
 			if (totalbdelta)
-				be64_add(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
+				be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
 
 			if (qtrx->qt_icount_delta)
-				be64_add(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
+				be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
 
 			if (totalrtbdelta)
-				be64_add(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
+				be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
 
 			/*
 			 * Get any default limits in use.
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index ea6aa60..bdbfbbe 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -592,7 +592,7 @@
 		if (!(args->wasfromfl)) {
 
 			agf = XFS_BUF_TO_AGF(args->agbp);
-			be32_add(&agf->agf_freeblks, -(args->len));
+			be32_add_cpu(&agf->agf_freeblks, -(args->len));
 			xfs_trans_agblocks_delta(args->tp,
 						 -((long)(args->len)));
 			args->pag->pagf_freeblks -= args->len;
@@ -1720,7 +1720,7 @@
 
 		agf = XFS_BUF_TO_AGF(agbp);
 		pag = &mp->m_perag[agno];
-		be32_add(&agf->agf_freeblks, len);
+		be32_add_cpu(&agf->agf_freeblks, len);
 		xfs_trans_agblocks_delta(tp, len);
 		pag->pagf_freeblks += len;
 		XFS_WANT_CORRUPTED_GOTO(
@@ -2008,18 +2008,18 @@
 	 * Get the block number and update the data structures.
 	 */
 	bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
-	be32_add(&agf->agf_flfirst, 1);
+	be32_add_cpu(&agf->agf_flfirst, 1);
 	xfs_trans_brelse(tp, agflbp);
 	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
 		agf->agf_flfirst = 0;
 	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
-	be32_add(&agf->agf_flcount, -1);
+	be32_add_cpu(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
 
 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
 	if (btreeblk) {
-		be32_add(&agf->agf_btreeblks, 1);
+		be32_add_cpu(&agf->agf_btreeblks, 1);
 		pag->pagf_btreeblks++;
 		logflags |= XFS_AGF_BTREEBLKS;
 	}
@@ -2117,17 +2117,17 @@
 			be32_to_cpu(agf->agf_seqno), &agflbp)))
 		return error;
 	agfl = XFS_BUF_TO_AGFL(agflbp);
-	be32_add(&agf->agf_fllast, 1);
+	be32_add_cpu(&agf->agf_fllast, 1);
 	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
 		agf->agf_fllast = 0;
 	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
-	be32_add(&agf->agf_flcount, 1);
+	be32_add_cpu(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
 
 	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
 	if (btreeblk) {
-		be32_add(&agf->agf_btreeblks, -1);
+		be32_add_cpu(&agf->agf_btreeblks, -1);
 		pag->pagf_btreeblks--;
 		logflags |= XFS_AGF_BTREEBLKS;
 	}
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 1603ce5..3ce2645 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -221,7 +221,7 @@
 			 */
 			bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
 			agf->agf_roots[cur->bc_btnum] = *lpp;
-			be32_add(&agf->agf_levels[cur->bc_btnum], -1);
+			be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
 			mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
 			/*
 			 * Put this buffer/block on the ag's freelist.
@@ -1256,9 +1256,9 @@
 	/*
 	 * Bump and log left's numrecs, decrement and log right's numrecs.
 	 */
-	be16_add(&left->bb_numrecs, 1);
+	be16_add_cpu(&left->bb_numrecs, 1);
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	be16_add(&right->bb_numrecs, -1);
+	be16_add_cpu(&right->bb_numrecs, -1);
 	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Slide the contents of right down one entry.
@@ -1346,7 +1346,7 @@
 
 		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 		agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
-		be32_add(&agf->agf_levels[cur->bc_btnum], 1);
+		be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
 		seqno = be32_to_cpu(agf->agf_seqno);
 		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
 		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
@@ -1558,9 +1558,9 @@
 	/*
 	 * Decrement and log left's numrecs, bump and log right's numrecs.
 	 */
-	be16_add(&left->bb_numrecs, -1);
+	be16_add_cpu(&left->bb_numrecs, -1);
 	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	be16_add(&right->bb_numrecs, 1);
+	be16_add_cpu(&right->bb_numrecs, 1);
 	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Using a temporary cursor, update the parent key values of the
@@ -1643,7 +1643,7 @@
 	 */
 	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
 	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
-		be16_add(&right->bb_numrecs, 1);
+		be16_add_cpu(&right->bb_numrecs, 1);
 	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	/*
 	 * For non-leaf blocks, copy keys and addresses over to the new block.
@@ -1689,7 +1689,7 @@
 	 * Adjust numrecs, sibling pointers.
 	 */
 	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
-	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
 	right->bb_rightsib = left->bb_rightsib;
 	left->bb_rightsib = cpu_to_be32(rbno);
 	right->bb_leftsib = cpu_to_be32(lbno);
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index c483689..f9472a2 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -170,21 +170,6 @@
     } \
 }
 
-static inline void be16_add(__be16 *a, __s16 b)
-{
-	*a = cpu_to_be16(be16_to_cpu(*a) + b);
-}
-
-static inline void be32_add(__be32 *a, __s32 b)
-{
-	*a = cpu_to_be32(be32_to_cpu(*a) + b);
-}
-
-static inline void be64_add(__be64 *a, __s64 b)
-{
-	*a = cpu_to_be64(be64_to_cpu(*a) + b);
-}
-
 /*
  * In directories inode numbers are stored as unaligned arrays of unsigned
  * 8bit integers on disk.
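
All of the XFS be16_add/be32_add/be64_add churn in this patch is a mechanical rename: the private helpers deleted above were superseded by the equivalent generic be*_add_cpu() byteorder helpers. Their semantics, adding a CPU-order delta to a big-endian on-disk field, mimicked in portable C with htonl()/ntohl() standing in for cpu_to_be32()/be32_to_cpu():

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    static void be32_add_cpu_mimic(uint32_t *a, int32_t b)
    {
    	*a = htonl((uint32_t)(ntohl(*a) + b));
    }

    int main(void)
    {
    	uint32_t agf_freeblks = htonl(100);	/* big-endian on-disk field */

    	be32_add_cpu_mimic(&agf_freeblks, -3);	/* CPU-order delta */
    	printf("%u\n", (unsigned)ntohl(agf_freeblks));	/* 97 */
    	return 0;
    }
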
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index eb3815eb..b08e2a2 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -317,7 +317,7 @@
 	memcpy(sfe->nameval, args->name, args->namelen);
 	memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
 	sf->hdr.count++;
-	be16_add(&sf->hdr.totsize, size);
+	be16_add_cpu(&sf->hdr.totsize, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 
 	xfs_sbversion_add_attr2(mp, args->trans);
@@ -363,7 +363,7 @@
 	if (end != totsize)
 		memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
 	sf->hdr.count--;
-	be16_add(&sf->hdr.totsize, -size);
+	be16_add_cpu(&sf->hdr.totsize, -size);
 
 	/*
 	 * Fix up the start offset of the attribute fork
@@ -1133,7 +1133,7 @@
 		xfs_da_log_buf(args->trans, bp,
 		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	}
-	be16_add(&hdr->count, 1);
+	be16_add_cpu(&hdr->count, 1);
 
 	/*
 	 * Allocate space for the new string (at the end of the run).
@@ -1147,7 +1147,7 @@
 					 mp->m_sb.sb_blocksize, NULL));
 	ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
 	ASSERT((be16_to_cpu(map->size) & 0x3) == 0);
-	be16_add(&map->size,
+	be16_add_cpu(&map->size,
 		-xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
 					  mp->m_sb.sb_blocksize, &tmp));
 	entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) +
@@ -1214,12 +1214,12 @@
 	map = &hdr->freemap[0];
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) {
 		if (be16_to_cpu(map->base) == tmp) {
-			be16_add(&map->base, sizeof(xfs_attr_leaf_entry_t));
-			be16_add(&map->size,
+			be16_add_cpu(&map->base, sizeof(xfs_attr_leaf_entry_t));
+			be16_add_cpu(&map->size,
 				 -((int)sizeof(xfs_attr_leaf_entry_t)));
 		}
 	}
-	be16_add(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
+	be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
 	xfs_da_log_buf(args->trans, bp,
 		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
 	return(0);
@@ -1727,9 +1727,9 @@
 		ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp));
 		ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
 		if (be16_to_cpu(map->base) == tablesize) {
-			be16_add(&map->base,
+			be16_add_cpu(&map->base,
 				 -((int)sizeof(xfs_attr_leaf_entry_t)));
-			be16_add(&map->size, sizeof(xfs_attr_leaf_entry_t));
+			be16_add_cpu(&map->size, sizeof(xfs_attr_leaf_entry_t));
 		}
 
 		if ((be16_to_cpu(map->base) + be16_to_cpu(map->size))
@@ -1751,19 +1751,19 @@
 	if ((before >= 0) || (after >= 0)) {
 		if ((before >= 0) && (after >= 0)) {
 			map = &hdr->freemap[before];
-			be16_add(&map->size, entsize);
-			be16_add(&map->size,
+			be16_add_cpu(&map->size, entsize);
+			be16_add_cpu(&map->size,
 				 be16_to_cpu(hdr->freemap[after].size));
 			hdr->freemap[after].base = 0;
 			hdr->freemap[after].size = 0;
 		} else if (before >= 0) {
 			map = &hdr->freemap[before];
-			be16_add(&map->size, entsize);
+			be16_add_cpu(&map->size, entsize);
 		} else {
 			map = &hdr->freemap[after];
 			/* both on-disk, don't endian flip twice */
 			map->base = entry->nameidx;
-			be16_add(&map->size, entsize);
+			be16_add_cpu(&map->size, entsize);
 		}
 	} else {
 		/*
@@ -1788,7 +1788,7 @@
 	 * Compress the remaining entries and zero out the removed stuff.
 	 */
 	memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize);
-	be16_add(&hdr->usedbytes, -entsize);
+	be16_add_cpu(&hdr->usedbytes, -entsize);
 	xfs_da_log_buf(args->trans, bp,
 	     XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
 				   entsize));
@@ -1796,7 +1796,7 @@
 	tmp = (be16_to_cpu(hdr->count) - args->index)
 					* sizeof(xfs_attr_leaf_entry_t);
 	memmove((char *)entry, (char *)(entry+1), tmp);
-	be16_add(&hdr->count, -1);
+	be16_add_cpu(&hdr->count, -1);
 	xfs_da_log_buf(args->trans, bp,
 	    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
 	entry = &leaf->entries[be16_to_cpu(hdr->count)];
@@ -2182,15 +2182,15 @@
 		 */
 		if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
 			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
-			be16_add(&hdr_s->usedbytes, -tmp);
-			be16_add(&hdr_s->count, -1);
+			be16_add_cpu(&hdr_s->usedbytes, -tmp);
+			be16_add_cpu(&hdr_s->count, -1);
 			entry_d--;	/* to compensate for ++ in loop hdr */
 			desti--;
 			if ((start_s + i) < offset)
 				result++;	/* insertion index adjustment */
 		} else {
 #endif /* GROT */
-			be16_add(&hdr_d->firstused, -tmp);
+			be16_add_cpu(&hdr_d->firstused, -tmp);
 			/* both on-disk, don't endian flip twice */
 			entry_d->hashval = entry_s->hashval;
 			/* both on-disk, don't endian flip twice */
@@ -2203,10 +2203,10 @@
 			ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
 							<= XFS_LBSIZE(mp));
 			memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp);
-			be16_add(&hdr_s->usedbytes, -tmp);
-			be16_add(&hdr_d->usedbytes, tmp);
-			be16_add(&hdr_s->count, -1);
-			be16_add(&hdr_d->count, 1);
+			be16_add_cpu(&hdr_s->usedbytes, -tmp);
+			be16_add_cpu(&hdr_d->usedbytes, tmp);
+			be16_add_cpu(&hdr_s->count, -1);
+			be16_add_cpu(&hdr_d->count, 1);
 			tmp = be16_to_cpu(hdr_d->count)
 						* sizeof(xfs_attr_leaf_entry_t)
 						+ sizeof(xfs_attr_leaf_hdr_t);
@@ -2247,7 +2247,7 @@
 	 * Fill in the freemap information
 	 */
 	hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
-	be16_add(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) *
+	be16_add_cpu(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) *
 			sizeof(xfs_attr_leaf_entry_t));
 	hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused)
 			      - be16_to_cpu(hdr_d->freemap[0].base));
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index c4181d8..bd18987 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -631,7 +631,7 @@
 		memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
 		xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
 	}
-	be16_add(&left->bb_numrecs, numrrecs);
+	be16_add_cpu(&left->bb_numrecs, numrrecs);
 	left->bb_rightsib = right->bb_rightsib;
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
 	if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
@@ -924,7 +924,7 @@
 		xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
 		block = ifp->if_broot;
 	}
-	be16_add(&block->bb_numrecs, i);
+	be16_add_cpu(&block->bb_numrecs, i);
 	ASSERT(block->bb_numrecs == cblock->bb_numrecs);
 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
 	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
@@ -947,7 +947,7 @@
 			XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(cur->bc_tp, cbp);
 	cur->bc_bufs[level - 1] = NULL;
-	be16_add(&block->bb_level, -1);
+	be16_add_cpu(&block->bb_level, -1);
 	xfs_trans_log_inode(cur->bc_tp, ip,
 		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
 	cur->bc_nlevels--;
@@ -1401,9 +1401,9 @@
 		key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
 		rkp = &key;
 	}
-	be16_add(&left->bb_numrecs, -1);
+	be16_add_cpu(&left->bb_numrecs, -1);
 	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
-	be16_add(&right->bb_numrecs, 1);
+	be16_add_cpu(&right->bb_numrecs, 1);
 #ifdef DEBUG
 	if (level > 0)
 		xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
@@ -1535,7 +1535,7 @@
 	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
 	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
 	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
-		be16_add(&right->bb_numrecs, 1);
+		be16_add_cpu(&right->bb_numrecs, 1);
 	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	if (level > 0) {
 		lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
@@ -1562,7 +1562,7 @@
 		xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
 		*startoff = xfs_bmbt_disk_get_startoff(rrp);
 	}
-	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
 	right->bb_rightsib = left->bb_rightsib;
 	left->bb_rightsib = cpu_to_be64(args.fsbno);
 	right->bb_leftsib = cpu_to_be64(lbno);
@@ -2240,7 +2240,7 @@
 	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
 	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
 	*cblock = *block;
-	be16_add(&block->bb_level, 1);
+	be16_add_cpu(&block->bb_level, 1);
 	block->bb_numrecs = cpu_to_be16(1);
 	cur->bc_nlevels++;
 	cur->bc_ptrs[level + 1] = 1;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 1b44684..021a8f7 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -511,12 +511,12 @@
 		 * Move the req'd B-tree elements from high in node1 to
 		 * low in node2.
 		 */
-		be16_add(&node2->hdr.count, count);
+		be16_add_cpu(&node2->hdr.count, count);
 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
 		btree_d = &node2->btree[0];
 		memcpy(btree_d, btree_s, tmp);
-		be16_add(&node1->hdr.count, -count);
+		be16_add_cpu(&node1->hdr.count, -count);
 	} else {
 		/*
 		 * Move the req'd B-tree elements from low in node2 to
@@ -527,7 +527,7 @@
 		btree_s = &node2->btree[0];
 		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
 		memcpy(btree_d, btree_s, tmp);
-		be16_add(&node1->hdr.count, count);
+		be16_add_cpu(&node1->hdr.count, count);
 		xfs_da_log_buf(tp, blk1->bp,
 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
 
@@ -539,7 +539,7 @@
 		btree_s = &node2->btree[count];
 		btree_d = &node2->btree[0];
 		memmove(btree_d, btree_s, tmp);
-		be16_add(&node2->hdr.count, -count);
+		be16_add_cpu(&node2->hdr.count, -count);
 	}
 
 	/*
@@ -604,7 +604,7 @@
 	btree->before = cpu_to_be32(newblk->blkno);
 	xfs_da_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
-	be16_add(&node->hdr.count, 1);
+	be16_add_cpu(&node->hdr.count, 1);
 	xfs_da_log_buf(state->args->trans, oldblk->bp,
 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
@@ -959,7 +959,7 @@
 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
-	be16_add(&node->hdr.count, -1);
+	be16_add_cpu(&node->hdr.count, -1);
 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
@@ -1018,7 +1018,7 @@
 	 */
 	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
 	memcpy(btree, &drop_node->btree[0], tmp);
-	be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
+	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
 
 	xfs_da_log_buf(tp, save_blk->bp,
 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index a5f4f4f..fb5a556 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -271,7 +271,7 @@
 		}
 		lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
 		lfloghigh -= be32_to_cpu(btp->stale) - 1;
-		be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1));
+		be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
 		xfs_dir2_data_make_free(tp, bp,
 			(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
 			(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
@@ -326,7 +326,7 @@
 		/*
 		 * Update the tail (entry count).
 		 */
-		be32_add(&btp->count, 1);
+		be32_add_cpu(&btp->count, 1);
 		/*
 		 * If we now need to rebuild the bestfree map, do so.
 		 * This needs to happen before the next call to use_free.
@@ -387,7 +387,7 @@
 			lfloglow = MIN(mid, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		be32_add(&btp->stale, -1);
+		be32_add_cpu(&btp->stale, -1);
 	}
 	/*
 	 * Point to the new data entry.
@@ -767,7 +767,7 @@
 	/*
 	 * Fix up the block tail.
 	 */
-	be32_add(&btp->stale, 1);
+	be32_add_cpu(&btp->stale, 1);
 	xfs_dir2_block_log_tail(tp, bp);
 	/*
 	 * Remove the leaf entry by marking it stale.
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index d245269..fb8c9e0 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -587,7 +587,7 @@
 		/*
 		 * Fix up the new big freespace.
 		 */
-		be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
+		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
 		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
@@ -621,7 +621,7 @@
 	 */
 	else if (prevdup) {
 		dfp = xfs_dir2_data_freefind(d, prevdup);
-		be16_add(&prevdup->length, len);
+		be16_add_cpu(&prevdup->length, len);
 		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 0ca0020..bc52b80 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -359,7 +359,7 @@
 			bestsp--;
 			memmove(&bestsp[0], &bestsp[1],
 				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
-			be32_add(&ltp->bestcount, 1);
+			be32_add_cpu(&ltp->bestcount, 1);
 			xfs_dir2_leaf_log_tail(tp, lbp);
 			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 		}
@@ -445,7 +445,7 @@
 		 */
 		lfloglow = index;
 		lfloghigh = be16_to_cpu(leaf->hdr.count);
-		be16_add(&leaf->hdr.count, 1);
+		be16_add_cpu(&leaf->hdr.count, 1);
 	}
 	/*
 	 * There are stale entries.
@@ -523,7 +523,7 @@
 			lfloglow = MIN(index, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		be16_add(&leaf->hdr.stale, -1);
+		be16_add_cpu(&leaf->hdr.stale, -1);
 	}
 	/*
 	 * Fill in the new leaf entry.
@@ -626,7 +626,7 @@
 	 * Update and log the header, log the leaf entries.
 	 */
 	ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
-	be16_add(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
+	be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
 	leaf->hdr.stale = 0;
 	xfs_dir2_leaf_log_header(args->trans, bp);
 	if (loglow != -1)
@@ -728,7 +728,7 @@
 	/*
 	 * Adjust the leaf header values.
 	 */
-	be16_add(&leaf->hdr.count, -(from - to));
+	be16_add_cpu(&leaf->hdr.count, -(from - to));
 	leaf->hdr.stale = cpu_to_be16(1);
 	/*
 	 * Remember the low/high stale value only in the "right"
@@ -1470,7 +1470,7 @@
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
 	 */
-	be16_add(&leaf->hdr.stale, 1);
+	be16_add_cpu(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, lbp);
 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
@@ -1531,7 +1531,7 @@
 			 */
 			memmove(&bestsp[db - i], bestsp,
 				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
-			be32_add(&ltp->bestcount, -(db - i));
+			be32_add_cpu(&ltp->bestcount, -(db - i));
 			xfs_dir2_leaf_log_tail(tp, lbp);
 			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 		} else
@@ -1712,7 +1712,7 @@
 	 * Eliminate the last bests entry from the table.
 	 */
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
-	be32_add(&ltp->bestcount, -1);
+	be32_add_cpu(&ltp->bestcount, -1);
 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index eb18e39..8dade71 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -254,7 +254,7 @@
 				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
 		lfloglow = index;
 		lfloghigh = be16_to_cpu(leaf->hdr.count);
-		be16_add(&leaf->hdr.count, 1);
+		be16_add_cpu(&leaf->hdr.count, 1);
 	}
 	/*
 	 * There are stale entries.  We'll use one for the new entry.
@@ -322,7 +322,7 @@
 			lfloglow = MIN(index, lfloglow);
 			lfloghigh = MAX(highstale, lfloghigh);
 		}
-		be16_add(&leaf->hdr.stale, -1);
+		be16_add_cpu(&leaf->hdr.stale, -1);
 	}
 	/*
 	 * Insert the new entry, log everything.
@@ -697,10 +697,10 @@
 	/*
 	 * Update the headers and log them.
 	 */
-	be16_add(&leaf_s->hdr.count, -(count));
-	be16_add(&leaf_s->hdr.stale, -(stale));
-	be16_add(&leaf_d->hdr.count, count);
-	be16_add(&leaf_d->hdr.stale, stale);
+	be16_add_cpu(&leaf_s->hdr.count, -(count));
+	be16_add_cpu(&leaf_s->hdr.stale, -(stale));
+	be16_add_cpu(&leaf_d->hdr.count, count);
+	be16_add_cpu(&leaf_d->hdr.stale, stale);
 	xfs_dir2_leaf_log_header(tp, bp_s);
 	xfs_dir2_leaf_log_header(tp, bp_d);
 	xfs_dir2_leafn_check(args->dp, bp_s);
@@ -885,7 +885,7 @@
 	 * Kill the leaf entry by marking it stale.
 	 * Log the leaf block changes.
 	 */
-	be16_add(&leaf->hdr.stale, 1);
+	be16_add_cpu(&leaf->hdr.stale, 1);
 	xfs_dir2_leaf_log_header(tp, bp);
 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
 	xfs_dir2_leaf_log_ents(tp, bp, index, index);
@@ -971,7 +971,7 @@
 			/*
 			 * One less used entry in the free table.
 			 */
-			be32_add(&free->hdr.nused, -1);
+			be32_add_cpu(&free->hdr.nused, -1);
 			xfs_dir2_free_log_header(tp, fbp);
 			/*
 			 * If this was the last entry in the table, we can
@@ -1642,7 +1642,7 @@
 		 * (this should always be true) then update the header.
 		 */
 		if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
-			be32_add(&free->hdr.nused, 1);
+			be32_add_cpu(&free->hdr.nused, 1);
 			xfs_dir2_free_log_header(tp, fbp);
 		}
 		/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b8de7f3..eadc159 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -318,7 +318,7 @@
 		}
 		ASSERT(bp);
 		agi = XFS_BUF_TO_AGI(bp);
-		be32_add(&agi->agi_length, new);
+		be32_add_cpu(&agi->agi_length, new);
 		ASSERT(nagcount == oagcount ||
 		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
 		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
@@ -331,7 +331,7 @@
 		}
 		ASSERT(bp);
 		agf = XFS_BUF_TO_AGF(bp);
-		be32_add(&agf->agf_length, new);
+		be32_add_cpu(&agf->agf_length, new);
 		ASSERT(be32_to_cpu(agf->agf_length) ==
 		       be32_to_cpu(agi->agi_length));
 		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 1409c2d..c5836b9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -301,8 +301,8 @@
 		}
 		xfs_trans_inode_alloc_buf(tp, fbuf);
 	}
-	be32_add(&agi->agi_count, newlen);
-	be32_add(&agi->agi_freecount, newlen);
+	be32_add_cpu(&agi->agi_count, newlen);
+	be32_add_cpu(&agi->agi_freecount, newlen);
 	agno = be32_to_cpu(agi->agi_seqno);
 	down_read(&args.mp->m_peraglock);
 	args.mp->m_perag[agno].pagi_freecount += newlen;
@@ -885,7 +885,7 @@
 	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
 			rec.ir_free)))
 		goto error0;
-	be32_add(&agi->agi_freecount, -1);
+	be32_add_cpu(&agi->agi_freecount, -1);
 	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 	down_read(&mp->m_peraglock);
 	mp->m_perag[tagno].pagi_freecount--;
@@ -1065,8 +1065,8 @@
 		 * to be freed when the transaction is committed.
 		 */
 		ilen = XFS_IALLOC_INODES(mp);
-		be32_add(&agi->agi_count, -ilen);
-		be32_add(&agi->agi_freecount, -(ilen - 1));
+		be32_add_cpu(&agi->agi_count, -ilen);
+		be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
 		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
 		down_read(&mp->m_peraglock);
 		mp->m_perag[agno].pagi_freecount -= ilen - 1;
@@ -1095,7 +1095,7 @@
 		/* 
 		 * Change the inode free counts and log the ag/sb changes.
 		 */
-		be32_add(&agi->agi_freecount, 1);
+		be32_add_cpu(&agi->agi_freecount, 1);
 		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 		down_read(&mp->m_peraglock);
 		mp->m_perag[agno].pagi_freecount++;
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 8cdeeaf..e5310c90 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -189,7 +189,7 @@
 			 */
 			bno = be32_to_cpu(agi->agi_root);
 			agi->agi_root = *pp;
-			be32_add(&agi->agi_level, -1);
+			be32_add_cpu(&agi->agi_level, -1);
 			/*
 			 * Free the block.
 			 */
@@ -1132,7 +1132,7 @@
 	/*
 	 * Bump and log left's numrecs, decrement and log right's numrecs.
 	 */
-	be16_add(&left->bb_numrecs, 1);
+	be16_add_cpu(&left->bb_numrecs, 1);
 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
 #ifdef DEBUG
 	if (level > 0)
@@ -1140,7 +1140,7 @@
 	else
 		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
 #endif
-	be16_add(&right->bb_numrecs, -1);
+	be16_add_cpu(&right->bb_numrecs, -1);
 	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
 	/*
 	 * Slide the contents of right down one entry.
@@ -1232,7 +1232,7 @@
 	 * Set the root data in the a.g. inode structure.
 	 */
 	agi->agi_root = cpu_to_be32(args.agbno);
-	be32_add(&agi->agi_level, 1);
+	be32_add_cpu(&agi->agi_level, 1);
 	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
 		XFS_AGI_ROOT | XFS_AGI_LEVEL);
 	/*
@@ -1426,9 +1426,9 @@
 	/*
 	 * Decrement and log left's numrecs, bump and log right's numrecs.
 	 */
-	be16_add(&left->bb_numrecs, -1);
+	be16_add_cpu(&left->bb_numrecs, -1);
 	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	be16_add(&right->bb_numrecs, 1);
+	be16_add_cpu(&right->bb_numrecs, 1);
 #ifdef DEBUG
 	if (level > 0)
 		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
@@ -1529,7 +1529,7 @@
 	 */
 	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
 	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
-		be16_add(&right->bb_numrecs, 1);
+		be16_add_cpu(&right->bb_numrecs, 1);
 	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
 	/*
 	 * For non-leaf blocks, copy keys and addresses over to the new block.
@@ -1565,7 +1565,7 @@
 	 * Find the left block number by looking in the buffer.
 	 * Adjust numrecs, sibling pointers.
 	 */
-	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
+	be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
 	right->bb_rightsib = left->bb_rightsib;
 	left->bb_rightsib = cpu_to_be32(args.agbno);
 	right->bb_leftsib = cpu_to_be32(lbno);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b3ac380..a75edca 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1509,9 +1509,9 @@
 		 * case, though.
 		 */
 		for (i = 0; i < split; i += BBSIZE) {
-			be32_add((__be32 *)dptr, 1);
+			be32_add_cpu((__be32 *)dptr, 1);
 			if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM)
-				be32_add((__be32 *)dptr, 1);
+				be32_add_cpu((__be32 *)dptr, 1);
 			dptr += BBSIZE;
 		}
 
@@ -1600,7 +1600,7 @@
 {
 	spin_lock(&log->l_icloglock);
 
-	be32_add(&iclog->ic_header.h_num_logops, record_cnt);
+	be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
 	iclog->ic_offset += copy_bytes;
 
 	spin_unlock(&log->l_icloglock);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 71e4c8d..1403864 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -567,26 +567,26 @@
 	 */
 	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
 		if (tp->t_icount_delta)
-			be64_add(&sbp->sb_icount, tp->t_icount_delta);
+			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
 		if (tp->t_ifree_delta)
-			be64_add(&sbp->sb_ifree, tp->t_ifree_delta);
+			be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
 		if (tp->t_fdblocks_delta)
-			be64_add(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
+			be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
 		if (tp->t_res_fdblocks_delta)
-			be64_add(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
+			be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
 	}
 
 	if (tp->t_frextents_delta)
-		be64_add(&sbp->sb_frextents, tp->t_frextents_delta);
+		be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta);
 	if (tp->t_res_frextents_delta)
-		be64_add(&sbp->sb_frextents, tp->t_res_frextents_delta);
+		be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta);
 
 	if (tp->t_dblocks_delta) {
-		be64_add(&sbp->sb_dblocks, tp->t_dblocks_delta);
+		be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
 		whole = 1;
 	}
 	if (tp->t_agcount_delta) {
-		be32_add(&sbp->sb_agcount, tp->t_agcount_delta);
+		be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
 		whole = 1;
 	}
 	if (tp->t_imaxpct_delta) {
@@ -594,19 +594,19 @@
 		whole = 1;
 	}
 	if (tp->t_rextsize_delta) {
-		be32_add(&sbp->sb_rextsize, tp->t_rextsize_delta);
+		be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
 		whole = 1;
 	}
 	if (tp->t_rbmblocks_delta) {
-		be32_add(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
+		be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
 		whole = 1;
 	}
 	if (tp->t_rblocks_delta) {
-		be64_add(&sbp->sb_rblocks, tp->t_rblocks_delta);
+		be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
 		whole = 1;
 	}
 	if (tp->t_rextents_delta) {
-		be64_add(&sbp->sb_rextents, tp->t_rextents_delta);
+		be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
 		whole = 1;
 	}
 	if (tp->t_rextslog_delta) {
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 022a5fd..4839f2a 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -222,7 +222,7 @@
  */
 acpi_status
 acpi_os_read_pci_configuration(struct acpi_pci_id *pci_id,
-			       u32 reg, void *value, u32 width);
+			       u32 reg, u32 *value, u32 width);
 
 acpi_status
 acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id,
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 5d9d70c..342a2a0 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -30,19 +30,19 @@
 /* Other architectures wishing to use this simple topology API should fill
    in the below functions as appropriate in their own <asm/topology.h> file. */
 #ifndef cpu_to_node
-#define cpu_to_node(cpu)	(0)
+#define cpu_to_node(cpu)	((void)(cpu),0)
 #endif
 #ifndef parent_node
-#define parent_node(node)	(0)
+#define parent_node(node)	((void)(node),0)
 #endif
 #ifndef node_to_cpumask
-#define node_to_cpumask(node)	(cpu_online_map)
+#define node_to_cpumask(node)	((void)node, cpu_online_map)
 #endif
 #ifndef node_to_first_cpu
-#define node_to_first_cpu(node)	(0)
+#define node_to_first_cpu(node)	((void)(node),0)
 #endif
 #ifndef pcibus_to_node
-#define pcibus_to_node(node)	(-1)
+#define pcibus_to_node(bus)	((void)(bus), -1)
 #endif
 
 #ifndef pcibus_to_cpumask
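
The topology.h stubs gain `(void)(x)` comma expressions so that a caller's otherwise-unused argument still counts as referenced, silencing "unused variable" warnings while the macro keeps yielding its constant. (The pcibus_to_node hunk also fixes the parameter name: the stub took `node` but is invoked with a bus.) The idiom:

    #include <stdio.h>

    #define cpu_to_node(cpu)	((void)(cpu), 0)	/* evaluate, then yield 0 */

    int main(void)
    {
    	int cpu = 3;	/* referenced through the macro: no -Wunused warning */

    	printf("%d\n", cpu_to_node(cpu));
    	return 0;
    }
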
diff --git a/include/asm-ia64/param.h b/include/asm-ia64/param.h
index 49c62dd..0964c32 100644
--- a/include/asm-ia64/param.h
+++ b/include/asm-ia64/param.h
@@ -19,15 +19,7 @@
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
 #ifdef __KERNEL__
-# ifdef CONFIG_IA64_HP_SIM
-  /*
-   * Yeah, simulating stuff is slow, so let us catch some breath between
-   * timer interrupts...
-   */
-#  define HZ	  32
-# else
-#  define HZ	CONFIG_HZ
-# endif
+# define HZ		CONFIG_HZ
 # define USER_HZ	HZ
 # define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
 #else
diff --git a/include/asm-mn10300/highmem.h b/include/asm-mn10300/highmem.h
index 383c0c4..5256854 100644
--- a/include/asm-mn10300/highmem.h
+++ b/include/asm-mn10300/highmem.h
@@ -42,8 +42,8 @@
 #define PKMAP_NR(virt)  ((virt - PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-extern unsigned long __fastcall kmap_high(struct page *page);
-extern void __fastcall kunmap_high(struct page *page);
+extern unsigned long kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
 
 static inline unsigned long kmap(struct page *page)
 {
diff --git a/include/asm-mn10300/linkage.h b/include/asm-mn10300/linkage.h
index 29a32e4..dda3002 100644
--- a/include/asm-mn10300/linkage.h
+++ b/include/asm-mn10300/linkage.h
@@ -13,8 +13,6 @@
 
 /* don't override anything */
 #define asmlinkage
-#define FASTCALL(x) x
-#define fastcall
 
 #define __ALIGN		.align 4,0xcb
 #define __ALIGN_STR	".align 4,0xcb"
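
This file, and the many header hunks below that strip FASTCALL()/fastcall, reflect the 2.6.25-era removal of the annotation. On i386 it historically expanded to a regparm(3) attribute (pass the first three integer arguments in registers); on every other architecture it was already a no-op, so deleting it is mechanical. A sketch of the old definition, reconstructed from memory:

    #ifdef __i386__
    # define FASTCALL(x)	x __attribute__((regparm(3)))
    # define fastcall	__attribute__((regparm(3)))
    #else
    # define FASTCALL(x)	x
    # define fastcall
    #endif

    /* a declaration as it used to appear: */
    extern void FASTCALL(raise_softirq(unsigned int nr));

    int main(void) { return 0; }
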
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 681dead..d743947 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -58,6 +58,7 @@
 
 #define X86_FXSR_MAGIC		0x0000
 
+#ifdef __KERNEL__
 struct sigcontext {
 	unsigned short gs, __gsh;
 	unsigned short fs, __fsh;
@@ -82,6 +83,35 @@
 	unsigned long oldmask;
 	unsigned long cr2;
 };
+#else /* __KERNEL__ */
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+	unsigned short gs, __gsh;
+	unsigned short fs, __fsh;
+	unsigned short es, __esh;
+	unsigned short ds, __dsh;
+	unsigned long edi;
+	unsigned long esi;
+	unsigned long ebp;
+	unsigned long esp;
+	unsigned long ebx;
+	unsigned long edx;
+	unsigned long ecx;
+	unsigned long eax;
+	unsigned long trapno;
+	unsigned long err;
+	unsigned long eip;
+	unsigned short cs, __csh;
+	unsigned long eflags;
+	unsigned long esp_at_signal;
+	unsigned short ss, __ssh;
+	struct _fpstate __user * fpstate;
+	unsigned long oldmask;
+	unsigned long cr2;
+};
+#endif /* !__KERNEL__ */
 
 #else /* __i386__ */
 
@@ -102,6 +132,7 @@
 	__u32	reserved2[24];
 };
 
+#ifdef __KERNEL__
 struct sigcontext {
 	unsigned long r8;
 	unsigned long r9;
@@ -132,6 +163,41 @@
 	struct _fpstate __user *fpstate;	/* zero when no FPU context */
 	unsigned long reserved1[8];
 };
+#else /* __KERNEL__ */
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+	unsigned long r8;
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long rdi;
+	unsigned long rsi;
+	unsigned long rbp;
+	unsigned long rbx;
+	unsigned long rdx;
+	unsigned long rax;
+	unsigned long rcx;
+	unsigned long rsp;
+	unsigned long rip;
+	unsigned long eflags;		/* RFLAGS */
+	unsigned short cs;
+	unsigned short gs;
+	unsigned short fs;
+	unsigned short __pad0;
+	unsigned long err;
+	unsigned long trapno;
+	unsigned long oldmask;
+	unsigned long cr2;
+	struct _fpstate __user *fpstate;	/* zero when no FPU context */
+	unsigned long reserved1[8];
+};
+#endif /* !__KERNEL__ */
 
 #endif /* !__i386__ */
 
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 7ef8de6..a9931e2 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -206,21 +206,21 @@
 /* prototypes */
 extern unsigned aio_max_size;
 
-extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
-extern int FASTCALL(aio_put_req(struct kiocb *iocb));
-extern void FASTCALL(kick_iocb(struct kiocb *iocb));
-extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
-extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
+extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
+extern int aio_put_req(struct kiocb *iocb);
+extern void kick_iocb(struct kiocb *iocb);
+extern int aio_complete(struct kiocb *iocb, long res, long res2);
+extern void __put_ioctx(struct kioctx *ctx);
 struct mm_struct;
-extern void FASTCALL(exit_aio(struct mm_struct *mm));
+extern void exit_aio(struct mm_struct *mm);
 extern struct kioctx *lookup_ioctx(unsigned long ctx_id);
-extern int FASTCALL(io_submit_one(struct kioctx *ctx,
-			struct iocb __user *user_iocb, struct iocb *iocb));
+extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+			 struct iocb *iocb);
 
 /* semi private, but used by the 32bit emulations: */
 struct kioctx *lookup_ioctx(unsigned long ctx_id);
-int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-				  struct iocb *iocb));
+int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+		  struct iocb *iocb);
 
 #define get_ioctx(kioctx) do {						\
 	BUG_ON(atomic_read(&(kioctx)->users) <= 0);			\
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e98801f..932eb02 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -144,7 +144,7 @@
  * Declarations
  */
 
-void FASTCALL(mark_buffer_dirty(struct buffer_head *bh));
+void mark_buffer_dirty(struct buffer_head *bh);
 void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset);
@@ -185,8 +185,8 @@
 void invalidate_bh_lrus(void);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
-void FASTCALL(unlock_buffer(struct buffer_head *bh));
-void FASTCALL(__lock_buffer(struct buffer_head *bh));
+void unlock_buffer(struct buffer_head *bh);
+void __lock_buffer(struct buffer_head *bh);
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
 int submit_bh(int, struct buffer_head *);
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 228235c..ac6aad9 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -25,7 +25,7 @@
 
 /* */
 
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 SUBSYS(cpu_cgroup)
 #endif
 
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index f8c9a27..0a26be3 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -26,8 +26,6 @@
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
 void cpuset_update_task_memory_state(void);
-#define cpuset_nodes_subset_current_mems_allowed(nodes) \
-		nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
 extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
@@ -103,7 +101,6 @@
 #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 static inline void cpuset_update_task_memory_state(void) {}
-#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
 
 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 {
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index b7558ec..25d62e6 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -70,8 +70,7 @@
 }
 
 /**
- * is_local_ether_addr - Determine if the Ethernet address is locally-assigned
- * one (IEEE 802).
+ * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802).
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
  * Return true if the address is a local address.
diff --git a/include/linux/file.h b/include/linux/file.h
index 56023c7..7239baa 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -59,8 +59,8 @@
 
 extern struct kmem_cache *filp_cachep;
 
-extern void FASTCALL(__fput(struct file *));
-extern void FASTCALL(fput(struct file *));
+extern void __fput(struct file *);
+extern void fput(struct file *);
 
 struct file_operations;
 struct vfsmount;
@@ -77,13 +77,13 @@
 		fput(file);
 }
 
-extern struct file * FASTCALL(fget(unsigned int fd));
-extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed));
-extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag));
+extern struct file *fget(unsigned int fd);
+extern struct file *fget_light(unsigned int fd, int *fput_needed);
+extern void set_close_on_exec(unsigned int fd, int flag);
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern int get_unused_fd_flags(int flags);
-extern void FASTCALL(put_unused_fd(unsigned int fd));
+extern void put_unused_fd(unsigned int fd);
 struct kmem_cache;
 
 extern int expand_files(struct files_struct *, int nr);
@@ -110,12 +110,12 @@
  */
 #define fcheck(fd)	fcheck_files(current->files, fd)
 
-extern void FASTCALL(fd_install(unsigned int fd, struct file * file));
+extern void fd_install(unsigned int fd, struct file *file);
 
 struct task_struct;
 
 struct files_struct *get_files_struct(struct task_struct *);
-void FASTCALL(put_files_struct(struct files_struct *fs));
+void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct task_struct *, struct files_struct *);
 
 extern struct kmem_cache *files_cachep;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0c6ce51..164be9d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -172,8 +172,7 @@
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-extern struct page *
-FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
+extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
@@ -209,8 +208,8 @@
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
-extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask));
+extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
+extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask),0)
@@ -218,10 +217,10 @@
 #define __get_dma_pages(gfp_mask, order) \
 		__get_free_pages((gfp_mask) | GFP_DMA,(order))
 
-extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
-extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
-extern void FASTCALL(free_hot_page(struct page *page));
-extern void FASTCALL(free_cold_page(struct page *page));
+extern void __free_pages(struct page *page, unsigned int order);
+extern void free_pages(unsigned long addr, unsigned int order);
+extern void free_hot_page(struct page *page);
+extern void free_cold_page(struct page *page);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
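These page-allocator entry points likewise only lose the annotation. A minimal pairing of the declarations above, for illustration:

    #include <linux/gfp.h>

    static void pages_demo(void)
    {
    	/* order-2 request: four physically contiguous pages */
    	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);

    	if (buf) {
    		/* ... use the buffer ... */
    		free_pages(buf, 2);
    	}
    }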
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7ca198b..addca4c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,8 +33,8 @@
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
+extern unsigned long sysctl_overcommit_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
-extern unsigned long nr_overcommit_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index acec99d..a3b69c1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -906,6 +906,8 @@
 					  IDE_TFLAG_IN_DEVICE,
 	/* force 16-bit I/O operations */
 	IDE_TFLAG_IO_16BIT		= (1 << 30),
+	/* ide_task_t was allocated using kmalloc() */
+	IDE_TFLAG_DYN			= (1 << 31),
 };
 
 struct ide_taskfile {
@@ -998,8 +1000,7 @@
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, u8 *);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
-/* FIXME: palm_bk3710 uses BLK_DEV_IDEDMA_PCI without BLK_DEV_IDEPCI! */
-#if defined(CONFIG_BLK_DEV_IDEPCI) && defined(CONFIG_BLK_DEV_IDEDMA_PCI)
+#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 void ide_hwif_setup_dma(ide_hwif_t *, const struct ide_port_info *);
 #else
 static inline void ide_hwif_setup_dma(ide_hwif_t *hwif,
@@ -1146,7 +1147,7 @@
 int ide_build_sglist(ide_drive_t *, struct request *);
 void ide_destroy_dmatable(ide_drive_t *);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 extern int ide_build_dmatable(ide_drive_t *, struct request *);
 extern int ide_release_dma(ide_hwif_t *);
 extern void ide_setup_dma(ide_hwif_t *, unsigned long);
@@ -1157,7 +1158,7 @@
 extern int __ide_dma_end(ide_drive_t *);
 extern void ide_dma_lost_irq(ide_drive_t *);
 extern void ide_dma_timeout(ide_drive_t *);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
 
 #else
 static inline int ide_id_dma_bug(ide_drive_t *drive) { return 0; }
@@ -1171,7 +1172,7 @@
 static inline void ide_check_dma_crc(ide_drive_t *drive) { ; }
 #endif /* CONFIG_BLK_DEV_IDEDMA */
 
-#ifndef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifndef CONFIG_BLK_DEV_IDEDMA_SFF
 static inline void ide_release_dma(ide_hwif_t *drive) {;}
 #endif
 
@@ -1294,7 +1295,7 @@
 static inline int hwif_to_node(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	return dev ? pcibus_to_node(dev->bus) : -1;
+	return hwif->dev ? pcibus_to_node(dev->bus) : -1;
 }
 
 static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index dea7598..f8ab4ce 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -273,8 +273,8 @@
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 extern void softirq_init(void);
 #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
-extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
-extern void FASTCALL(raise_softirq(unsigned int nr));
+extern void raise_softirq_irqoff(unsigned int nr);
+extern void raise_softirq(unsigned int nr);
 
 
 /* Tasklets --- multithreaded analogue of BHs.
@@ -341,7 +341,7 @@
 #define tasklet_unlock(t) do { } while (0)
 #endif
 
-extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+extern void __tasklet_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_schedule(struct tasklet_struct *t)
 {
@@ -349,7 +349,7 @@
 		__tasklet_schedule(t);
 }
 
-extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+extern void __tasklet_hi_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 {
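The tasklet fast paths are unchanged apart from the annotation; for context, the classic usage they serve (names illustrative):

    #include <linux/interrupt.h>

    static void my_tasklet_fn(unsigned long data)
    {
    	/* deferred work, runs in softirq context */
    }

    static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);

    /* typically invoked from a hard-irq handler */
    static void kick_bottom_half(void)
    {
    	tasklet_schedule(&my_tasklet);
    }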
diff --git a/include/linux/irq.h b/include/linux/irq.h
index bfd9efb..176e5e7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -285,7 +285,6 @@
 
 /*
  * Monolithic do_IRQ implementation.
- * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
 #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 extern unsigned int __do_IRQ(unsigned int irq);
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 3faf599..0592936 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -73,9 +73,4 @@
 #define ATTRIB_NORET  __attribute__((noreturn))
 #define NORET_AND     noreturn,
 
-#ifndef FASTCALL
-#define FASTCALL(x)	x
-#define fastcall
-#endif
-
 #endif
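This hunk is the heart of the tree-wide FASTCALL removal: with the generic fallback gone, no architecture defines the macro any more. On i386 it used to expand to a regparm attribute, roughly as below (a sketch from memory of the old asm-i386/linkage.h; other architectures already used the empty fallback deleted here). Since x86-32 now builds the whole tree with -mregparm=3 unconditionally, the per-symbol attribute had become redundant:

    /* old i386 definitions (approximate) */
    #define FASTCALL(x)	x __attribute__((regparm(3)))
    #define fastcall	__attribute__((regparm(3)))

    /* hence these two declarations are ABI-identical under -mregparm=3;
     * the names are illustrative */
    extern void fput_old(struct file *) __attribute__((regparm(3)));
    extern void fput_new(struct file *);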
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 5f36cf9..5df879d 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -19,16 +19,23 @@
 
 /**
  * marker_probe_func - Type of a marker probe function
- * @mdata: pointer of type struct marker
- * @private_data: caller site private data
+ * @probe_private: probe private data
+ * @call_private: call site private data
  * @fmt: format string
- * @...: variable argument list
+ * @args: variable argument list pointer. A pointer is used because C
+ *        provides no portable way for the callee to pass a va_list
+ *        around otherwise.
  *
  * Type of marker probe functions. They receive the mdata and need to parse the
  * format string to recover the variable argument list.
  */
-typedef void marker_probe_func(const struct marker *mdata,
-	void *private_data, const char *fmt, ...);
+typedef void marker_probe_func(void *probe_private, void *call_private,
+		const char *fmt, va_list *args);
+
+struct marker_probe_closure {
+	marker_probe_func *func;	/* Callback */
+	void *probe_private;		/* Private probe data */
+};
 
 struct marker {
 	const char *name;	/* Marker name */
@@ -36,8 +43,11 @@
 				 * variable argument list.
 				 */
 	char state;		/* Marker state. */
-	marker_probe_func *call;/* Probe handler function pointer */
-	void *private;		/* Private probe data */
+	char ptype;		/* probe type : 0 : single, 1 : multi */
+	void (*call)(const struct marker *mdata,	/* Probe wrapper */
+		void *call_private, const char *fmt, ...);
+	struct marker_probe_closure single;
+	struct marker_probe_closure *multi;
 } __attribute__((aligned(8)));
 
 #ifdef CONFIG_MARKERS
@@ -49,35 +59,31 @@
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __trace_mark(name, call_data, format, args...)			\
+#define __trace_mark(name, call_private, format, args...)		\
 	do {								\
-		static const char __mstrtab_name_##name[]		\
+		static const char __mstrtab_##name[]			\
 		__attribute__((section("__markers_strings")))		\
-		= #name;						\
-		static const char __mstrtab_format_##name[]		\
-		__attribute__((section("__markers_strings")))		\
-		= format;						\
+		= #name "\0" format;					\
 		static struct marker __mark_##name			\
 		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
-		0, __mark_empty_function, NULL };			\
+		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
+		0, 0, marker_probe_cb,					\
+		{ __mark_empty_function, NULL}, NULL };			\
 		__mark_check_format(format, ## args);			\
 		if (unlikely(__mark_##name.state)) {			\
-			preempt_disable();				\
 			(*__mark_##name.call)				\
-				(&__mark_##name, call_data,		\
+				(&__mark_##name, call_private,		\
 				format, ## args);			\
-			preempt_enable();				\
 		}							\
 	} while (0)
 
 extern void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module, int *refcount);
+	struct marker *end);
 #else /* !CONFIG_MARKERS */
-#define __trace_mark(name, call_data, format, args...) \
+#define __trace_mark(name, call_private, format, args...) \
 		__mark_check_format(format, ## args)
 static inline void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module, int *refcount)
+	struct marker *end)
 { }
 #endif /* CONFIG_MARKERS */
 
@@ -92,8 +98,6 @@
 #define trace_mark(name, format, args...) \
 	__trace_mark(name, NULL, format, ## args)
 
-#define MARK_MAX_FORMAT_LEN	1024
-
 /**
  * MARK_NOARGS - Format string for a marker with no argument.
  */
@@ -106,24 +110,30 @@
 
 extern marker_probe_func __mark_empty_function;
 
+extern void marker_probe_cb(const struct marker *mdata,
+	void *call_private, const char *fmt, ...);
+extern void marker_probe_cb_noarg(const struct marker *mdata,
+	void *call_private, const char *fmt, ...);
+
 /*
  * Connect a probe to a marker.
  * private data pointer must be a valid allocated memory address, or NULL.
  */
 extern int marker_probe_register(const char *name, const char *format,
-				marker_probe_func *probe, void *private);
+				marker_probe_func *probe, void *probe_private);
 
 /*
  * Returns the private data given to marker_probe_register.
  */
-extern void *marker_probe_unregister(const char *name);
+extern int marker_probe_unregister(const char *name,
+	marker_probe_func *probe, void *probe_private);
 /*
  * Unregister a marker by providing the registered private data.
  */
-extern void *marker_probe_unregister_private_data(void *private);
+extern int marker_probe_unregister_private_data(marker_probe_func *probe,
+	void *probe_private);
 
-extern int marker_arm(const char *name);
-extern int marker_disarm(const char *name);
-extern void *marker_get_private_data(const char *name);
+extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
+	int num);
 
 #endif
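Taken together, the new marker interface identifies a probe by its (function, private data) pair and supports several probes per marker. A hedged sketch of a probe module against this API; my_probe and subsystem_event are illustrative names, not part of the patch:

    #include <linux/module.h>
    #include <linux/marker.h>

    static void my_probe(void *probe_private, void *call_private,
    		     const char *fmt, va_list *args)
    {
    	/* parse *args according to fmt if the payload is needed */
    }

    static int __init my_probe_init(void)
    {
    	/* registering also arms the marker; the old marker_arm()/
    	 * marker_disarm() calls are gone */
    	return marker_probe_register("subsystem_event", "value %d",
    				     my_probe, NULL);
    }

    static void __exit my_probe_exit(void)
    {
    	marker_probe_unregister("subsystem_event", my_probe, NULL);
    }

    module_init(my_probe_init);
    module_exit(my_probe_exit);
    MODULE_LICENSE("GPL");

The instrumented code itself still just says trace_mark(subsystem_event, "value %d", value);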
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e8abb38..26c7124 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -786,7 +786,7 @@
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-int FASTCALL(set_page_dirty(struct page *page));
+int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
@@ -829,7 +829,7 @@
 
 int vma_wants_writenotify(struct vm_area_struct *vma);
 
-extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
+extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
 
 #ifdef __PAGETABLE_PUD_FOLDED
 static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
diff --git a/include/linux/module.h b/include/linux/module.h
index ac28e87..330bec0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -465,7 +465,7 @@
 
 extern void print_modules(void);
 
-extern void module_update_markers(struct module *probe_module, int *refcount);
+extern void module_update_markers(void);
 
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 8126e55..ec62438 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -62,6 +62,16 @@
 	void *elem;
 };
 
+/* On alpha, ia64 and ppc64 relocations to global data cannot go into
+   read-only sections (which is part of the respective UNIX ABI on these
+   platforms). So 'const' makes no sense and even causes compile failures
+   with some compilers. */
+#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64)
+#define __moduleparam_const
+#else
+#define __moduleparam_const const
+#endif
+
 /* This is the fundamental function for registering boot/module
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
@@ -71,7 +81,7 @@
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2));	\
 	static const char __param_str_##name[] = prefix #name;		\
-	static struct kernel_param const __param_##name			\
+	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
 	= { __param_str_##name, perm, set, get, { arg } }
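For illustration, a parameter declared through the unchanged module_param() wrapper; it expands to the __module_param_call() above, so on alpha/ia64/ppc64 the kernel_param entry now lands in a writable section:

    #include <linux/moduleparam.h>

    static int debug_level;	/* illustrative parameter */
    module_param(debug_level, int, 0644);
    MODULE_PARM_DESC(debug_level, "verbosity (example only)");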
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 2537285..731d77d 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -18,6 +18,6 @@
 	__mutex_init((mutex), #mutex, &__key);				\
 } while (0)
 
-extern void FASTCALL(mutex_destroy(struct mutex *lock));
+extern void mutex_destroy(struct mutex *lock);
 
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4cb4f8d..c13e411 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -62,13 +62,13 @@
 #define LOOKUP_ACCESS		(0x0400)
 #define LOOKUP_CHDIR		(0x0800)
 
-extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
-extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
+extern int __user_walk(const char __user *, unsigned, struct nameidata *);
+extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *);
 #define user_path_walk(name,nd) \
 	__user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
 	__user_walk_fd(AT_FDCWD, name, 0, nd)
-extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+extern int path_lookup(const char *, unsigned, struct nameidata *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
 extern void path_release(struct nameidata *);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 047d432..7128a02 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@
 	NAPI_STATE_DISABLE,	/* Disable pending */
 };
 
-extern void FASTCALL(__napi_schedule(struct napi_struct *n));
+extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
 {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b62a10..d2fca80 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -156,10 +156,10 @@
 	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
 
-extern void FASTCALL(__lock_page(struct page *page));
-extern int FASTCALL(__lock_page_killable(struct page *page));
-extern void FASTCALL(__lock_page_nosync(struct page *page));
-extern void FASTCALL(unlock_page(struct page *page));
+extern void __lock_page(struct page *page);
+extern int __lock_page_killable(struct page *page);
+extern void __lock_page_nosync(struct page *page);
+extern void unlock_page(struct page *page);
 
 /*
  * lock_page may only be called if we have the page's inode pinned.
@@ -199,7 +199,7 @@
  * This is exported only for wait_on_page_locked/wait_on_page_writeback.
  * Never use this directly!
  */
-extern void FASTCALL(wait_on_page_bit(struct page *page, int bit_nr));
+extern void wait_on_page_bit(struct page *page, int bit_nr);
 
 /* 
  * Wait for a page to be unlocked.
diff --git a/include/linux/pid.h b/include/linux/pid.h
index f84d532..c798081 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -79,10 +79,9 @@
 	return pid;
 }
 
-extern void FASTCALL(put_pid(struct pid *pid));
-extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type));
-extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid,
-						enum pid_type));
+extern void put_pid(struct pid *pid);
+extern struct task_struct *pid_task(struct pid *pid, enum pid_type);
+extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type);
 
 extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
 
@@ -90,11 +89,11 @@
  * attach_pid() and detach_pid() must be called with the tasklist_lock
  * write-held.
  */
-extern int FASTCALL(attach_pid(struct task_struct *task,
-				enum pid_type type, struct pid *pid));
-extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
-extern void FASTCALL(transfer_pid(struct task_struct *old,
-				  struct task_struct *new, enum pid_type));
+extern int attach_pid(struct task_struct *task, enum pid_type type,
+		      struct pid *pid);
+extern void detach_pid(struct task_struct *task, enum pid_type);
+extern void transfer_pid(struct task_struct *old, struct task_struct *new,
+			 enum pid_type);
 
 struct pid_namespace;
 extern struct pid_namespace init_pid_ns;
@@ -109,7 +108,7 @@
  *
  * see also find_task_by_pid() set in include/linux/sched.h
  */
-extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
+extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
 extern struct pid *find_pid(int nr);
 
@@ -121,7 +120,7 @@
 int next_pidmap(struct pid_namespace *pid_ns, int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
-extern void FASTCALL(free_pid(struct pid *pid));
+extern void free_pid(struct pid *pid);
 
 /*
  * the helpers to get the pid's id seen from different namespaces
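A hedged sketch of how the de-FASTCALLed pid helpers compose; the helper name is illustrative:

    #include <linux/pid.h>
    #include <linux/rcupdate.h>
    #include <linux/sched.h>

    static struct task_struct *task_from_nr(int nr)
    {
    	struct task_struct *task;

    	rcu_read_lock();
    	task = pid_task(find_vpid(nr), PIDTYPE_PID);
    	if (task)
    		get_task_struct(task);
    	rcu_read_unlock();
    	return task;	/* caller releases with put_task_struct() */
    }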
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 813cee1..6c3c0f6c 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -60,14 +60,14 @@
 	__init_rwsem((sem), #sem, &__key);			\
 } while (0)
 
-extern void FASTCALL(__down_read(struct rw_semaphore *sem));
-extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem));
-extern void FASTCALL(__down_write(struct rw_semaphore *sem));
-extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass));
-extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem));
-extern void FASTCALL(__up_read(struct rw_semaphore *sem));
-extern void FASTCALL(__up_write(struct rw_semaphore *sem));
-extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
+extern void __down_read(struct rw_semaphore *sem);
+extern int __down_read_trylock(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+extern void __down_write_nested(struct rw_semaphore *sem, int subclass);
+extern int __down_write_trylock(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
+extern void __up_write(struct rw_semaphore *sem);
+extern void __downgrade_write(struct rw_semaphore *sem);
 
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00e1441..e217d18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -323,7 +323,7 @@
 extern int in_sched_functions(unsigned long addr);
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
-extern signed long FASTCALL(schedule_timeout(signed long timeout));
+extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
@@ -590,7 +590,7 @@
 	struct hlist_node uidhash_node;
 	uid_t uid;
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	struct task_group *tg;
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;
@@ -973,7 +973,7 @@
 	unsigned long timeout;
 	int nr_cpus_allowed;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct sched_rt_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct rt_rq		*rt_rq;
@@ -1541,8 +1541,6 @@
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_rt_period;
-extern unsigned int sysctl_sched_rt_ratio;
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 extern unsigned int sysctl_sched_min_bal_int_shares;
 extern unsigned int sysctl_sched_max_bal_int_shares;
@@ -1552,6 +1550,8 @@
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
 
 extern unsigned int sysctl_sched_compat_yield;
 
@@ -1648,10 +1648,10 @@
 
 extern void do_timer(unsigned long ticks);
 
-extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
-						unsigned long clone_flags));
+extern int wake_up_state(struct task_struct *tsk, unsigned int state);
+extern int wake_up_process(struct task_struct *tsk);
+extern void wake_up_new_task(struct task_struct *tsk,
+				unsigned long clone_flags);
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
 #else
@@ -1741,7 +1741,7 @@
 extern struct mm_struct * mm_alloc(void);
 
 /* mmdrop drops the mm and the page tables */
-extern void FASTCALL(__mmdrop(struct mm_struct *));
+extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct * mm)
 {
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
@@ -1925,7 +1925,7 @@
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
 
-extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+extern int __fatal_signal_pending(struct task_struct *p);
 
 static inline int fatal_signal_pending(struct task_struct *p)
 {
@@ -2027,16 +2027,22 @@
 
 extern void normalize_rt_tasks(void);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
 extern struct task_group init_task_group;
 
 extern struct task_group *sched_create_group(void);
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
+#ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 extern unsigned long sched_group_shares(struct task_group *tg);
-
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+extern int sched_group_set_rt_runtime(struct task_group *tg,
+				      long rt_runtime_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
+#endif
 #endif
 
 #ifdef CONFIG_TASK_XACCT
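A hedged sketch of the new RT-group knobs, assuming CONFIG_RT_GROUP_SCHED=y; the demo function is illustrative:

    #include <linux/err.h>
    #include <linux/kernel.h>
    #include <linux/sched.h>

    static int rt_group_demo(void)
    {
    	struct task_group *tg = sched_create_group();

    	if (IS_ERR(tg))
    		return PTR_ERR(tg);
    	/* grant the group 10ms of RT runtime per period */
    	if (sched_group_set_rt_runtime(tg, 10000))
    		printk(KERN_WARNING "rt runtime rejected\n");
    	printk(KERN_INFO "group rt runtime: %ld us\n",
    	       sched_group_rt_runtime(tg));
    	sched_destroy_group(tg);
    	return 0;
    }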
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1a0b6cf..289942f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -149,6 +149,8 @@
 /* Freescale ColdFire */
 #define PORT_MCF	78
 
+#define PORT_SC26XX	79
+
 
 /* MN10300 on-chip UART numbers */
 #define PORT_MN10300		80
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 64c7710..64c9755 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -409,16 +409,13 @@
  * for all cases without actually generating the checksum, so we just use a
  * static value.
  */
-static inline void
-svc_reserve_auth(struct svc_rqst *rqstp, int space)
+static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int			added_space = 0;
+	int added_space = 0;
 
-	switch(rqstp->rq_authop->flavour) {
-		case RPC_AUTH_GSS:
-			added_space = RPC_MAX_AUTH_SIZE;
-	}
-	return svc_reserve(rqstp, space + added_space);
+	if (rqstp->rq_authop->flavour)
+		added_space = RPC_MAX_AUTH_SIZE;
+	svc_reserve(rqstp, space + added_space);
 }
 
 #endif /* SUNRPC_SVC_H */
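Worth noting: the old switch reserved the extra RPC_MAX_AUTH_SIZE only for RPC_AUTH_GSS, whereas the new test reserves it for any nonzero flavour. That arguably over-reserves for e.g. AUTH_UNIX, but the reservation is an upper bound anyway, and the rewrite also drops the questionable `return` of a void expression from a void function.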
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3ca5c4b..878459a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -171,10 +171,10 @@
 
 
 /* linux/mm/swap.c */
-extern void FASTCALL(lru_cache_add(struct page *));
-extern void FASTCALL(lru_cache_add_active(struct page *));
-extern void FASTCALL(activate_page(struct page *));
-extern void FASTCALL(mark_page_accessed(struct page *));
+extern void lru_cache_add(struct page *);
+extern void lru_cache_add_active(struct page *);
+extern void activate_page(struct page *);
+extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern int lru_add_drain_all(void);
 extern int rotate_reclaimable_page(struct page *page);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 33a2aa9..0081147 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -117,9 +117,9 @@
  */
 #define is_sync_wait(wait)	(!(wait) || ((wait)->private))
 
-extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
-extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
-extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
+extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
 
 static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
 {
@@ -141,16 +141,16 @@
 	list_del(&old->task_list);
 }
 
-void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
-extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
-extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
-void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int));
-int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
-int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
-void FASTCALL(wake_up_bit(void *, int));
-int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned));
-int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned));
-wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+void __wake_up_bit(wait_queue_head_t *, void *, int);
+int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+void wake_up_bit(void *, int);
+int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned);
+int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned);
+wait_queue_head_t *bit_waitqueue(void *, int);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -437,11 +437,9 @@
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
-				wait_queue_t *wait, int state));
-void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
-				wait_queue_t *wait, int state));
-void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
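For context, the canonical open-coded wait loop these helpers support (the wait-queue head and condition are illustrative):

    #include <linux/sched.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(my_wq);
    static int condition;

    static void wait_for_condition(void)
    {
    	DEFINE_WAIT(wait);

    	for (;;) {
    		prepare_to_wait(&my_wq, &wait, TASK_INTERRUPTIBLE);
    		if (condition)
    			break;
    		schedule();
    	}
    	finish_wait(&my_wq, &wait);
    }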
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 7f28c32..542526c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -178,18 +178,17 @@
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
-extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq,
-			struct delayed_work *work, unsigned long delay));
+extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
+extern int queue_delayed_work(struct workqueue_struct *wq,
+			struct delayed_work *work, unsigned long delay);
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 
-extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
+extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
-extern int FASTCALL(schedule_work(struct work_struct *work));
-extern int FASTCALL(schedule_delayed_work(struct delayed_work *work,
-					unsigned long delay));
+extern int schedule_work(struct work_struct *work);
+extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 					unsigned long delay);
 extern int schedule_on_each_cpu(work_func_t func);
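And the corresponding workqueue usage, unchanged except for the annotation (names illustrative):

    #include <linux/jiffies.h>
    #include <linux/workqueue.h>

    static void my_work_fn(struct work_struct *work)
    {
    	/* runs in process context on the shared kernel workqueue */
    }

    static DECLARE_WORK(my_work, my_work_fn);
    static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);

    static void submit_work(void)
    {
    	schedule_work(&my_work);
    	schedule_delayed_work(&my_dwork, HZ);	/* roughly one second later */
    }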
diff --git a/init/Kconfig b/init/Kconfig
index 824d48c..dcef8b5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -311,25 +311,36 @@
 
 	  Say N if unsure.
 
-config FAIR_GROUP_SCHED
-	bool "Fair group CPU scheduler"
+config GROUP_SCHED
+	bool "Group CPU scheduler"
 	default y
 	help
 	  This feature lets CPU scheduler recognize task groups and control CPU
 	  bandwidth allocation to such task groups.
 
-choice
-	depends on FAIR_GROUP_SCHED
-	prompt "Basis for grouping tasks"
-	default FAIR_USER_SCHED
+config FAIR_GROUP_SCHED
+	bool "Group scheduling for SCHED_OTHER"
+	depends on GROUP_SCHED
+	default y
 
-config FAIR_USER_SCHED
+config RT_GROUP_SCHED
+	bool "Group scheduling for SCHED_RR/FIFO"
+	depends on EXPERIMENTAL
+	depends on GROUP_SCHED
+	default n
+
+choice
+	depends on GROUP_SCHED
+	prompt "Basis for grouping tasks"
+	default USER_SCHED
+
+config USER_SCHED
 	bool "user id"
 	help
 	  This option will choose userid as the basis for grouping
 	  tasks, thus providing equal CPU bandwidth to each user.
 
-config FAIR_CGROUP_SCHED
+config CGROUP_SCHED
 	bool "Control groups"
  	depends on CGROUPS
  	help
diff --git a/init/Makefile b/init/Makefile
index c5f157c..4a243df 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -27,6 +27,7 @@
 # mkcompile_h will make sure to only update the
 # actual file if its content has changed.
 
+       chk_compile.h = :
  quiet_chk_compile.h = echo '  CHK     $@'
 silent_chk_compile.h = :
 include/linux/compile.h: FORCE
diff --git a/kernel/marker.c b/kernel/marker.c
index 5323cfa..c4c2cd8 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -27,21 +27,14 @@
 extern struct marker __start___markers[];
 extern struct marker __stop___markers[];
 
-/*
- * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
- * and module markers, the hash table and deferred_sync.
- */
-static DEFINE_MUTEX(markers_mutex);
+/* Set to 1 to enable marker debug output */
+const int marker_debug;
 
 /*
- * Marker deferred synchronization.
- * Upon marker probe_unregister, we delay call to synchronize_sched() to
- * accelerate mass unregistration (only when there is no more reference to a
- * given module do we call synchronize_sched()). However, we need to make sure
- * every critical region has ended before we re-arm a marker that has been
- * unregistered and then registered back with a different probe data.
+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
+ * and module markers and the hash table.
  */
-static int deferred_sync;
+static DEFINE_MUTEX(markers_mutex);
 
 /*
  * Marker hash table, containing the active markers.
@@ -50,12 +43,26 @@
 #define MARKER_HASH_BITS 6
 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
 
+/*
+ * Note about RCU :
+ * It is used to make sure every handler has finished using its private data
+ * between two consecutive operations (add or remove) on a given marker.  It is
+ * also used to delay the free of multiple probes array until a quiescent state
+ * is reached.
+ * Modifications of marker entries are protected by the markers_mutex.
+ */
 struct marker_entry {
 	struct hlist_node hlist;
 	char *format;
-	marker_probe_func *probe;
-	void *private;
+	void (*call)(const struct marker *mdata,	/* Probe wrapper */
+		void *call_private, const char *fmt, ...);
+	struct marker_probe_closure single;
+	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	char rcu_pending:1;
+	char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
 
@@ -63,7 +70,8 @@
 
 /**
  * __mark_empty_function - Empty probe callback
- * @mdata: pointer of type const struct marker
+ * @probe_private: probe private data
+ * @call_private: call site private data
  * @fmt: format string
  * @...: variable argument list
  *
@@ -72,13 +80,267 @@
  * though the function pointer change and the marker enabling are two distinct
  * operations that modifies the execution flow of preemptible code.
  */
-void __mark_empty_function(const struct marker *mdata, void *private,
-	const char *fmt, ...)
+void __mark_empty_function(void *probe_private, void *call_private,
+	const char *fmt, va_list *args)
 {
 }
 EXPORT_SYMBOL_GPL(__mark_empty_function);
 
 /*
+ * marker_probe_cb - Callback that prepares the variable argument list for probes.
+ * @mdata: pointer of type struct marker
+ * @call_private: call site private data
+ * @fmt: format string
+ * @...:  Variable argument list.
+ *
+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
+ * need to put a full smp_rmb() in this branch. This is why we do not use
+ * rcu_dereference() for the pointer read.
+ */
+void marker_probe_cb(const struct marker *mdata, void *call_private,
+	const char *fmt, ...)
+{
+	va_list args;
+	char ptype;
+
+	/*
+	 * Disabling preemption makes sure the teardown of the callbacks can
+	 * be done correctly when they are in modules, and it ensures RCU
+	 * read coherency.
+	 */
+	preempt_disable();
+	ptype = ACCESS_ONCE(mdata->ptype);
+	if (likely(!ptype)) {
+		marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependent,
+		 * so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func = ACCESS_ONCE(mdata->single.func);
+		/* Must read the ptr before private data. They are not data
+		 * dependent, so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		va_start(args, fmt);
+		func(mdata->single.probe_private, call_private, fmt, &args);
+		va_end(args);
+	} else {
+		struct marker_probe_closure *multi;
+		int i;
+		/*
+		 * multi points to an array, therefore accessing the array
+		 * depends on reading multi. However, even in this case,
+		 * we must ensure that the pointer is read _before_ the array
+		 * data. Same as rcu_dereference, but we need a full smp_rmb()
+		 * in the fast path, so put the explicit barrier here.
+		 */
+		smp_read_barrier_depends();
+		multi = ACCESS_ONCE(mdata->multi);
+		for (i = 0; multi[i].func; i++) {
+			va_start(args, fmt);
+			multi[i].func(multi[i].probe_private, call_private, fmt,
+				&args);
+			va_end(args);
+		}
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb);
+
+/*
+ * marker_probe_cb_noarg - Callback that does not prepare the variable argument list.
+ * @mdata: pointer of type struct marker
+ * @call_private: call site private data
+ * @fmt: format string
+ * @...:  Variable argument list.
+ *
+ * Should be connected to markers with the MARK_NOARGS format string.
+ */
+void marker_probe_cb_noarg(const struct marker *mdata,
+	void *call_private, const char *fmt, ...)
+{
+	va_list args;	/* not initialized */
+	char ptype;
+
+	preempt_disable();
+	ptype = ACCESS_ONCE(mdata->ptype);
+	if (likely(!ptype)) {
+		marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependent,
+		 * so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func = ACCESS_ONCE(mdata->single.func);
+		/* Must read the ptr before private data. They are not data
+		 * dependent, so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func(mdata->single.probe_private, call_private, fmt, &args);
+	} else {
+		struct marker_probe_closure *multi;
+		int i;
+		/*
+		 * multi points to an array, therefore accessing the array
+		 * depends on reading multi. However, even in this case,
+		 * we must ensure that the pointer is read _before_ the array
+		 * data. Same as rcu_dereference, but we need a full smp_rmb()
+		 * in the fast path, so put the explicit barrier here.
+		 */
+		smp_read_barrier_depends();
+		multi = ACCESS_ONCE(mdata->multi);
+		for (i = 0; multi[i].func; i++)
+			multi[i].func(multi[i].probe_private, call_private, fmt,
+				&args);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct marker_entry *entry = container_of(head,
+		struct marker_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void debug_print_probes(struct marker_entry *entry)
+{
+	int i;
+
+	if (!marker_debug)
+		return;
+
+	if (!entry->ptype) {
+		printk(KERN_DEBUG "Single probe : %p %p\n",
+			entry->single.func,
+			entry->single.probe_private);
+	} else {
+		for (i = 0; entry->multi[i].func; i++)
+			printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
+				entry->multi[i].func,
+				entry->multi[i].probe_private);
+	}
+}
+
+static struct marker_probe_closure *
+marker_entry_add_probe(struct marker_entry *entry,
+		marker_probe_func *probe, void *probe_private)
+{
+	int nr_probes = 0;
+	struct marker_probe_closure *old, *new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->multi;
+	if (!entry->ptype) {
+		if (entry->single.func == probe &&
+				entry->single.probe_private == probe_private)
+			return ERR_PTR(-EBUSY);
+		if (entry->single.func == __mark_empty_function) {
+			/* 0 -> 1 probes */
+			entry->single.func = probe;
+			entry->single.probe_private = probe_private;
+			entry->refcount = 1;
+			entry->ptype = 0;
+			debug_print_probes(entry);
+			return NULL;
+		} else {
+			/* 1 -> 2 probes */
+			nr_probes = 1;
+			old = NULL;
+		}
+	} else {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes].func; nr_probes++)
+			if (old[nr_probes].func == probe
+					&& old[nr_probes].probe_private
+						== probe_private)
+				return ERR_PTR(-EBUSY);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
+			GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (!old)
+		new[0] = entry->single;
+	else
+		memcpy(new, old,
+			nr_probes * sizeof(struct marker_probe_closure));
+	new[nr_probes].func = probe;
+	new[nr_probes].probe_private = probe_private;
+	entry->refcount = nr_probes + 1;
+	entry->multi = new;
+	entry->ptype = 1;
+	debug_print_probes(entry);
+	return old;
+}
+
+static struct marker_probe_closure *
+marker_entry_remove_probe(struct marker_entry *entry,
+		marker_probe_func *probe, void *probe_private)
+{
+	int nr_probes = 0, nr_del = 0, i;
+	struct marker_probe_closure *old, *new;
+
+	old = entry->multi;
+
+	debug_print_probes(entry);
+	if (!entry->ptype) {
+		/* 0 -> N is an error */
+		WARN_ON(entry->single.func == __mark_empty_function);
+		/* 1 -> 0 probes */
+		WARN_ON(probe && entry->single.func != probe);
+		WARN_ON(entry->single.probe_private != probe_private);
+		entry->single.func = __mark_empty_function;
+		entry->refcount = 0;
+		entry->ptype = 0;
+		debug_print_probes(entry);
+		return NULL;
+	} else {
+		/* (N -> M), (N > 1, M >= 0) probes */
+		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
+			if ((!probe || old[nr_probes].func == probe)
+					&& old[nr_probes].probe_private
+						== probe_private)
+				nr_del++;
+		}
+	}
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1) */
+		entry->single.func = __mark_empty_function;
+		entry->refcount = 0;
+		entry->ptype = 0;
+	} else if (nr_probes - nr_del == 1) {
+		/* N -> 1, (N > 1) */
+		for (i = 0; old[i].func; i++)
+			if ((probe && old[i].func != probe) ||
+					old[i].probe_private != probe_private)
+				entry->single = old[i];
+		entry->refcount = 1;
+		entry->ptype = 0;
+	} else {
+		int j = 0;
+		/* N -> M, (N > 1, M > 1) */
+		/* + 1 for NULL */
+		new = kzalloc((nr_probes - nr_del + 1)
+			* sizeof(struct marker_probe_closure), GFP_KERNEL);
+		if (new == NULL)
+			return ERR_PTR(-ENOMEM);
+		for (i = 0; old[i].func; i++)
+			if ((probe && old[i].func != probe) ||
+					old[i].probe_private != probe_private)
+				new[j++] = old[i];
+		entry->refcount = nr_probes - nr_del;
+		entry->ptype = 1;
+		entry->multi = new;
+	}
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
  * Get marker if the marker is present in the marker hash table.
  * Must be called with markers_mutex held.
  * Returns NULL if not present.
@@ -102,8 +364,7 @@
  * Add the marker to the marker hash table. Must be called with markers_mutex
  * held.
  */
-static int add_marker(const char *name, const char *format,
-	marker_probe_func *probe, void *private)
+static struct marker_entry *add_marker(const char *name, const char *format)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -118,9 +379,8 @@
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			printk(KERN_NOTICE
-				"Marker %s busy, probe %p already installed\n",
-				name, e->probe);
-			return -EBUSY;	/* Already there */
+				"Marker %s busy\n", name);
+			return ERR_PTR(-EBUSY);	/* Already there */
 		}
 	}
 	/*
@@ -130,34 +390,42 @@
 	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
 			GFP_KERNEL);
 	if (!e)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	memcpy(&e->name[0], name, name_len);
 	if (format) {
 		e->format = &e->name[name_len];
 		memcpy(e->format, format, format_len);
+		if (strcmp(e->format, MARK_NOARGS) == 0)
+			e->call = marker_probe_cb_noarg;
+		else
+			e->call = marker_probe_cb;
 		trace_mark(core_marker_format, "name %s format %s",
 				e->name, e->format);
-	} else
+	} else {
 		e->format = NULL;
-	e->probe = probe;
-	e->private = private;
+		e->call = marker_probe_cb;
+	}
+	e->single.func = __mark_empty_function;
+	e->single.probe_private = NULL;
+	e->multi = NULL;
+	e->ptype = 0;
 	e->refcount = 0;
+	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
-	return 0;
+	return e;
 }
 
 /*
  * Remove the marker from the marker hash table. Must be called with mutex_lock
  * held.
  */
-static void *remove_marker(const char *name)
+static int remove_marker(const char *name)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct marker_entry *e;
 	int found = 0;
 	size_t len = strlen(name) + 1;
-	void *private = NULL;
 	u32 hash = jhash(name, len-1, 0);
 
 	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
@@ -167,12 +435,16 @@
 			break;
 		}
 	}
-	if (found) {
-		private = e->private;
-		hlist_del(&e->hlist);
-		kfree(e);
-	}
-	return private;
+	if (!found)
+		return -ENOENT;
+	if (e->single.func != __mark_empty_function)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier();
+	kfree(e);
+	return 0;
 }
 
 /*
@@ -184,6 +456,7 @@
 	size_t name_len = strlen((*entry)->name) + 1;
 	size_t format_len = strlen(format) + 1;
 
+
 	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
 			GFP_KERNEL);
 	if (!e)
@@ -191,11 +464,20 @@
 	memcpy(&e->name[0], (*entry)->name, name_len);
 	e->format = &e->name[name_len];
 	memcpy(e->format, format, format_len);
-	e->probe = (*entry)->probe;
-	e->private = (*entry)->private;
+	if (strcmp(e->format, MARK_NOARGS) == 0)
+		e->call = marker_probe_cb_noarg;
+	else
+		e->call = marker_probe_cb;
+	e->single = (*entry)->single;
+	e->multi = (*entry)->multi;
+	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
+	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
+	/* Make sure the call_rcu has been executed */
+	if ((*entry)->rcu_pending)
+		rcu_barrier();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -206,7 +488,8 @@
 /*
  * Sets the probe callback corresponding to one marker.
  */
-static int set_marker(struct marker_entry **entry, struct marker *elem)
+static int set_marker(struct marker_entry **entry, struct marker *elem,
+		int active)
 {
 	int ret;
 	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
@@ -226,9 +509,43 @@
 		if (ret)
 			return ret;
 	}
-	elem->call = (*entry)->probe;
-	elem->private = (*entry)->private;
-	elem->state = 1;
+
+	/*
+	 * probe_cb setup (statically known) is done here. It is
+	 * asynchronous with the rest of execution, therefore we only
+	 * pass from a "safe" callback (with argument) to an "unsafe"
+	 * callback (does not set arguments).
+	 */
+	elem->call = (*entry)->call;
+	/*
+	 * Sanity check :
+	 * We only update the single probe private data when the ptr is
+	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
+	 */
+	WARN_ON(elem->single.func != __mark_empty_function
+		&& elem->single.probe_private
+		!= (*entry)->single.probe_private &&
+		!elem->ptype);
+	elem->single.probe_private = (*entry)->single.probe_private;
+	/*
+	 * Make sure the private data is valid when we update the
+	 * single probe ptr.
+	 */
+	smp_wmb();
+	elem->single.func = (*entry)->single.func;
+	/*
+	 * We also make sure that the new probe callbacks array is consistent
+	 * before setting a pointer to it.
+	 */
+	rcu_assign_pointer(elem->multi, (*entry)->multi);
+	/*
+	 * Update the function or multi probe array pointer before setting the
+	 * ptype.
+	 */
+	smp_wmb();
+	elem->ptype = (*entry)->ptype;
+	elem->state = active;
+
 	return 0;
 }
 
@@ -240,8 +557,12 @@
  */
 static void disable_marker(struct marker *elem)
 {
+	/* leave "call" as is. It is known statically. */
 	elem->state = 0;
-	elem->call = __mark_empty_function;
+	elem->single.func = __mark_empty_function;
+	/* Update the function before setting the ptype */
+	smp_wmb();
+	elem->ptype = 0;	/* single probe */
 	/*
 	 * Leave the private data and id there, because removal is racy and
 	 * should be done only after a synchronize_sched(). These are never used
@@ -253,14 +574,11 @@
  * marker_update_probe_range - Update a probe range
  * @begin: beginning of the range
  * @end: end of the range
- * @probe_module: module address of the probe being updated
- * @refcount: number of references left to the given probe_module (out)
  *
  * Updates the probe callback corresponding to a range of markers.
  */
 void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module,
-	int *refcount)
+	struct marker *end)
 {
 	struct marker *iter;
 	struct marker_entry *mark_entry;
@@ -268,15 +586,12 @@
 	mutex_lock(&markers_mutex);
 	for (iter = begin; iter < end; iter++) {
 		mark_entry = get_marker(iter->name);
-		if (mark_entry && mark_entry->refcount) {
-			set_marker(&mark_entry, iter);
+		if (mark_entry) {
+			set_marker(&mark_entry, iter,
+					!!mark_entry->refcount);
 			/*
 			 * ignore error, continue
 			 */
-			if (probe_module)
-				if (probe_module ==
-			__module_text_address((unsigned long)mark_entry->probe))
-					(*refcount)++;
 		} else {
 			disable_marker(iter);
 		}
@@ -289,20 +604,27 @@
  * Issues a synchronize_sched() when no reference to the module passed
  * as parameter is found in the probes so the probe module can be
  * safely unloaded from now on.
+ *
+ * Internal callback only changed before the first probe is connected to it.
+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
+ * transitions.  All other transitions will leave the old private data valid.
+ * This makes the non-atomicity of the callback/private data updates safe.
+ *
+ * "special case" updates :
+ * 0 -> 1 callback
+ * 1 -> 0 callback
+ * 1 -> 2 callbacks
+ * 2 -> 1 callbacks
+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
+ * Side effect: marker_set_format may delete the marker entry (creating a
+ * replacement).
  */
-static void marker_update_probes(struct module *probe_module)
+static void marker_update_probes(void)
 {
-	int refcount = 0;
-
 	/* Core kernel markers */
-	marker_update_probe_range(__start___markers,
-			__stop___markers, probe_module, &refcount);
+	marker_update_probe_range(__start___markers, __stop___markers);
 	/* Markers in modules. */
-	module_update_markers(probe_module, &refcount);
-	if (probe_module && refcount == 0) {
-		synchronize_sched();
-		deferred_sync = 0;
-	}
+	module_update_markers();
 }
 
 /**
@@ -310,33 +632,49 @@
  * @name: marker name
  * @format: format string
  * @probe: probe handler
- * @private: probe private data
+ * @probe_private: probe private data
  *
  * private data must be a valid allocated memory address, or NULL.
  * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
  */
 int marker_probe_register(const char *name, const char *format,
-			marker_probe_func *probe, void *private)
+			marker_probe_func *probe, void *probe_private)
 {
 	struct marker_entry *entry;
 	int ret = 0;
+	struct marker_probe_closure *old;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
-	if (entry && entry->refcount) {
-		ret = -EBUSY;
+	if (!entry) {
+		entry = add_marker(name, format);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
+	}
+	/*
+	 * If we detect that a call_rcu is pending for this marker,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_add_probe(entry, probe, probe_private);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
 		goto end;
 	}
-	if (deferred_sync) {
-		synchronize_sched();
-		deferred_sync = 0;
-	}
-	ret = add_marker(name, format, probe, private);
-	if (ret)
-		goto end;
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
-	return ret;
+	marker_update_probes();		/* may update entry */
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	WARN_ON(!entry);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -346,171 +684,166 @@
 /**
  * marker_probe_unregister -  Disconnect a probe from a marker
  * @name: marker name
+ * @probe: probe function pointer
+ * @probe_private: probe private data
  *
  * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled
+ * section has finished.
  */
-void *marker_probe_unregister(const char *name)
+int marker_probe_unregister(const char *name,
+	marker_probe_func *probe, void *probe_private)
 {
-	struct module *probe_module;
 	struct marker_entry *entry;
-	void *private;
+	struct marker_probe_closure *old;
+	int ret = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry) {
-		private = ERR_PTR(-ENOENT);
+		ret = -ENOENT;
 		goto end;
 	}
-	entry->refcount = 0;
-	/* In what module is the probe handler ? */
-	probe_module = __module_text_address((unsigned long)entry->probe);
-	private = remove_marker(name);
-	deferred_sync = 1;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(probe_module);
-	return private;
+	marker_update_probes();		/* may update entry */
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
+	remove_marker(name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-	return private;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_probe_unregister);
 
-/**
- * marker_probe_unregister_private_data -  Disconnect a probe from a marker
- * @private: probe private data
- *
- * Unregister a marker by providing the registered private data.
- * Returns the private data given to marker_probe_register, or an ERR_PTR().
- */
-void *marker_probe_unregister_private_data(void *private)
+static struct marker_entry *
+get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
 {
-	struct module *probe_module;
+	struct marker_entry *entry;
+	unsigned int i;
 	struct hlist_head *head;
 	struct hlist_node *node;
-	struct marker_entry *entry;
-	int found = 0;
-	unsigned int i;
 
-	mutex_lock(&markers_mutex);
 	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
 		head = &marker_table[i];
 		hlist_for_each_entry(entry, node, head, hlist) {
-			if (entry->private == private) {
-				found = 1;
-				goto iter_end;
+			if (!entry->ptype) {
+				if (entry->single.func == probe
+						&& entry->single.probe_private
+						== probe_private)
+					return entry;
+			} else {
+				struct marker_probe_closure *closure;
+				closure = entry->multi;
+				for (i = 0; closure[i].func; i++) {
+					if (closure[i].func == probe &&
+							closure[i].probe_private
+							== probe_private)
+						return entry;
+				}
 			}
 		}
 	}
-iter_end:
-	if (!found) {
-		private = ERR_PTR(-ENOENT);
+	return NULL;
+}
+
+/**
+ * marker_probe_unregister_private_data -  Disconnect a probe from a marker
+ * @probe: probe function
+ * @probe_private: probe private data
+ *
+ * Unregister a probe by providing the registered private data.
+ * Only removes the first marker found in hash table.
+ * Returns 0 on success, or an error value on failure.
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled
+ * section has finished.
+ */
+int marker_probe_unregister_private_data(marker_probe_func *probe,
+		void *probe_private)
+{
+	struct marker_entry *entry;
+	int ret = 0;
+	struct marker_probe_closure *old;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker_from_private_data(probe, probe_private);
+	if (!entry) {
+		ret = -ENOENT;
 		goto end;
 	}
-	entry->refcount = 0;
-	/* In what module is the probe handler ? */
-	probe_module = __module_text_address((unsigned long)entry->probe);
-	private = remove_marker(entry->name);
-	deferred_sync = 1;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(probe_module);
-	return private;
+	marker_update_probes();		/* may update entry */
+	mutex_lock(&markers_mutex);
+	entry = get_marker_from_private_data(probe, probe_private);
+	WARN_ON(!entry);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
+	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-	return private;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
 
 /**
- * marker_arm - Arm a marker
- * @name: marker name
- *
- * Activate a marker. It keeps a reference count of the number of
- * arming/disarming done.
- * Returns 0 if ok, error value on error.
- */
-int marker_arm(const char *name)
-{
-	struct marker_entry *entry;
-	int ret = 0;
-
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry) {
-		ret = -ENOENT;
-		goto end;
-	}
-	/*
-	 * Only need to update probes when refcount passes from 0 to 1.
-	 */
-	if (entry->refcount++)
-		goto end;
-end:
-	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_arm);
-
-/**
- * marker_disarm - Disarm a marker
- * @name: marker name
- *
- * Disarm a marker. It keeps a reference count of the number of arming/disarming
- * done.
- * Returns 0 if ok, error value on error.
- */
-int marker_disarm(const char *name)
-{
-	struct marker_entry *entry;
-	int ret = 0;
-
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry) {
-		ret = -ENOENT;
-		goto end;
-	}
-	/*
-	 * Only permit decrement refcount if higher than 0.
-	 * Do probe update only on 1 -> 0 transition.
-	 */
-	if (entry->refcount) {
-		if (--entry->refcount)
-			goto end;
-	} else {
-		ret = -EPERM;
-		goto end;
-	}
-end:
-	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_disarm);
-
-/**
  * marker_get_private_data - Get a marker's probe private data
  * @name: marker name
+ * @probe: probe to match
+ * @num: get the nth matching probe's private data
  *
+ * Returns the nth private data pointer (starting from 0) matching, or an
+ * ERR_PTR.
  * Returns the private data pointer, or an ERR_PTR.
  * The private data pointer should _only_ be dereferenced if the caller is the
  * owner of the data, or its content could vanish. This is mostly used to
  * confirm that a caller is the owner of a registered probe.
  */
-void *marker_get_private_data(const char *name)
+void *marker_get_private_data(const char *name, marker_probe_func *probe,
+		int num)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct marker_entry *e;
 	size_t name_len = strlen(name) + 1;
 	u32 hash = jhash(name, name_len-1, 0);
-	int found = 0;
+	int i;
 
 	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
-			found = 1;
-			return e->private;
+			if (!e->ptype) {
+				if (num == 0 && e->single.func == probe)
+					return e->single.probe_private;
+				else
+					break;
+			} else {
+				struct marker_probe_closure *closure;
+				int match = 0;
+				closure = e->multi;
+				for (i = 0; closure[i].func; i++) {
+					if (closure[i].func != probe)
+						continue;
+					if (match++ == num)
+						return closure[i].probe_private;
+				}
+			}
 		}
 	}
 	return ERR_PTR(-ENOENT);
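
The unregister paths above never free a probe closure directly: the old
closure is parked in entry->oldptr, rcu_pending is set, and the RCU callback
frees it only once every pre-existing reader has finished. A minimal sketch of
that deferred-free pattern (probe_closure and its payload are illustrative
names, not the marker structures themselves):

	struct probe_closure {
		struct rcu_head rcu;
		void *probe_private;
	};

	static void free_closure(struct rcu_head *head)
	{
		struct probe_closure *c =
			container_of(head, struct probe_closure, rcu);

		kfree(c);
	}

	static void retire_closure(struct probe_closure *old)
	{
		/*
		 * Readers that found "old" via RCU may still be using it;
		 * defer the kfree() until a grace period has elapsed.
		 */
		call_rcu(&old->rcu, free_closure);
	}
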
diff --git a/kernel/module.c b/kernel/module.c
index 4202da9..92595ba 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2038,7 +2038,7 @@
 #ifdef CONFIG_MARKERS
 	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
-			mod->markers + mod->num_markers, NULL, NULL);
+			mod->markers + mod->num_markers);
 #endif
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2564,7 +2564,7 @@
 #endif
 
 #ifdef CONFIG_MARKERS
-void module_update_markers(struct module *probe_module, int *refcount)
+void module_update_markers(void)
 {
 	struct module *mod;
 
@@ -2572,8 +2572,7 @@
 	list_for_each_entry(mod, &modules, list)
 		if (!mod->taints)
 			marker_update_probe_range(mod->markers,
-				mod->markers + mod->num_markers,
-				probe_module, refcount);
+				mod->markers + mod->num_markers);
 	mutex_unlock(&module_mutex);
 }
 #endif
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 760dfc2..c09605f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -56,7 +56,10 @@
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
 
-/* Because of FASTCALL declaration of complete, we use this wrapper */
+/*
+ * Awaken the corresponding synchronize_rcu() instance now that a
+ * grace period has elapsed.
+ */
 static void wakeme_after_rcu(struct rcu_head  *head)
 {
 	struct rcu_synchronize *rcu;
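
For context, this callback is the tail end of synchronize_rcu(), which this
kernel builds out of call_rcu() plus a completion, roughly as follows (the
struct is the rcu_synchronize used in the function body):

	struct rcu_synchronize {
		struct rcu_head head;
		struct completion completion;
	};

	void synchronize_rcu(void)
	{
		struct rcu_synchronize rcu;

		init_completion(&rcu.completion);
		/* wakeme_after_rcu() runs once a grace period has elapsed */
		call_rcu(&rcu.head, wakeme_after_rcu);
		wait_for_completion(&rcu.completion);
	}
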
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 0deef71..6522ae5 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -630,9 +630,12 @@
 	set_current_state(state);
 
 	/* Setup the timer, when timeout != NULL */
-	if (unlikely(timeout))
+	if (unlikely(timeout)) {
 		hrtimer_start(&timeout->timer, timeout->timer.expires,
 			      HRTIMER_MODE_ABS);
+		if (!hrtimer_active(&timeout->timer))
+			timeout->task = NULL;
+	}
 
 	for (;;) {
 		/* Try to acquire the lock: */
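
With HRTIMER_MODE_ABS an already-elapsed expiry can fire before
hrtimer_start() returns, in which case hrtimer_active() is already false;
clearing timeout->task records the timeout immediately instead of letting the
waiter block forever. The acquisition loop then picks this up with a check
along these lines (a sketch, not the full loop):

	if (timeout && !timeout->task) {
		ret = -ETIMEDOUT;
		break;
	}
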
diff --git a/kernel/sched.c b/kernel/sched.c
index 3eedd52..f28f19e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@
 	struct list_head queue[MAX_RT_PRIO];
 };
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
 #include <linux/cgroup.h>
 
@@ -165,19 +165,16 @@
 
 /* task group related information */
 struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 	struct cgroup_subsys_state css;
 #endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 
-	struct sched_rt_entity **rt_se;
-	struct rt_rq **rt_rq;
-
-	unsigned int rt_ratio;
-
 	/*
 	 * shares assigned to a task group govern how much of the cpu bandwidth
 	 * is allocated to the group. The more shares a group has, the more is
@@ -213,33 +210,46 @@
 	 *
 	 */
 	unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	u64 rt_runtime;
+#endif
 
 	struct rcu_head rcu;
 	struct list_head list;
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* Default task group's sched entity on each cpu */
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
-
 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif
 
-/* task_group_mutex serializes add/remove of task groups and also changes to
+/* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
  */
-static DEFINE_MUTEX(task_group_mutex);
+static DEFINE_SPINLOCK(task_group_lock);
 
 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 /* kernel thread that runs rebalance_shares() periodically */
 static struct task_struct *lb_monitor_task;
@@ -248,18 +258,7 @@
 
 static void set_se_shares(struct sched_entity *se, unsigned long shares);
 
-/* Default task group.
- *	Every task in system belong to this group at bootup.
- */
-struct task_group init_task_group = {
-	.se	= init_sched_entity_p,
-	.cfs_rq = init_cfs_rq_p,
-
-	.rt_se	= init_sched_rt_entity_p,
-	.rt_rq	= init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
 #else
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
@@ -268,15 +267,31 @@
 #define MIN_GROUP_SHARES	2
 
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
+/* Default task group.
+ *	Every task in the system belongs to this group at bootup.
+ */
+struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	.se	= init_sched_entity_p,
+	.cfs_rq = init_cfs_rq_p,
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	.rt_se	= init_sched_rt_entity_p,
+	.rt_rq	= init_rt_rq_p,
+#endif
+};
 
 /* return group to which a task belongs */
 static inline struct task_group *task_group(struct task_struct *p)
 {
 	struct task_group *tg;
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
 #else
@@ -288,21 +303,15 @@
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 	p->se.parent = task_group(p)->se[cpu];
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 	p->rt.parent = task_group(p)->rt_se[cpu];
-}
-
-static inline void lock_task_group_list(void)
-{
-	mutex_lock(&task_group_mutex);
-}
-
-static inline void unlock_task_group_list(void)
-{
-	mutex_unlock(&task_group_mutex);
+#endif
 }
 
 static inline void lock_doms_cur(void)
@@ -318,12 +327,10 @@
 #else
 
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_task_group_list(void) { }
-static inline void unlock_task_group_list(void) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }
 
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif	/* CONFIG_GROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -363,7 +370,7 @@
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
@@ -373,7 +380,9 @@
 	int rt_throttled;
 	u64 rt_time;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
 	struct rq *rq;
 	struct list_head leaf_rt_rq_list;
 	struct task_group *tg;
@@ -447,6 +456,8 @@
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
 
@@ -652,19 +663,21 @@
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
-
-#define SCHED_RT_FRAC_SHIFT	16
-#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
+unsigned int sysctl_sched_rt_period = 1000000;
 
 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * part of the period during which we allow rt tasks to run, in us.
+ * default: 0.95s
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+int sysctl_sched_rt_runtime = 950000;
+
+/*
+ * single value that denotes runtime == period, i.e. unlimited time.
+ */
+#define RUNTIME_INF	((u64)~0ULL)
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -4571,6 +4584,15 @@
 			return -EPERM;
 	}
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Do not allow realtime tasks into groups that have no runtime
+	 * assigned.
+	 */
+	if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+		return -EPERM;
+#endif
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -7112,7 +7134,7 @@
 	/* delimiter for bitsearch: */
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
@@ -7123,7 +7145,8 @@
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 #endif
 }
@@ -7146,7 +7169,9 @@
 	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
 	se->parent = NULL;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
 		struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
 		int cpu, int add)
@@ -7175,7 +7200,7 @@
 	init_defrootdomain();
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 #endif
 
@@ -7196,7 +7221,10 @@
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
-		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		init_task_group.rt_runtime =
+			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(rq, &init_task_group,
 				&per_cpu(init_rt_rq, i),
@@ -7303,7 +7331,7 @@
 	unsigned long flags;
 	struct rq *rq;
 
-	read_lock_irq(&tasklist_lock);
+	read_lock_irqsave(&tasklist_lock, flags);
 	do_each_thread(g, p) {
 		/*
 		 * Only normalize user tasks:
@@ -7329,16 +7357,16 @@
 			continue;
 		}
 
-		spin_lock_irqsave(&p->pi_lock, flags);
+		spin_lock(&p->pi_lock);
 		rq = __task_rq_lock(p);
 
 		normalize_task(rq, p);
 
 		__task_rq_unlock(rq);
-		spin_unlock_irqrestore(&p->pi_lock, flags);
+		spin_unlock(&p->pi_lock);
 	} while_each_thread(g, p);
 
-	read_unlock_irq(&tasklist_lock);
+	read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
 #endif /* CONFIG_MAGIC_SYSRQ */
@@ -7387,9 +7415,9 @@
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
  * to reflect load distribution across cpus.
@@ -7540,7 +7568,8 @@
 }
 #endif	/* CONFIG_SMP */
 
-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
@@ -7549,49 +7578,27 @@
 			kfree(tg->cfs_rq[i]);
 		if (tg->se)
 			kfree(tg->se[i]);
-		if (tg->rt_rq)
-			kfree(tg->rt_rq[i]);
-		if (tg->rt_se)
-			kfree(tg->rt_se[i]);
 	}
 
 	kfree(tg->cfs_rq);
 	kfree(tg->se);
-	kfree(tg->rt_rq);
-	kfree(tg->rt_se);
-	kfree(tg);
 }
 
-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
 {
-	struct task_group *tg;
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
-	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
-	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-	if (!tg)
-		return ERR_PTR(-ENOMEM);
-
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
 	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
-	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
-	if (!tg->rt_rq)
-		goto err;
-	tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
-	if (!tg->rt_se)
-		goto err;
 
 	tg->shares = NICE_0_LOAD;
-	tg->rt_ratio = 0; /* XXX */
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
@@ -7606,6 +7613,79 @@
 		if (!se)
 			goto err;
 
+		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+	}
+
+	return 1;
+
+ err:
+	return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+	list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+			&cpu_rq(cpu)->leaf_cfs_rq_list);
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+	list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}
+#else
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{
+	return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+}
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
+	struct rq *rq;
+	int i;
+
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	tg->rt_runtime = 0;
+
+	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		rt_rq = kmalloc_node(sizeof(struct rt_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!rt_rq)
@@ -7616,20 +7696,75 @@
 		if (!rt_se)
 			goto err;
 
-		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
 		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
 	}
 
-	lock_task_group_list();
+	return 1;
+
+ err:
+	return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+	list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+			&cpu_rq(cpu)->leaf_rt_rq_list);
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+	list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}
+#else
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{
+	return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+static void free_sched_group(struct task_group *tg)
+{
+	free_fair_sched_group(tg);
+	free_rt_sched_group(tg);
+	kfree(tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+	struct task_group *tg;
+	unsigned long flags;
+	int i;
+
+	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+	if (!tg)
+		return ERR_PTR(-ENOMEM);
+
+	if (!alloc_fair_sched_group(tg))
+		goto err;
+
+	if (!alloc_rt_sched_group(tg))
+		goto err;
+
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-		cfs_rq = tg->cfs_rq[i];
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-		rt_rq = tg->rt_rq[i];
-		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+		register_fair_sched_group(tg, i);
+		register_rt_sched_group(tg, i);
 	}
 	list_add_rcu(&tg->list, &task_groups);
-	unlock_task_group_list();
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	return tg;
 
@@ -7648,21 +7783,16 @@
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
-	struct cfs_rq *cfs_rq = NULL;
-	struct rt_rq *rt_rq = NULL;
+	unsigned long flags;
 	int i;
 
-	lock_task_group_list();
+	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-		rt_rq = tg->rt_rq[i];
-		list_del_rcu(&rt_rq->leaf_rt_rq_list);
+		unregister_fair_sched_group(tg, i);
+		unregister_rt_sched_group(tg, i);
 	}
 	list_del_rcu(&tg->list);
-	unlock_task_group_list();
-
-	BUG_ON(!cfs_rq);
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	/* wait for possible concurrent references to cfs_rqs complete */
 	call_rcu(&tg->rcu, free_sched_group_rcu);
@@ -7703,6 +7833,7 @@
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
@@ -7728,13 +7859,14 @@
 	}
 }
 
+static DEFINE_MUTEX(shares_mutex);
+
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
-	struct cfs_rq *cfs_rq;
-	struct rq *rq;
+	unsigned long flags;
 
-	lock_task_group_list();
+	mutex_lock(&shares_mutex);
 	if (tg->shares == shares)
 		goto done;
 
@@ -7746,10 +7878,10 @@
 	 * load_balance_fair) from referring to this group first,
 	 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
 	 */
-	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-	}
+	spin_lock_irqsave(&task_group_lock, flags);
+	for_each_possible_cpu(i)
+		unregister_fair_sched_group(tg, i);
+	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	/* wait for any ongoing reference to this group to finish */
 	synchronize_sched();
@@ -7769,13 +7901,12 @@
 	 * Enable load balance activity on this group, by inserting it back on
 	 * each cpu's rq->leaf_cfs_rq_list.
 	 */
-	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-		cfs_rq = tg->cfs_rq[i];
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-	}
+	spin_lock_irqsave(&task_group_lock, flags);
+	for_each_possible_cpu(i)
+		register_fair_sched_group(tg, i);
+	spin_unlock_irqrestore(&task_group_lock, flags);
 done:
-	unlock_task_group_list();
+	mutex_unlock(&shares_mutex);
 	return 0;
 }
 
@@ -7783,35 +7914,84 @@
 {
 	return tg->shares;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+	if (runtime == RUNTIME_INF)
+		return 1ULL << 16;
+
+	runtime *= (1ULL << 16);
+	return div64_64(runtime, period);
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
 	struct task_group *tgi;
 	unsigned long total = 0;
+	unsigned long global_ratio =
+		to_ratio(sysctl_sched_rt_period,
+			 sysctl_sched_rt_runtime < 0 ?
+				RUNTIME_INF : sysctl_sched_rt_runtime);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list)
-		total += tgi->rt_ratio;
+	list_for_each_entry_rcu(tgi, &task_groups, list) {
+		if (tgi == tg)
+			continue;
+
+		total += to_ratio(period, tgi->rt_runtime);
+	}
 	rcu_read_unlock();
 
-	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-		return -EINVAL;
-
-	tg->rt_ratio = rt_ratio;
-	return 0;
+	return total + to_ratio(period, runtime) < global_ratio;
 }
 
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-	return tg->rt_ratio;
+	u64 rt_runtime, rt_period;
+	int err = 0;
+
+	rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us == -1)
+		rt_runtime = rt_period;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (rt_runtime_us == -1)
+		rt_runtime = RUNTIME_INF;
+	tg->rt_runtime = rt_runtime;
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
 
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+	if (tg->rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
+#endif
+#endif	/* CONFIG_GROUP_SCHED */
+
+#ifdef CONFIG_CGROUP_SCHED
 
 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7857,9 +8037,15 @@
 cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+		return -EINVAL;
+#else
 	/* We don't support RT-tasks being in separate groups */
 	if (tsk->sched_class != &fair_sched_class)
 		return -EINVAL;
+#endif
 
 	return 0;
 }
@@ -7871,6 +8057,7 @@
 	sched_move_task(tsk);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
 {
@@ -7883,31 +8070,70 @@
 
 	return (u64) tg->shares;
 }
+#endif
 
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_ratio_val)
+#ifdef CONFIG_RT_GROUP_SCHED
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+				struct file *file,
+				const char __user *userbuf,
+				size_t nbytes, loff_t *unused_ppos)
 {
-	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+	char buffer[64];
+	int retval = 0;
+	s64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoll(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
 }
 
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	char tmp[64];
+	long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+	int len = sprintf(tmp, "%ld\n", val);
 
-	return (u64) tg->rt_ratio;
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
+#endif
 
 static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	{
 		.name = "shares",
 		.read_uint = cpu_shares_read_uint,
 		.write_uint = cpu_shares_write_uint,
 	},
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	{
-		.name = "rt_ratio",
-		.read_uint = cpu_rt_ratio_read_uint,
-		.write_uint = cpu_rt_ratio_write_uint,
+		.name = "rt_runtime_us",
+		.read = cpu_rt_runtime_read,
+		.write = cpu_rt_runtime_write,
 	},
+#endif
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -7926,7 +8152,7 @@
 	.early_init	= 1,
 };
 
-#endif	/* CONFIG_FAIR_CGROUP_SCHED */
+#endif	/* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_CPUACCT
 
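The admission test in __rt_schedulable() works on 16.16 fixed-point
runtime/period ratios. A standalone sketch of the arithmetic in plain
userspace C (with the div64_64() quotient returned, as fixed above); note that
the default pair 950000/1000000 reproduces 62259, the default of the old
sysctl_sched_rt_ratio knob this replaces:

	#include <stdio.h>
	#include <stdint.h>

	/* 16.16 fixed-point ratio of runtime to period */
	static uint64_t to_ratio(uint64_t period, uint64_t runtime)
	{
		return (runtime << 16) / period;
	}

	int main(void)
	{
		uint64_t r = to_ratio(1000000, 950000);	/* defaults, in us */

		printf("ratio = %llu (%.2f%% of 1<<16)\n",
		       (unsigned long long)r, r * 100.0 / 65536);
		return 0;
	}
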
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 274b40d..f54792b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,14 +55,14 @@
 	return !list_empty(&rt_se->run_list);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
 	if (!rt_rq->tg)
-		return SCHED_RT_FRAC;
+		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_ratio;
+	return rt_rq->tg->rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -89,7 +89,7 @@
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -102,7 +102,7 @@
 	}
 }
 
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -110,11 +110,31 @@
 		dequeue_rt_entity(rt_se);
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
+}
+
+static int rt_se_boosted(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+	struct task_struct *p;
+
+	if (rt_rq)
+		return !!rt_rq->rt_nr_boosted;
+
+	p = rt_task_of(rt_se);
+	return p->prio != p->normal_prio;
+}
+
 #else
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return sysctl_sched_rt_ratio;
+	if (sysctl_sched_rt_runtime == -1)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -141,19 +161,23 @@
 	return NULL;
 }
 
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 }
 
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled;
+}
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 	if (rt_rq)
@@ -163,28 +187,26 @@
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
-	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
-	u64 period, ratio;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (rt_ratio == SCHED_RT_FRAC)
+	if (runtime == RUNTIME_INF)
 		return 0;
 
 	if (rt_rq->rt_throttled)
-		return 1;
+		return rt_rq_throttled(rt_rq);
 
-	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-	if (rt_rq->rt_time > ratio) {
+	if (rt_rq->rt_time > runtime) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
 
-		sched_rt_ratio_dequeue(rt_rq);
-		return 1;
+		if (rt_rq_throttled(rt_rq)) {
+			sched_rt_rq_dequeue(rt_rq);
+			return 1;
+		}
 	}
 
 	return 0;
@@ -196,17 +218,16 @@
 	u64 period;
 
 	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
 		rq->rt_period_expire += period;
 
 		for_each_leaf_rt_rq(rt_rq, rq) {
-			unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-			u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+			u64 runtime = sched_rt_runtime(rt_rq);
 
-			rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-			if (rt_rq->rt_throttled) {
+			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
-				sched_rt_ratio_enqueue(rt_rq);
+				sched_rt_rq_enqueue(rt_rq);
 			}
 		}
 
@@ -239,12 +260,7 @@
 	cpuacct_charge(curr, delta_exec);
 
 	rt_rq->rt_time += delta_exec;
-	/*
-	 * might make it a tad more accurate:
-	 *
-	 * update_sched_rt_period(rq);
-	 */
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
 }
 
@@ -253,7 +269,7 @@
 {
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
 		rt_rq->highest_prio = rt_se_prio(rt_se);
 #endif
@@ -265,6 +281,10 @@
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted++;
+#endif
 }
 
 static inline
@@ -273,7 +293,7 @@
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_rq->rt_nr_running) {
 		struct rt_prio_array *array;
 
@@ -295,6 +315,12 @@
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted--;
+
+	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
+#endif
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +329,7 @@
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
 
-	if (group_rq && group_rq->rt_throttled)
+	if (group_rq && rt_rq_throttled(group_rq))
 		return;
 
 	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -496,7 +522,7 @@
 	if (unlikely(!rt_rq->rt_nr_running))
 		return NULL;
 
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (rt_rq_throttled(rt_rq))
 		return NULL;
 
 	do {
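
The throttling above is plain bucket accounting: rt_time accrues service
time, the rt_rq is throttled once rt_time exceeds its runtime budget, and
each period boundary decays rt_time by one budget and unthrottles once it
drops back under. A toy userspace model of the cycle (arbitrary time units,
all names illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long rt_time = 0, runtime = 95, period = 100;
		int throttled = 0, t;

		for (t = 1; t <= 300; t++) {
			if (!throttled)
				rt_time++;	/* an rt task ran this tick */
			if (!throttled && rt_time > runtime) {
				throttled = 1;
				printf("t=%3d throttled\n", t);
			}
			if (t % period == 0) {	/* period boundary */
				rt_time -= rt_time < runtime ? rt_time : runtime;
				if (throttled && rt_time < runtime) {
					throttled = 0;
					printf("t=%3d unthrottled\n", t);
				}
			}
		}
		return 0;
	}
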
diff --git a/kernel/signal.c b/kernel/signal.c
index 2c1f08d..84917fe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -972,7 +972,7 @@
 	}
 }
 
-int fastcall __fatal_signal_pending(struct task_struct *tsk)
+int __fatal_signal_pending(struct task_struct *tsk)
 {
 	return sigismember(&tsk->pending.signal, SIGKILL);
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d41ef6b..8b7e954 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,22 +311,6 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_period_ms",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_ratio",
-		.data		= &sysctl_sched_rt_ratio,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	{
 		.ctl_name       = CTL_UNNUMBERED,
@@ -348,6 +332,22 @@
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_compat_yield",
 		.data		= &sysctl_sched_compat_yield,
 		.maxlen		= sizeof(unsigned int),
@@ -978,8 +978,8 @@
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nr_overcommit_hugepages",
-		.data		= &nr_overcommit_huge_pages,
-		.maxlen		= sizeof(nr_overcommit_huge_pages),
+		.data		= &sysctl_overcommit_huge_pages,
+		.maxlen		= sizeof(sysctl_overcommit_huge_pages),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_overcommit_handler,
 	},
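
With the standard /proc/sys mapping of the procname fields above, the two
knobs surface as /proc/sys/kernel/sched_rt_period_us and
/proc/sys/kernel/sched_rt_runtime_us. A small userspace reader, for
illustration:

	#include <stdio.h>

	static long read_knob(const char *path)
	{
		long val = 0;
		FILE *f = fopen(path, "r");

		if (f) {
			if (fscanf(f, "%ld", &val) != 1)
				val = 0;
			fclose(f);
		}
		return val;
	}

	int main(void)
	{
		long period = read_knob("/proc/sys/kernel/sched_rt_period_us");
		long runtime = read_knob("/proc/sys/kernel/sched_rt_runtime_us");

		/* runtime == -1 means unlimited (RUNTIME_INF) */
		printf("rt budget: %ld of %ld us per period\n", runtime, period);
		return 0;
	}
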
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl
index 62b1287..4146803 100644
--- a/kernel/timeconst.pl
+++ b/kernel/timeconst.pl
@@ -339,7 +339,7 @@
 	print "\n";
 
 	foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ',
-		      'USEC_TO_HZ','HZ_TO_USEC') {
+		      'HZ_TO_USEC','USEC_TO_HZ') {
 		foreach $bit (32, 64) {
 			foreach $suf ('MUL', 'ADJ', 'SHR') {
 				printf "#define %-23s %s\n",
diff --git a/kernel/user.c b/kernel/user.c
index 7d7900c..7132022 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -57,7 +57,7 @@
 	.uid_keyring	= &root_user_keyring,
 	.session_keyring = &root_session_keyring,
 #endif
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	.tg		= &init_task_group,
 #endif
 };
@@ -90,7 +90,7 @@
 	return NULL;
 }
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
 {
@@ -113,15 +113,15 @@
 	sched_move_task(p);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED */
+#else	/* CONFIG_USER_SCHED */
 
 static void sched_destroy_user(struct user_struct *up) { }
 static int sched_create_user(struct user_struct *up) { return 0; }
 static void sched_switch_user(struct task_struct *p) { }
 
-#endif	/* CONFIG_FAIR_USER_SCHED */
+#endif	/* CONFIG_USER_SCHED */
 
-#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
@@ -137,6 +137,7 @@
 }
 
 /* uid directory attributes */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static ssize_t cpu_shares_show(struct kobject *kobj,
 			       struct kobj_attribute *attr,
 			       char *buf)
@@ -163,10 +164,45 @@
 
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   char *buf)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+
+	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
+}
+
+static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+	unsigned long rt_runtime;
+	int rc;
+
+	sscanf(buf, "%lu", &rt_runtime);
+
+	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
+
+	return (rc ? rc : size);
+}
+
+static struct kobj_attribute cpu_rt_runtime_attr =
+	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+#endif
 
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	&cpu_share_attr.attr,
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	&cpu_rt_runtime_attr.attr,
+#endif
 	NULL
 };
 
@@ -269,7 +305,7 @@
 	schedule_work(&up->work);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
+#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
@@ -373,7 +409,7 @@
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
-			/* This case is not possible when CONFIG_FAIR_USER_SCHED
+			/* This case is not possible when CONFIG_USER_SCHED
 			 * is defined, since we serialize alloc_uid() using
 			 * uids_mutex. Hence no need to call
 			 * sched_destroy_user() or remove_user_sysfs_dir().
diff --git a/mm/filemap.c b/mm/filemap.c
index b7b1be6..5c74b68 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -604,7 +604,7 @@
 }
 EXPORT_SYMBOL(__lock_page);
 
-int fastcall __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d9a3803..cb1b3a7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,14 +24,15 @@
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
 static unsigned long surplus_huge_pages;
+static unsigned long nr_overcommit_huge_pages;
 unsigned long max_huge_pages;
+unsigned long sysctl_overcommit_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
 static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-unsigned long nr_overcommit_huge_pages;
 static int hugetlb_next_nid;
 
 /*
@@ -609,8 +610,9 @@
 			struct file *file, void __user *buffer,
 			size_t *length, loff_t *ppos)
 {
-	spin_lock(&hugetlb_lock);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+	spin_lock(&hugetlb_lock);
+	nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
 	spin_unlock(&hugetlb_lock);
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index e5628a5..717aa0e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -989,6 +989,8 @@
 	int i;
 	unsigned int vm_flags;
 
+	if (len <= 0)
+		return 0;
 	/* 
 	 * Require read or write permissions.
 	 * If 'force' is set, we only require the "MAY" flags.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83c69f8..8d246c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -116,22 +116,51 @@
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
-	int empty = nodes_empty(*nodes);
+	int was_empty, is_empty;
+
+	if (!nodes)
+		return 0;
+
+	/*
+	 * "Contextualize" the incoming nodemask for cpusets:
+	 * remember whether the incoming nodemask was empty.  If not,
+	 * restrict the nodes to the allowed nodes in the cpuset.
+	 * This is guaranteed to be a subset of nodes with memory.
+	 */
+	cpuset_update_task_memory_state();
+	is_empty = was_empty = nodes_empty(*nodes);
+	if (!was_empty) {
+		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
+		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		if (!empty)
+		/*
+		 * require caller to specify an empty nodemask
+		 * before "contextualization"
+		 */
+		if (!was_empty)
 			return -EINVAL;
 		break;
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
-		/* Preferred will only use the first bit, but allow
-		   more for now. */
-		if (empty)
+		/*
+		 * require at least 1 valid node after "contextualization"
+		 */
+		if (is_empty)
+			return -EINVAL;
+		break;
+	case MPOL_PREFERRED:
+		/*
+		 * Did caller specify invalid nodes?
+		 * Don't silently accept this as "local allocation".
+		 */
+		if (!was_empty && is_empty)
 			return -EINVAL;
 		break;
 	}
- 	return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
+	return 0;
 }
 
 /* Generate a custom zonelist for the BIND policy. */
@@ -188,8 +217,6 @@
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		nodes_and(policy->v.nodes, policy->v.nodes,
-					node_states[N_HIGH_MEMORY]);
 		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
@@ -421,18 +448,6 @@
 	return err;
 }
 
-static int contextualize_policy(int mode, nodemask_t *nodes)
-{
-	if (!nodes)
-		return 0;
-
-	cpuset_update_task_memory_state();
-	if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
-		return -EINVAL;
-	return mpol_check_policy(mode, nodes);
-}
-
-
 /*
  * Update task->flags PF_MEMPOLICY bit: set iff non-default
  * mempolicy.  Allows more rapid checking of this (combined perhaps
@@ -468,7 +483,7 @@
 {
 	struct mempolicy *new;
 
-	if (contextualize_policy(mode, nodes))
+	if (mpol_check_policy(mode, nodes))
 		return -EINVAL;
 	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
@@ -915,10 +930,6 @@
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
-#ifdef CONFIG_CPUSETS
-	/* Restrict the nodes to the allowed nodes in the cpuset */
-	nodes_and(nodes, nodes, current->mems_allowed);
-#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index d3e4e18..0c2c937 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -465,7 +465,7 @@
 	return len;
 }
 
-void fastcall __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
+void __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
 {
 	BT_DBG("dlc %p state %ld", d, d->state);
 
@@ -476,7 +476,7 @@
 	rfcomm_schedule(RFCOMM_SCHED_TX);
 }
 
-void fastcall __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
+void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
 {
 	BT_DBG("dlc %p state %ld", d, d->state);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 9549417..b3e19ae 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2143,7 +2143,7 @@
  *
  * The entry's receive function will be scheduled to run
  */
-void fastcall __napi_schedule(struct napi_struct *n)
+void __napi_schedule(struct napi_struct *n)
 {
 	unsigned long flags;
 
@@ -3038,8 +3038,7 @@
 EXPORT_SYMBOL(dev_unicast_sync);
 
 /**
- *	dev_unicast_unsync - Remove synchronized addresses from the destination
- *			     device
+ *	dev_unicast_unsync - Remove synchronized addresses from the destination device
  *	@to: destination device
  *	@from: source device
  *
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e35422..cfc07da 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1907,11 +1907,11 @@
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
- * Note: The size of each block of data returned can be arbitary,
+ * Note 1: The size of each block of data returned can be arbitrary,
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non-linear data.
  *
- * Note: Fragment lists within fragments are not implemented
+ * Note 2: Fragment lists within fragments are not implemented
  *       at the moment, state->root_skb could be replaced with
  *       a stack for this purpose.
  */
diff --git a/net/core/sock.c b/net/core/sock.c
index 433715f..09cb3a7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1731,7 +1731,7 @@
 	atomic_set(&sk->sk_drops, 0);
 }
 
-void fastcall lock_sock_nested(struct sock *sk, int subclass)
+void lock_sock_nested(struct sock *sk, int subclass)
 {
 	might_sleep();
 	spin_lock_bh(&sk->sk_lock.slock);
@@ -1748,7 +1748,7 @@
 
 EXPORT_SYMBOL(lock_sock_nested);
 
-void fastcall release_sock(struct sock *sk)
+void release_sock(struct sock *sk)
 {
 	/*
 	 * The sk_lock has mutex_unlock() semantics:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0998e6d..8c6a7f1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -464,9 +464,9 @@
 
 /**
  * rpc_bind_new_program - bind a new RPC program to an existing client
- * @old - old rpc_client
- * @program - rpc program to set
- * @vers - rpc program version
+ * @old: old rpc_client
+ * @program: rpc program to set
+ * @vers: rpc program version
  *
  * Clones the rpc client and sets up a new RPC program. This is mainly
  * of use for enabling different RPC programs to share the same transport.
@@ -575,7 +575,7 @@
  * @clnt: pointer to RPC client
  * @msg: RPC call parameters
  * @flags: RPC call flags
- * @ops: RPC call ops
+ * @tk_ops: RPC call ops
  * @data: user call data
  */
 int
@@ -610,7 +610,7 @@
  * rpc_peeraddr - extract remote peer address from clnt's xprt
  * @clnt: RPC client structure
  * @buf: target buffer
- * @size: length of target buffer
+ * @bufsize: length of target buffer
  *
  * Returns the number of bytes that are actually in the stored address.
  */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7e19716..0e3ead7 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -677,7 +677,7 @@
 /**
  * rpc_mkdir - Create a new directory in rpc_pipefs
  * @path: path from the rpc_pipefs root to the new directory
- * @rpc_clnt: rpc client to associate with this directory
+ * @rpc_client: rpc client to associate with this directory
  *
  * This creates a directory at the given @path associated with
 * @rpc_client, which will contain a file named "info" with some basic
@@ -748,6 +748,7 @@
  * @private: private data to associate with the pipe, for the caller's use
  * @ops: operations defining the behavior of the pipe: upcall, downcall,
  *	release_pipe, and destroy_msg.
+ * @flags: rpc_inode flags
  *
  * Data is made available for userspace to read by calls to
  * rpc_queue_upcall().  The actual reads will result in calls to
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index cfcade9..d5553b8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -124,7 +124,7 @@
 
 /**
  * xprt_unregister_transport - unregister a transport implementation
- * transport: transport to unregister
+ * @transport: transport to unregister
  *
  * Returns:
  * 0:		transport successfully unregistered
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 3e32194..0598b22 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -159,7 +159,8 @@
 	BUG_ON(sge_count >= 32);
 	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
 		"write_len=%d, xdr_sge=%p, sge_count=%d\n",
-		rmr, to, xdr_off, write_len, xdr_sge, sge_count);
+		rmr, (unsigned long long)to, xdr_off,
+		write_len, xdr_sge, sge_count);
 
 	ctxt = svc_rdma_get_context(xprt);
 	ctxt->count = 0;
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index a367975..c8e099d 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -20,31 +20,27 @@
 	marker_probe_func *probe_func;
 };
 
-void probe_subsystem_event(const struct marker *mdata, void *private,
-	const char *format, ...)
+void probe_subsystem_event(void *probe_data, void *call_data,
+	const char *format, va_list *args)
 {
-	va_list ap;
 	/* Declare args */
 	unsigned int value;
 	const char *mystr;
 
 	/* Assign args */
-	va_start(ap, format);
-	value = va_arg(ap, typeof(value));
-	mystr = va_arg(ap, typeof(mystr));
+	value = va_arg(*args, typeof(value));
+	mystr = va_arg(*args, typeof(mystr));
 
 	/* Call printk */
-	printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+	printk(KERN_INFO "Value %u, string %s\n", value, mystr);
 
 	/* or count, check rights, serialize data in a buffer */
-
-	va_end(ap);
 }
 
 atomic_t eventb_count = ATOMIC_INIT(0);
 
-void probe_subsystem_eventb(const struct marker *mdata, void *private,
-	const char *format, ...)
+void probe_subsystem_eventb(void *probe_data, void *call_data,
+	const char *format, va_list *args)
 {
 	/* Increment counter */
 	atomic_inc(&eventb_count);
@@ -72,10 +68,6 @@
 		if (result)
 			printk(KERN_INFO "Unable to register probe %s\n",
 				probe_array[i].name);
-		result = marker_arm(probe_array[i].name);
-		if (result)
-			printk(KERN_INFO "Unable to arm probe %s\n",
-				probe_array[i].name);
 	}
 	return 0;
 }
@@ -85,7 +77,8 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(probe_array); i++)
-		marker_probe_unregister(probe_array[i].name);
+		marker_probe_unregister(probe_array[i].name,
+			probe_array[i].probe_func, &probe_array[i]);
 	printk(KERN_INFO "Number of event b : %u\n",
 			atomic_read(&eventb_count));
 }
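
For completeness, the instrumentation side these probes pair with: a marker
is dropped into kernel code with trace_mark(), and a probe attaches to it by
name and format string through the new four-argument marker_probe_register()
(the format shown is an assumption about what probe_array carries):

	#include <linux/marker.h>

	void do_something(int value, const char *str)
	{
		trace_mark(subsystem_event, "integer %d string %s",
			   value, str);
	}

	/* in the module init path, per probe_array entry: */
	result = marker_probe_register("subsystem_event",
				       "integer %d string %s",
				       probe_subsystem_event,
				       &probe_array[i]);
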
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index da3559e..d64e6ba 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -39,10 +39,13 @@
 # - If they are equal no change, and no timestamp update
 # - stdin is piped in from the first prerequisite ($<) so one has
 #   to specify a valid file as first prerequisite (often the kbuild file)
+       chk_filechk = :
  quiet_chk_filechk = echo '  CHK     $@'
 silent_chk_filechk = :
+       upd_filechk = :
  quiet_upd_filechk = echo '  UPD     $@'
 silent_upd_filechk = :
+
 define filechk
 	$(Q)set -e;				\
 	$($(quiet)chk_filechk);			\
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 65e707e..cfc004e 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -45,6 +46,10 @@
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -63,6 +68,8 @@
  $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
  $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S)      \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -82,6 +89,10 @@
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
+ifdef CONFIG_MARKERS
+$(markersfile):	       __modpost ;
+endif
+
 
 # Step 5), compile all *.mod.c files
 
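The resulting Module.markers is a three-column, tab-separated list: marker
name, defining module, then the format string (which may itself contain
whitespace). Hypothetical contents, matching the layout modpost emits:

	subsystem_event	samples/markers/marker-example	integer %d string %s
	kernel_sched_wakeup	vmlinux	pid %d state %ld
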
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index dbe1fb5..6174277 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -11,6 +11,8 @@
  * Usage: modpost vmlinux module1.o module2.o ...
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -435,6 +437,8 @@
 			info->export_unused_gpl_sec = i;
 		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
 			info->export_gpl_future_sec = i;
+		else if (strcmp(secname, "__markers_strings") == 0)
+			info->markers_strings_sec = i;
 
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
 			continue;
@@ -1470,6 +1474,62 @@
 	}
 }
 
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+	const char *strings = (const char *) info->hdr + sh->sh_offset;
+	const Elf_Sym *sym, *first_sym, *last_sym;
+	size_t n;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	/*
+	 * First count the strings.  We look for all the symbols defined
+	 * in the __markers_strings section named __mstrtab_*.  For
+	 * these local names, the compiler puts a random .NNN suffix on,
+	 * so the names don't correspond exactly.
+	 */
+	first_sym = last_sym = NULL;
+	n = 0;
+	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			if (first_sym == NULL)
+				first_sym = sym;
+			last_sym = sym;
+			++n;
+		}
+
+	if (n == 0)
+		return;
+
+	/*
+	 * Now collect each name and format into a line for the output.
+	 * Lines look like:
+	 *	marker_name	vmlinux	marker %s format %d
+	 * The format string after the second \t can use whitespace.
+	 */
+	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+	mod->nmarkers = n;
+
+	n = 0;
+	for (sym = first_sym; sym <= last_sym; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			const char *name = strings + sym->st_value;
+			const char *fmt = strchr(name, '\0') + 1;
+			char *line = NULL;
+			asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+			NOFAIL(line);
+			mod->markers[n++] = line;
+		}
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1521,6 +1581,8 @@
 		get_src_version(modname, mod->srcversion,
 				sizeof(mod->srcversion)-1);
 
+	get_markers(&info, mod);
+
 	parse_elf_finish(&info);
 
 	/* Our trick to get versioning for struct_module - it's
@@ -1867,16 +1929,104 @@
 	write_if_changed(&buf, fname);
 }
 
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+	char *line = NULL;
+	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+	NOFAIL(line);
+
+	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+						     sizeof mod->markers[0])));
+	mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+	unsigned long size, pos = 0;
+	void *file = grab_file(fname, &size);
+	char *line;
+
+	if (!file)		/* No old markers, silently ignore */
+		return;
+
+	while ((line = get_next_line(&pos, file, size))) {
+		char *marker, *modname, *fmt;
+		struct module *mod;
+
+		marker = line;
+		modname = strchr(marker, '\t');
+		if (!modname)
+			goto fail;
+		*modname++ = '\0';
+		fmt = strchr(modname, '\t');
+		if (!fmt)
+			goto fail;
+		*fmt++ = '\0';
+		if (*marker == '\0' || *modname == '\0')
+			goto fail;
+
+		mod = find_module(modname);
+		if (!mod) {
+			if (is_vmlinux(modname))
+				have_vmlinux = 1;
+			mod = new_module(NOFAIL(strdup(modname)));
+			mod->skip = 1;
+		}
+
+		add_marker(mod, marker, fmt);
+	}
+	return;
+fail:
+	fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+	struct buffer buf = { };
+	struct module *mod;
+	size_t i;
+
+	for (mod = modules; mod; mod = mod->next)
+		if ((!external_module || !mod->skip) && mod->markers != NULL) {
+			/*
+			 * Sort the strings so we can skip duplicates when
+			 * we write them out.
+			 */
+			qsort(mod->markers, mod->nmarkers,
+			      sizeof mod->markers[0], &compare_strings);
+			for (i = 0; i < mod->nmarkers; ++i) {
+				char *line = mod->markers[i];
+				buf_write(&buf, line, strlen(line));
+				while (i + 1 < mod->nmarkers &&
+				       !strcmp(mod->markers[i],
+					       mod->markers[i + 1]))
+					free(mod->markers[i++]);
+				free(mod->markers[i]);
+			}
+			free(mod->markers);
+			mod->markers = NULL;
+		}
+
+	write_if_changed(&buf, fname);
+}
+
 int main(int argc, char **argv)
 {
 	struct module *mod;
 	struct buffer buf = { };
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
+	char *markers_read = NULL;
+	char *markers_write = NULL;
 	int opt;
 	int err;
 
-	while ((opt = getopt(argc, argv, "i:I:msSo:aw")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:msSo:awM:K:")) != -1) {
 		switch (opt) {
 		case 'i':
 			kernel_read = optarg;
@@ -1903,6 +2053,12 @@
 		case 'w':
 			warn_unresolved = 1;
 			break;
+		case 'M':
+			markers_write = optarg;
+			break;
+		case 'K':
+			markers_read = optarg;
+			break;
 		default:
 			exit(1);
 		}
@@ -1950,5 +2106,11 @@
 		     "'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n",
 		     sec_mismatch_count);
 
+	if (markers_read)
+		read_markers(markers_read);
+
+	if (markers_write)
+		write_markers(markers_write);
+
 	return err;
 }
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 999f15e..565c587 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -112,6 +112,8 @@
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char	     srcversion[25];
 };
 
@@ -126,6 +128,7 @@
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e5ed075..44f16d9 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1272,12 +1272,18 @@
 			    SECCLASS_PROCESS, perms, NULL);
 }
 
+#if CAP_LAST_CAP > 63
+#error Fix SELinux to handle capabilities > 63.
+#endif
+
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
 			       int cap)
 {
 	struct task_security_struct *tsec;
 	struct avc_audit_data ad;
+	u16 sclass;
+	u32 av = CAP_TO_MASK(cap);
 
 	tsec = tsk->security;
 
@@ -1285,8 +1291,19 @@
 	ad.tsk = tsk;
 	ad.u.cap = cap;
 
-	return avc_has_perm(tsec->sid, tsec->sid,
-			    SECCLASS_CAPABILITY, CAP_TO_MASK(cap), &ad);
+	switch (CAP_TO_INDEX(cap)) {
+	case 0:
+		sclass = SECCLASS_CAPABILITY;
+		break;
+	case 1:
+		sclass = SECCLASS_CAPABILITY2;
+		break;
+	default:
+		printk(KERN_ERR
+		       "SELinux:  out of range capability %d\n", cap);
+		BUG();
+	}
+	return avc_has_perm(tsec->sid, tsec->sid, sclass, av, &ad);
 }
 
 /* Check whether a task is allowed to use a system operation. */
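
The sclass switch mirrors how capabilities became a two-word set in this
kernel: CAP_TO_INDEX() selects the 32-bit word and CAP_TO_MASK() the bit
within it, so capabilities 0-31 map to class "capability" and 32-63 to
"capability2". A standalone illustration (macros adapted from
linux/capability.h; treating CAP_MAC_OVERRIDE as capability 32 is an
assumption about this kernel's numbering):

	#include <stdio.h>

	#define CAP_TO_INDEX(x)	((x) >> 5)	/* which 32-bit word */
	#define CAP_TO_MASK(x)	(1U << ((x) & 31))

	int main(void)
	{
		int cap = 32;	/* e.g. CAP_MAC_OVERRIDE */

		printf("cap %d -> index %d, mask 0x%08x\n",
		       cap, CAP_TO_INDEX(cap), CAP_TO_MASK(cap));
		return 0;
	}
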
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index 399f868..d569669 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -132,6 +132,9 @@
    S_(SECCLASS_CAPABILITY, CAPABILITY__LEASE, "lease")
    S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_WRITE, "audit_write")
    S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_CONTROL, "audit_control")
+   S_(SECCLASS_CAPABILITY, CAPABILITY__SETFCAP, "setfcap")
+   S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_OVERRIDE, "mac_override")
+   S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_ADMIN, "mac_admin")
    S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ, "nlmsg_read")
    S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE, "nlmsg_write")
    S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_READ, "nlmsg_read")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index 84c9abc..75b41311a 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -533,6 +533,9 @@
 #define CAPABILITY__LEASE                         0x10000000UL
 #define CAPABILITY__AUDIT_WRITE                   0x20000000UL
 #define CAPABILITY__AUDIT_CONTROL                 0x40000000UL
+#define CAPABILITY__SETFCAP                       0x80000000UL
+#define CAPABILITY2__MAC_OVERRIDE                 0x00000001UL
+#define CAPABILITY2__MAC_ADMIN                    0x00000002UL
 #define NETLINK_ROUTE_SOCKET__IOCTL               0x00000001UL
 #define NETLINK_ROUTE_SOCKET__READ                0x00000002UL
 #define NETLINK_ROUTE_SOCKET__WRITE               0x00000004UL
diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h
index b1b0d1d..bd813c3 100644
--- a/security/selinux/include/class_to_string.h
+++ b/security/selinux/include/class_to_string.h
@@ -71,3 +71,4 @@
     S_(NULL)
     S_(NULL)
     S_("peer")
+    S_("capability2")
diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h
index 09e9dd23..febf886 100644
--- a/security/selinux/include/flask.h
+++ b/security/selinux/include/flask.h
@@ -51,6 +51,7 @@
 #define SECCLASS_DCCP_SOCKET                             60
 #define SECCLASS_MEMPROTECT                              61
 #define SECCLASS_PEER                                    68
+#define SECCLASS_CAPABILITY2                             69
 
 /*
  * Security identifier indices for initial entities
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 1c11e42..5b69048 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -701,7 +701,7 @@
 		return -EOPNOTSUPP;
 
 	sock = SOCKET_I(ip);
-	if (sock == NULL)
+	if (sock == NULL || sock->sk == NULL)
 		return -EOPNOTSUPP;
 
 	ssp = sock->sk->sk_security;
@@ -1280,10 +1280,11 @@
  */
 static int smack_netlabel(struct sock *sk)
 {
-	struct socket_smack *ssp = sk->sk_security;
+	struct socket_smack *ssp;
 	struct netlbl_lsm_secattr secattr;
 	int rc = 0;
 
+	ssp = sk->sk_security;
 	netlbl_secattr_init(&secattr);
 	smack_to_secattr(ssp->smk_out, &secattr);
 	if (secattr.flags != NETLBL_SECATTR_NONE)
@@ -1331,7 +1332,7 @@
 		return -EOPNOTSUPP;
 
 	sock = SOCKET_I(inode);
-	if (sock == NULL)
+	if (sock == NULL || sock->sk == NULL)
 		return -EOPNOTSUPP;
 
 	ssp = sock->sk->sk_security;
@@ -1362,7 +1363,7 @@
 static int smack_socket_post_create(struct socket *sock, int family,
 				    int type, int protocol, int kern)
 {
-	if (family != PF_INET)
+	if (family != PF_INET || sock->sk == NULL)
 		return 0;
 	/*
 	 * Set the outbound netlbl.