Merge tag 'v6.6-vfs.super.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull more superblock follow-on fixes from Christian Brauner:
 "This contains two more small follow-up fixes for the super work this
  cycle. I went through all filesystems once more and detected two minor
  issues that still needed fixing:

   - Some filesystems support mtd devices (e.g., mount -t jffs2 mtd2
     /mnt). The mtd infrastructure uses the sb->s_mtd pointer to find an
     existing superblock. When the mtd device is put and sb->s_mtd
     cleared the superblock can still be found fs_supers and so this
     risks a use-after-free.

     Add a small patch that aligns mtd with what we did for regular
     block devices and switch keying to rely on sb->s_dev.

     (This was tested with mtd devices and jffs2 as xfstests doesn't
     support mtd devices.)

   - Switch nfs back to rely on kill_anon_super() so the superblock is
     removed from the list of active supers before sb->s_fs_info is
     freed"

* tag 'v6.6-vfs.super.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  NFS: switch back to using kill_anon_super
  mtd: key superblock by device number
  fs: export sget_dev()
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 12e2bf9..40f7cd2 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -80,3 +80,163 @@
 Description:	read only
 		This sysfs file exposes the cpumask which is designated to make
 		HCALLs to retrieve hv-gpci pmu event counter data.
+
+What:		/sys/devices/hv_gpci/interface/processor_bus_topology
+Date:		July 2023
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	admin read only
+		This sysfs file exposes the system topology information by making HCALL
+		H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+		PROCESSOR_BUS_TOPOLOGY(0xD0).
+
+		* This sysfs file will be created only for power10 and above platforms.
+
+		* User needs root privileges to read data from this sysfs file.
+
+		* This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+		  "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+		  HCALL with return error type "H_AUTHORITY" can be resolved during
+		  runtime by setting "Enable Performance Information Collection" option.
+
+		* The end user reading this sysfs file must decode the content as per
+		  underlying platform/firmware.
+
+		Possible error codes while reading this sysfs file:
+
+		* "-EPERM" : Partition is not permitted to retrieve performance information,
+			    required to set "Enable Performance Information Collection" option.
+
+		* "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+			   or because of some hardware error. Refer to getPerfCountInfo documentation for
+			   more information.
+
+		* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:		/sys/devices/hv_gpci/interface/processor_config
+Date:		July 2023
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	admin read only
+		This sysfs file exposes the system topology information by making HCALL
+		H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+		PROCESSOR_CONFIG(0x90).
+
+		* This sysfs file will be created only for power10 and above platforms.
+
+		* User needs root privileges to read data from this sysfs file.
+
+		* This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+		  "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+		  HCALL with return error type "H_AUTHORITY" can be resolved during
+		  runtime by setting "Enable Performance Information Collection" option.
+
+		* The end user reading this sysfs file must decode the content as per
+		  underlying platform/firmware.
+
+		Possible error codes while reading this sysfs file:
+
+		* "-EPERM" : Partition is not permitted to retrieve performance information,
+			    required to set "Enable Performance Information Collection" option.
+
+		* "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+			   or because of some hardware error. Refer to getPerfCountInfo documentation for
+			   more information.
+
+		* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:		/sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor
+Date:		July 2023
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	admin read only
+		This sysfs file exposes the system topology information by making HCALL
+		H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+		AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0).
+
+		* This sysfs file will be created only for power10 and above platforms.
+
+		* User needs root privileges to read data from this sysfs file.
+
+		* This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+		  "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+		  HCALL with return error type "H_AUTHORITY" can be resolved during
+		  runtime by setting "Enable Performance Information Collection" option.
+
+		* The end user reading this sysfs file must decode the content as per
+		  underlying platform/firmware.
+
+		Possible error codes while reading this sysfs file:
+
+		* "-EPERM" : Partition is not permitted to retrieve performance information,
+			    required to set "Enable Performance Information Collection" option.
+
+		* "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+			   or because of some hardware error. Refer to getPerfCountInfo documentation for
+			   more information.
+
+		* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:		/sys/devices/hv_gpci/interface/affinity_domain_via_domain
+Date:		July 2023
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	admin read only
+		This sysfs file exposes the system topology information by making HCALL
+		H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+		AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0).
+
+		* This sysfs file will be created only for power10 and above platforms.
+
+		* User needs root privileges to read data from this sysfs file.
+
+		* This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+		  "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+		  HCALL with return error type "H_AUTHORITY" can be resolved during
+		  runtime by setting "Enable Performance Information Collection" option.
+
+		* The end user reading this sysfs file must decode the content as per
+		  underlying platform/firmware.
+
+		Possible error codes while reading this sysfs file:
+
+		* "-EPERM" : Partition is not permitted to retrieve performance information,
+			    required to set "Enable Performance Information Collection" option.
+
+		* "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+			   or because of some hardware error. Refer to getPerfCountInfo documentation for
+			   more information.
+
+		* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:		/sys/devices/hv_gpci/interface/affinity_domain_via_partition
+Date:		July 2023
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	admin read only
+		This sysfs file exposes the system topology information by making HCALL
+		H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+		AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1).
+
+		* This sysfs file will be created only for power10 and above platforms.
+
+		* User needs root privileges to read data from this sysfs file.
+
+		* This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+		  "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+		  HCALL with return error type "H_AUTHORITY" can be resolved during
+		  runtime by setting "Enable Performance Information Collection" option.
+
+		* The end user reading this sysfs file must decode the content as per
+		  underlying platform/firmware.
+
+		Possible error codes while reading this sysfs file:
+
+		* "-EPERM" : Partition is not permitted to retrieve performance information,
+			    required to set "Enable Performance Information Collection" option.
+
+		* "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+			   or because of some hardware error. Refer to getPerfCountInfo documentation for
+			   more information.
+
+		* "-EFBIG" : System information exceeds PAGE_SIZE.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0b739a3..fcf79ac 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3753,7 +3753,7 @@
 
 	nohibernate	[HIBERNATION] Disable hibernation and resume.
 
-	nohlt		[ARM,ARM64,MICROBLAZE,MIPS,SH] Forces the kernel to
+	nohlt		[ARM,ARM64,MICROBLAZE,MIPS,PPC,SH] Forces the kernel to
 			busy wait in do_idle() and not use the arch_cpu_idle()
 			implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
 			to be effective. This is useful on platforms where the
@@ -3889,10 +3889,10 @@
 	nosmp		[SMP] Tells an SMP kernel to act as a UP kernel,
 			and disable the IO APIC.  legacy for "maxcpus=0".
 
-	nosmt		[KNL,MIPS,S390] Disable symmetric multithreading (SMT).
+	nosmt		[KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
-			[KNL,X86] Disable symmetric multithreading (SMT).
+			[KNL,X86,PPC] Disable symmetric multithreading (SMT).
 			nosmt=force: Force disable SMT, cannot be undone
 				     via the sysfs control file.
 
diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
index c73d133..8ac64d7 100644
--- a/Documentation/arch/x86/index.rst
+++ b/Documentation/arch/x86/index.rst
@@ -22,6 +22,7 @@
    mtrr
    pat
    intel-hfi
+   shstk
    iommu
    intel_txt
    amd-memory-encryption
diff --git a/Documentation/arch/x86/shstk.rst b/Documentation/arch/x86/shstk.rst
new file mode 100644
index 0000000..60260e8
--- /dev/null
+++ b/Documentation/arch/x86/shstk.rst
@@ -0,0 +1,179 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Control-flow Enforcement Technology (CET) Shadow Stack
+======================================================
+
+CET Background
+==============
+
+Control-flow Enforcement Technology (CET) covers several related x86 processor
+features that provide protection against control flow hijacking attacks. CET
+can protect both applications and the kernel.
+
+CET introduces shadow stack and indirect branch tracking (IBT). A shadow stack
+is a secondary stack allocated from memory which cannot be directly modified by
+applications. When executing a CALL instruction, the processor pushes the
+return address to both the normal stack and the shadow stack. Upon
+function return, the processor pops the shadow stack copy and compares it
+to the normal stack copy. If the two differ, the processor raises a
+control-protection fault. IBT verifies indirect CALL/JMP targets are intended
+as marked by the compiler with 'ENDBR' opcodes. Not all CPU's have both Shadow
+Stack and Indirect Branch Tracking. Today in the 64-bit kernel, only userspace
+shadow stack and kernel IBT are supported.
+
+Requirements to use Shadow Stack
+================================
+
+To use userspace shadow stack you need HW that supports it, a kernel
+configured with it and userspace libraries compiled with it.
+
+The kernel Kconfig option is X86_USER_SHADOW_STACK.  When compiled in, shadow
+stacks can be disabled at runtime with the kernel parameter: nousershstk.
+
+To build a user shadow stack enabled kernel, Binutils v2.29 or LLVM v6 or later
+are required.
+
+At run time, /proc/cpuinfo shows CET features if the processor supports
+CET. "user_shstk" means that userspace shadow stack is supported on the current
+kernel and HW.
+
+Application Enabling
+====================
+
+An application's CET capability is marked in its ELF note and can be verified
+from readelf/llvm-readelf output::
+
+    readelf -n <application> | grep -a SHSTK
+        properties: x86 feature: SHSTK
+
+The kernel does not process these applications markers directly. Applications
+or loaders must enable CET features using the interface described in section 4.
+Typically this would be done in dynamic loader or static runtime objects, as is
+the case in GLIBC.
+
+Enabling arch_prctl()'s
+=======================
+
+Elf features should be enabled by the loader using the below arch_prctl's. They
+are only supported in 64 bit user applications. These operate on the features
+on a per-thread basis. The enablement status is inherited on clone, so if the
+feature is enabled on the first thread, it will propagate to all the thread's
+in an app.
+
+arch_prctl(ARCH_SHSTK_ENABLE, unsigned long feature)
+    Enable a single feature specified in 'feature'. Can only operate on
+    one feature at a time.
+
+arch_prctl(ARCH_SHSTK_DISABLE, unsigned long feature)
+    Disable a single feature specified in 'feature'. Can only operate on
+    one feature at a time.
+
+arch_prctl(ARCH_SHSTK_LOCK, unsigned long features)
+    Lock in features at their current enabled or disabled status. 'features'
+    is a mask of all features to lock. All bits set are processed, unset bits
+    are ignored. The mask is ORed with the existing value. So any feature bits
+    set here cannot be enabled or disabled afterwards.
+
+arch_prctl(ARCH_SHSTK_UNLOCK, unsigned long features)
+    Unlock features. 'features' is a mask of all features to unlock. All
+    bits set are processed, unset bits are ignored. Only works via ptrace.
+
+arch_prctl(ARCH_SHSTK_STATUS, unsigned long addr)
+    Copy the currently enabled features to the address passed in addr. The
+    features are described using the bits passed into the others in
+    'features'.
+
+The return values are as follows. On success, return 0. On error, errno can
+be::
+
+        -EPERM if any of the passed feature are locked.
+        -ENOTSUPP if the feature is not supported by the hardware or
+         kernel.
+        -EINVAL arguments (non existing feature, etc)
+        -EFAULT if could not copy information back to userspace
+
+The feature's bits supported are::
+
+    ARCH_SHSTK_SHSTK - Shadow stack
+    ARCH_SHSTK_WRSS  - WRSS
+
+Currently shadow stack and WRSS are supported via this interface. WRSS
+can only be enabled with shadow stack, and is automatically disabled
+if shadow stack is disabled.
+
+Proc Status
+===========
+To check if an application is actually running with shadow stack, the
+user can read the /proc/$PID/status. It will report "wrss" or "shstk"
+depending on what is enabled. The lines look like this::
+
+    x86_Thread_features: shstk wrss
+    x86_Thread_features_locked: shstk wrss
+
+Implementation of the Shadow Stack
+==================================
+
+Shadow Stack Size
+-----------------
+
+A task's shadow stack is allocated from memory to a fixed size of
+MIN(RLIMIT_STACK, 4 GB). In other words, the shadow stack is allocated to
+the maximum size of the normal stack, but capped to 4 GB. In the case
+of the clone3 syscall, there is a stack size passed in and shadow stack
+uses this instead of the rlimit.
+
+Signal
+------
+
+The main program and its signal handlers use the same shadow stack. Because
+the shadow stack stores only return addresses, a large shadow stack covers
+the condition that both the program stack and the signal alternate stack run
+out.
+
+When a signal happens, the old pre-signal state is pushed on the stack. When
+shadow stack is enabled, the shadow stack specific state is pushed onto the
+shadow stack. Today this is only the old SSP (shadow stack pointer), pushed
+in a special format with bit 63 set. On sigreturn this old SSP token is
+verified and restored by the kernel. The kernel will also push the normal
+restorer address to the shadow stack to help userspace avoid a shadow stack
+violation on the sigreturn path that goes through the restorer.
+
+So the shadow stack signal frame format is as follows::
+
+    |1...old SSP| - Pointer to old pre-signal ssp in sigframe token format
+                    (bit 63 set to 1)
+    |        ...| - Other state may be added in the future
+
+
+32 bit ABI signals are not supported in shadow stack processes. Linux prevents
+32 bit execution while shadow stack is enabled by the allocating shadow stacks
+outside of the 32 bit address space. When execution enters 32 bit mode, either
+via far call or returning to userspace, a #GP is generated by the hardware
+which, will be delivered to the process as a segfault. When transitioning to
+userspace the register's state will be as if the userspace ip being returned to
+caused the segfault.
+
+Fork
+----
+
+The shadow stack's vma has VM_SHADOW_STACK flag set; its PTEs are required
+to be read-only and dirty. When a shadow stack PTE is not RO and dirty, a
+shadow access triggers a page fault with the shadow stack access bit set
+in the page fault error code.
+
+When a task forks a child, its shadow stack PTEs are copied and both the
+parent's and the child's shadow stack PTEs are cleared of the dirty bit.
+Upon the next shadow stack access, the resulting shadow stack page fault
+is handled by page copy/re-use.
+
+When a pthread child is created, the kernel allocates a new shadow stack
+for the new thread. New shadow stack creation behaves like mmap() with respect
+to ASLR behavior. Similarly, on thread exit the thread's shadow stack is
+disabled.
+
+Exec
+----
+
+On exec, shadow stack features are disabled by the kernel. At which point,
+userspace can choose to re-enable, or lock them.
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index d6109c7..1f96155 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -566,6 +566,7 @@
     mt    arm64 MTE allocation tags are enabled
     um    userfaultfd missing tracking
     uw    userfaultfd wr-protect tracking
+    ss    shadow stack page
     ==    =======================================
 
 Note that there is no guarantee that every flag and associated mnemonic will
diff --git a/Documentation/mm/arch_pgtable_helpers.rst b/Documentation/mm/arch_pgtable_helpers.rst
index af3891f..c82e3ee 100644
--- a/Documentation/mm/arch_pgtable_helpers.rst
+++ b/Documentation/mm/arch_pgtable_helpers.rst
@@ -46,7 +46,11 @@
 +---------------------------+--------------------------------------------------+
 | pte_mkclean               | Creates a clean PTE                              |
 +---------------------------+--------------------------------------------------+
-| pte_mkwrite               | Creates a writable PTE                           |
+| pte_mkwrite               | Creates a writable PTE of the type specified by  |
+|                           | the VMA.                                         |
++---------------------------+--------------------------------------------------+
+| pte_mkwrite_novma         | Creates a writable PTE, of the conventional type |
+|                           | of writable.                                     |
 +---------------------------+--------------------------------------------------+
 | pte_wrprotect             | Creates a write protected PTE                    |
 +---------------------------+--------------------------------------------------+
@@ -118,7 +122,11 @@
 +---------------------------+--------------------------------------------------+
 | pmd_mkclean               | Creates a clean PMD                              |
 +---------------------------+--------------------------------------------------+
-| pmd_mkwrite               | Creates a writable PMD                           |
+| pmd_mkwrite               | Creates a writable PMD of the type specified by  |
+|                           | the VMA.                                         |
++---------------------------+--------------------------------------------------+
+| pmd_mkwrite_novma         | Creates a writable PMD, of the conventional type |
+|                           | of writable.                                     |
 +---------------------------+--------------------------------------------------+
 | pmd_wrprotect             | Creates a write protected PMD                    |
 +---------------------------+--------------------------------------------------+
diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst
index 77725d6..5629edf 100644
--- a/Documentation/powerpc/ptrace.rst
+++ b/Documentation/powerpc/ptrace.rst
@@ -15,7 +15,7 @@
 that GDB doesn't need to special-case each of them. We added the
 following 3 new ptrace requests.
 
-1. PTRACE_PPC_GETHWDEBUGINFO
+1. PPC_PTRACE_GETHWDBGINFO
 ============================
 
 Query for GDB to discover the hardware debug features. The main info to
@@ -48,7 +48,7 @@
   #define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x10
   #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31		0x20
 
-2. PTRACE_SETHWDEBUG
+2. PPC_PTRACE_SETHWDEBUG
 
 Sets a hardware breakpoint or watchpoint, according to the provided structure::
 
@@ -88,7 +88,7 @@
 are not contemplated, but that is out of the scope of this work.
 
 ptrace will return an integer (handle) uniquely identifying the breakpoint or
-watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG
 request to ask for its removal. Return -ENOSPC if the requested breakpoint
 can't be allocated on the registers.
 
@@ -150,7 +150,7 @@
     p.addr2           = (uint64_t) end_range;
     p.condition_value = 0;
 
-3. PTRACE_DELHWDEBUG
+3. PPC_PTRACE_DELHWDEBUG
 
 Takes an integer which identifies an existing breakpoint or watchpoint
 (i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
diff --git a/arch/Kconfig b/arch/Kconfig
index ec49c01..12d5149 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -931,6 +931,14 @@
 config ARCH_WANT_HUGE_PMD_SHARE
 	bool
 
+# Archs that want to use pmd_mkwrite on kernel memory need it defined even
+# if there are no userspace memory management features that use it
+config ARCH_WANT_KERNEL_PMD_MKWRITE
+	bool
+
+config ARCH_WANT_PMD_MKWRITE
+	def_bool TRANSPARENT_HUGEPAGE || ARCH_WANT_KERNEL_PMD_MKWRITE
+
 config HAVE_ARCH_SOFT_DIRTY
 	bool
 
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 747b5f7..635f0a5 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -256,7 +256,7 @@ extern inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED;
 extern inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_FOW; return pte; }
 extern inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~(__DIRTY_BITS); return pte; }
 extern inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~(__ACCESS_BITS); return pte; }
-extern inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) &= ~_PAGE_FOW; return pte; }
+extern inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_FOW; return pte; }
 extern inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= __DIRTY_BITS; return pte; }
 extern inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= __ACCESS_BITS; return pte; }
 
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 5001b79..ef8d416 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -21,7 +21,7 @@ static inline pmd_t pte_pmd(pte_t pte)
 }
 
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index ee78ab3..f3eea3f 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -87,7 +87,7 @@
 
 PTE_BIT_FUNC(mknotpresent,     &= ~(_PAGE_PRESENT));
 PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite_novma,	|= (_PAGE_WRITE));
 PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
 PTE_BIT_FUNC(mkdirty,	|= (_PAGE_DIRTY));
 PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 309b747..70480dd 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -350,19 +350,6 @@ static int locomo_resume(struct platform_device *dev)
 }
 #endif
 
-
-/**
- *	locomo_probe - probe for a single LoCoMo chip.
- *	@phys_addr: physical address of device.
- *
- *	Probe for a LoCoMo chip.  This must be called
- *	before any other locomo-specific code.
- *
- *	Returns:
- *	%-ENODEV	device not found.
- *	%-EBUSY		physical address already marked in-use.
- *	%0		successful.
- */
 static int
 __locomo_probe(struct device *me, struct resource *mem, int irq)
 {
@@ -479,6 +466,21 @@ static void __locomo_remove(struct locomo *lchip)
 	kfree(lchip);
 }
 
+/**
+ *	locomo_probe - probe for a single LoCoMo chip.
+ *	@dev: platform device
+ *
+ *	Probe for a LoCoMo chip.  This must be called
+ *	before any other locomo-specific code.
+ *
+ *	Returns:
+ *	* %-EINVAL	- device's IORESOURCE_MEM not found
+ *	* %-ENXIO	- could not allocate an IRQ for the device
+ *	* %-ENODEV	- device not found.
+ *	* %-EBUSY	- physical address already marked in-use.
+ *	* %-ENOMEM	- could not allocate or iomap memory.
+ *	* %0		- successful.
+ */
 static int locomo_probe(struct platform_device *dev)
 {
 	struct resource *mem;
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 1060497..71c3add 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -202,7 +202,7 @@ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
 
 PMD_BIT_FUNC(wrprotect,	|= L_PMD_SECT_RDONLY);
 PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mkwrite,   &= ~L_PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkwrite_novma,   &= ~L_PMD_SECT_RDONLY);
 PMD_BIT_FUNC(mkdirty,   |= L_PMD_SECT_DIRTY);
 PMD_BIT_FUNC(mkclean,   &= ~L_PMD_SECT_DIRTY);
 PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index ba573f2..16b02f4 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -228,7 +228,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 	return set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return clear_pte_bit(pte, __pgprot(L_PTE_RDONLY));
 }
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7f092cb..943ffcf 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -40,6 +40,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, __entry_task);
 
 #include <asm/types.h>
+#include <asm/traps.h>
 
 struct cpu_context_save {
 	__u32	r4;
@@ -66,7 +67,6 @@ struct thread_info {
 	__u32			cpu_domain;	/* cpu domain */
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			abi_syscall;	/* ABI type and syscall nr */
-	__u8			used_cp[16];	/* thread used copro */
 	unsigned long		tp_value[2];	/* TLS registers */
 	union fp_state		fpstate __attribute__((aligned(8)));
 	union vfp_state		vfpstate;
@@ -105,6 +105,21 @@ extern void iwmmxt_task_restore(struct thread_info *, void *);
 extern void iwmmxt_task_release(struct thread_info *);
 extern void iwmmxt_task_switch(struct thread_info *);
 
+extern int iwmmxt_undef_handler(struct pt_regs *, u32);
+
+static inline void register_iwmmxt_undef_handler(void)
+{
+	static struct undef_hook iwmmxt_undef_hook = {
+		.instr_mask	= 0x0c000e00,
+		.instr_val	= 0x0c000000,
+		.cpsr_mask	= MODE_MASK | PSR_T_BIT,
+		.cpsr_val	= USR_MODE,
+		.fn		= iwmmxt_undef_handler,
+	};
+
+	register_undef_hook(&iwmmxt_undef_hook);
+}
+
 extern void vfp_sync_hwstate(struct thread_info *);
 extern void vfp_flush_hwstate(struct thread_info *);
 
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index 5b57b87..157ea34 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -102,7 +102,6 @@
 
 #ifndef __ASSEMBLY__
 void vfp_disable(void);
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs);
 #endif
 
 #endif /* __ASM_VFP_H */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 6a80d4b..219cbc7 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -47,7 +47,6 @@ int main(void)
   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
   DEFINE(TI_CPU_SAVE,		offsetof(struct thread_info, cpu_context));
   DEFINE(TI_ABI_SYSCALL,	offsetof(struct thread_info, abi_syscall));
-  DEFINE(TI_USED_CP,		offsetof(struct thread_info, used_cp));
   DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
   DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
 #ifdef CONFIG_VFP
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 76e8125..6150a71 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -446,258 +446,26 @@
 __und_usr:
 	usr_entry uaccess=0
 
-	mov	r2, r4
-	mov	r3, r5
-
-	@ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
-	@      faulting instruction depending on Thumb mode.
-	@ r3 = regs->ARM_cpsr
-	@
-	@ The emulation code returns using r9 if it has emulated the
-	@ instruction, or the more conventional lr if we are to treat
-	@ this as a real undefined instruction
-	@
-	badr	r9, ret_from_exception
-
 	@ IRQs must be enabled before attempting to read the instruction from
 	@ user space since that could cause a page/translation fault if the
 	@ page table was modified by another CPU.
 	enable_irq
 
-	tst	r3, #PSR_T_BIT			@ Thumb mode?
-	bne	__und_usr_thumb
-	sub	r4, r2, #4			@ ARM instr at LR - 4
-1:	ldrt	r0, [r4]
- ARM_BE8(rev	r0, r0)				@ little endian instruction
-
+	tst	r5, #PSR_T_BIT			@ Thumb mode?
+	mov	r1, #2				@ set insn size to 2 for Thumb
+	bne	0f				@ handle as Thumb undef exception
+#ifdef CONFIG_FPE_NWFPE
+	adr	r9, ret_from_exception
+	bl	call_fpe			@ returns via R9 on success
+#endif
+	mov	r1, #4				@ set insn size to 4 for ARM
+0:	mov	r0, sp
 	uaccess_disable ip
-
-	@ r0 = 32-bit ARM instruction which caused the exception
-	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
-	@ r4 = PC value for the faulting instruction
-	@ lr = 32-bit undefined instruction function
-	badr	lr, __und_usr_fault_32
-	b	call_fpe
-
-__und_usr_thumb:
-	@ Thumb instruction
-	sub	r4, r2, #2			@ First half of thumb instr at LR - 2
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
-/*
- * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
- * can never be supported in a single kernel, this code is not applicable at
- * all when __LINUX_ARM_ARCH__ < 6.  This allows simplifying assumptions to be
- * made about .arch directives.
- */
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-	ldr_va	r5, cpu_architecture
-	cmp	r5, #CPU_ARCH_ARMv7
-	blo	__und_usr_fault_16		@ 16bit undefined instruction
-/*
- * The following code won't get run unless the running CPU really is v7, so
- * coding round the lack of ldrht on older arches is pointless.  Temporarily
- * override the assembler target arch with the minimum required instead:
- */
-	.arch	armv6t2
-#endif
-2:	ldrht	r5, [r4]
-ARM_BE8(rev16	r5, r5)				@ little endian instruction
-	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
-	blo	__und_usr_fault_16_pan		@ 16bit undefined instruction
-3:	ldrht	r0, [r2]
-ARM_BE8(rev16	r0, r0)				@ little endian instruction
-	uaccess_disable ip
-	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
-	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
-	orr	r0, r0, r5, lsl #16
-	badr	lr, __und_usr_fault_32
-	@ r0 = the two 16-bit Thumb instructions which caused the exception
-	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
-	@ r4 = PC value for the first 16-bit Thumb instruction
-	@ lr = 32bit undefined instruction function
-
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target arch was overridden, change it back: */
-#ifdef CONFIG_CPU_32v6K
-	.arch	armv6k
-#else
-	.arch	armv6
-#endif
-#endif /* __LINUX_ARM_ARCH__ < 7 */
-#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
-	b	__und_usr_fault_16
-#endif
+	bl	__und_fault
+	b	ret_from_exception
  UNWIND(.fnend)
 ENDPROC(__und_usr)
 
-/*
- * The out of line fixup for the ldrt instructions above.
- */
-	.pushsection .text.fixup, "ax"
-	.align	2
-4:	str     r4, [sp, #S_PC]			@ retry current instruction
-	ret	r9
-	.popsection
-	.pushsection __ex_table,"a"
-	.long	1b, 4b
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
-	.long	2b, 4b
-	.long	3b, 4b
-#endif
-	.popsection
-
-/*
- * Check whether the instruction is a co-processor instruction.
- * If yes, we need to call the relevant co-processor handler.
- *
- * Note that we don't do a full check here for the co-processor
- * instructions; all instructions with bit 27 set are well
- * defined.  The only instructions that should fault are the
- * co-processor instructions.  However, we have to watch out
- * for the ARM6/ARM7 SWI bug.
- *
- * NEON is a special case that has to be handled here. Not all
- * NEON instructions are co-processor instructions, so we have
- * to make a special case of checking for them. Plus, there's
- * five groups of them, so we have a table of mask/opcode pairs
- * to check against, and if any match then we branch off into the
- * NEON handler code.
- *
- * Emulators may wish to make use of the following registers:
- *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
- *  r2  = PC value to resume execution after successful emulation
- *  r9  = normal "successful" return address
- *  r10 = this threads thread_info structure
- *  lr  = unrecognised instruction return address
- * IRQs enabled, FIQs enabled.
- */
-	@
-	@ Fall-through from Thumb-2 __und_usr
-	@
-#ifdef CONFIG_NEON
-	get_thread_info r10			@ get current thread
-	adr	r6, .LCneon_thumb_opcodes
-	b	2f
-#endif
-call_fpe:
-	get_thread_info r10			@ get current thread
-#ifdef CONFIG_NEON
-	adr	r6, .LCneon_arm_opcodes
-2:	ldr	r5, [r6], #4			@ mask value
-	ldr	r7, [r6], #4			@ opcode bits matching in mask
-	cmp	r5, #0				@ end mask?
-	beq	1f
-	and	r8, r0, r5
-	cmp	r8, r7				@ NEON instruction?
-	bne	2b
-	mov	r7, #1
-	strb	r7, [r10, #TI_USED_CP + 10]	@ mark CP#10 as used
-	strb	r7, [r10, #TI_USED_CP + 11]	@ mark CP#11 as used
-	b	do_vfp				@ let VFP handler handle this
-1:
-#endif
-	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
-	tstne	r0, #0x04000000			@ bit 26 set on both ARM and Thumb-2
-	reteq	lr
-	and	r8, r0, #0x00000f00		@ mask out CP number
-	mov	r7, #1
-	add	r6, r10, r8, lsr #8		@ add used_cp[] array offset first
-	strb	r7, [r6, #TI_USED_CP]		@ set appropriate used_cp[]
-#ifdef CONFIG_IWMMXT
-	@ Test if we need to give access to iWMMXt coprocessors
-	ldr	r5, [r10, #TI_FLAGS]
-	rsbs	r7, r8, #(1 << 8)		@ CP 0 or 1 only
-	movscs	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
-	bcs	iwmmxt_task_enable
-#endif
- ARM(	add	pc, pc, r8, lsr #6	)
- THUMB(	lsr	r8, r8, #6		)
- THUMB(	add	pc, r8			)
-	nop
-
-	ret.w	lr				@ CP#0
-	W(b)	do_fpe				@ CP#1 (FPE)
-	W(b)	do_fpe				@ CP#2 (FPE)
-	ret.w	lr				@ CP#3
-	ret.w	lr				@ CP#4
-	ret.w	lr				@ CP#5
-	ret.w	lr				@ CP#6
-	ret.w	lr				@ CP#7
-	ret.w	lr				@ CP#8
-	ret.w	lr				@ CP#9
-#ifdef CONFIG_VFP
-	W(b)	do_vfp				@ CP#10 (VFP)
-	W(b)	do_vfp				@ CP#11 (VFP)
-#else
-	ret.w	lr				@ CP#10 (VFP)
-	ret.w	lr				@ CP#11 (VFP)
-#endif
-	ret.w	lr				@ CP#12
-	ret.w	lr				@ CP#13
-	ret.w	lr				@ CP#14 (Debug)
-	ret.w	lr				@ CP#15 (Control)
-
-#ifdef CONFIG_NEON
-	.align	6
-
-.LCneon_arm_opcodes:
-	.word	0xfe000000			@ mask
-	.word	0xf2000000			@ opcode
-
-	.word	0xff100000			@ mask
-	.word	0xf4000000			@ opcode
-
-	.word	0x00000000			@ mask
-	.word	0x00000000			@ opcode
-
-.LCneon_thumb_opcodes:
-	.word	0xef000000			@ mask
-	.word	0xef000000			@ opcode
-
-	.word	0xff100000			@ mask
-	.word	0xf9000000			@ opcode
-
-	.word	0x00000000			@ mask
-	.word	0x00000000			@ opcode
-#endif
-
-do_fpe:
-	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
-	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point
-
-/*
- * The FP module is called with these registers set:
- *  r0  = instruction
- *  r2  = PC+4
- *  r9  = normal "successful" return address
- *  r10 = FP workspace
- *  lr  = unrecognised FP instruction return address
- */
-
-	.pushsection .data
-	.align	2
-ENTRY(fp_enter)
-	.word	no_fp
-	.popsection
-
-ENTRY(no_fp)
-	ret	lr
-ENDPROC(no_fp)
-
-__und_usr_fault_32:
-	mov	r1, #4
-	b	1f
-__und_usr_fault_16_pan:
-	uaccess_disable ip
-__und_usr_fault_16:
-	mov	r1, #2
-1:	mov	r0, sp
-	badr	lr, ret_from_exception
-	b	__und_fault
-ENDPROC(__und_usr_fault_32)
-ENDPROC(__und_usr_fault_16)
-
 	.align	5
 __pabt_usr:
 	usr_entry
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index d2b4ac0..a0218c4 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -58,9 +58,19 @@
 	.text
 	.arm
 
+ENTRY(iwmmxt_undef_handler)
+	push		{r9, r10, lr}
+	get_thread_info	r10
+	mov		r9, pc
+	b		iwmmxt_task_enable
+	mov		r0, #0
+	pop		{r9, r10, pc}
+ENDPROC(iwmmxt_undef_handler)
+
 /*
  * Lazy switching of Concan coprocessor context
  *
+ * r0  = struct pt_regs pointer
  * r10 = struct thread_info pointer
  * r9  = ret_from_exception
  * lr  = undefined instr exit
@@ -84,12 +94,12 @@
 	PJ4(mcr	p15, 0, r2, c1, c0, 2)
 
 	ldr	r3, =concan_owner
-	add	r0, r10, #TI_IWMMXT_STATE	@ get task Concan save area
-	ldr	r2, [sp, #60]			@ current task pc value
+	ldr	r2, [r0, #S_PC]			@ current task pc value
 	ldr	r1, [r3]			@ get current Concan owner
-	str	r0, [r3]			@ this task now owns Concan regs
 	sub	r2, r2, #4			@ adjust pc back
-	str	r2, [sp, #60]
+	str	r2, [r0, #S_PC]
+	add	r0, r10, #TI_IWMMXT_STATE	@ get task Concan save area
+	str	r0, [r3]			@ this task now owns Concan regs
 
 	mrc	p15, 0, r2, c2, c0, 0
 	mov	r2, r2				@ cpwait
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 46364b6..5d07cf9 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -94,16 +94,28 @@ static void machine_crash_nonpanic_core(void *unused)
 	}
 }
 
+static DEFINE_PER_CPU(call_single_data_t, cpu_stop_csd) =
+	CSD_INIT(machine_crash_nonpanic_core, NULL);
+
 void crash_smp_send_stop(void)
 {
 	static int cpus_stopped;
 	unsigned long msecs;
+	call_single_data_t *csd;
+	int cpu, this_cpu = raw_smp_processor_id();
 
 	if (cpus_stopped)
 		return;
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-	smp_call_function(machine_crash_nonpanic_core, NULL, false);
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+
+		csd = &per_cpu(cpu_stop_csd, cpu);
+		smp_call_function_single_async(cpu, csd);
+	}
+
 	msecs = 1000; /* Wait at most a second for the other cpus to stop */
 	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
 		mdelay(1);
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
index 1d1fb22..4bca809 100644
--- a/arch/arm/kernel/pj4-cp0.c
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -126,6 +126,7 @@ static int __init pj4_cp0_init(void)
 	pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers);
 	elf_hwcap |= HWCAP_IWMMXT;
 	thread_register_notifier(&iwmmxt_notifier_block);
+	register_iwmmxt_undef_handler();
 #endif
 
 	return 0;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0e8ff85..e16ed10 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -222,7 +222,6 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 
-	memset(thread->used_cp, 0, sizeof(thread->used_cp));
 	memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
 	memset(&thread->fpstate, 0, sizeof(union fp_state));
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index fef32d7..c421a89 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -584,8 +584,6 @@ static int fpa_set(struct task_struct *target,
 {
 	struct thread_info *thread = task_thread_info(target);
 
-	thread->used_cp[1] = thread->used_cp[2] = 1;
-
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 		&thread->fpstate,
 		0, sizeof(struct user_fp));
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 8d0afa11..79a6730 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -682,7 +682,7 @@ asmlinkage void do_rseq_syscall(struct pt_regs *regs)
  */
 static_assert(NSIGILL	== 11);
 static_assert(NSIGFPE	== 15);
-static_assert(NSIGSEGV	== 9);
+static_assert(NSIGSEGV	== 10);
 static_assert(NSIGBUS	== 5);
 static_assert(NSIGTRAP	== 6);
 static_assert(NSIGCHLD	== 6);
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index ed4f6e7..00d00d3 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -166,6 +166,7 @@ static int __init xscale_cp0_init(void)
 		pr_info("XScale iWMMXt coprocessor detected.\n");
 		elf_hwcap |= HWCAP_IWMMXT;
 		thread_register_notifier(&iwmmxt_notifier_block);
+		register_iwmmxt_undef_handler();
 #endif
 	} else {
 		pr_info("XScale DSP coprocessor detected.\n");
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 61ce82a..072ff9b 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -56,6 +56,10 @@
 	movne	r2, r2, lsr #2			@ turned into # of sets
 	sub	r2, r2, #(1 << 5)
 	stmia	r1, {r2, r3}
+#ifdef CONFIG_VFP
+	mov	r1, #1				@ disable quirky VFP
+	str_l	r1, VFP_arch_feroceon, r2
+#endif
 	ret	lr
 
 /*
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d8f9915..354d297 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -7,6 +7,7 @@
     Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
 
 */
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/opcodes.h>
 
@@ -104,6 +105,7 @@
 	@ plain LDR instruction.  Weird, but it seems harmless.
 	.pushsection .text.fixup,"ax"
 	.align	2
+.Lrep:	str     r4, [sp, #S_PC]		@ retry current instruction
 .Lfix:	ret	r9			@ let the user eat segfaults
 	.popsection
 
@@ -111,3 +113,78 @@
 	.align	3
 	.long	.Lx1, .Lfix
 	.popsection
+
+	@
+	@ Check whether the instruction is a co-processor instruction.
+	@ If yes, we need to call the relevant co-processor handler.
+	@ Only FPE instructions are dispatched here, everything else
+	@ is handled by undef hooks.
+	@
+	@ Emulators may wish to make use of the following registers:
+	@  r4  = PC value to resume execution after successful emulation
+	@  r9  = normal "successful" return address
+	@  lr  = unrecognised instruction return address
+	@ IRQs enabled, FIQs enabled.
+	@
+ENTRY(call_fpe)
+	mov	r2, r4
+	sub	r4, r4, #4			@ ARM instruction at user PC - 4
+USERL(	.Lrep,	ldrt r0, [r4])			@ load opcode from user space
+ARM_BE8(rev	r0, r0)				@ little endian instruction
+
+	uaccess_disable ip
+
+	get_thread_info r10			@ get current thread
+	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
+	reteq	lr
+	and	r8, r0, #0x00000f00		@ mask out CP number
+#ifdef CONFIG_IWMMXT
+	@ Test if we need to give access to iWMMXt coprocessors
+	ldr	r5, [r10, #TI_FLAGS]
+	rsbs	r7, r8, #(1 << 8)		@ CP 0 or 1 only
+	movscs	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
+	movcs	r0, sp				@ pass struct pt_regs
+	bcs	iwmmxt_task_enable
+#endif
+	add	pc, pc, r8, lsr #6
+	nop
+
+	ret	lr				@ CP#0
+	b	do_fpe				@ CP#1 (FPE)
+	b	do_fpe				@ CP#2 (FPE)
+	ret	lr				@ CP#3
+	ret	lr				@ CP#4
+	ret	lr				@ CP#5
+	ret	lr				@ CP#6
+	ret	lr				@ CP#7
+	ret	lr				@ CP#8
+	ret	lr				@ CP#9
+	ret	lr				@ CP#10 (VFP)
+	ret	lr				@ CP#11 (VFP)
+	ret	lr				@ CP#12
+	ret	lr				@ CP#13
+	ret	lr				@ CP#14 (Debug)
+	ret	lr				@ CP#15 (Control)
+
+do_fpe:
+	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
+	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point
+
+	@
+	@ The FP module is called with these registers set:
+	@  r0  = instruction
+	@  r2  = PC+4
+	@  r9  = normal "successful" return address
+	@  r10 = FP workspace
+	@  lr  = unrecognised FP instruction return address
+	@
+
+	.pushsection .data
+	.align	2
+ENTRY(fp_enter)
+	.word	no_fp
+	.popsection
+
+no_fp:
+	ret	lr
+ENDPROC(no_fp)
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile
index 749901a..dfd64bc 100644
--- a/arch/arm/vfp/Makefile
+++ b/arch/arm/vfp/Makefile
@@ -8,4 +8,4 @@
 # ccflags-y := -DDEBUG
 # asflags-y := -DDEBUG
 
-obj-y		+= vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o
+obj-y		+= vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
deleted file mode 100644
index 62206ef..0000000
--- a/arch/arm/vfp/entry.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  linux/arch/arm/vfp/entry.S
- *
- *  Copyright (C) 2004 ARM Limited.
- *  Written by Deep Blue Solutions Limited.
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/vfpmacros.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-
-@ VFP entry point.
-@
-@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@  r2  = PC value to resume execution after successful emulation
-@  r9  = normal "successful" return address
-@  r10 = this threads thread_info structure
-@  lr  = unrecognised instruction return address
-@  IRQs enabled.
-@
-ENTRY(do_vfp)
-	mov	r1, r10
-	str	lr, [sp, #-8]!
-	add	r3, sp, #4
-	str	r9, [r3]
-	bl	vfp_entry
-	ldr	pc, [sp], #8
-ENDPROC(do_vfp)
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 5cd6d50..e43a630 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -375,3 +375,4 @@ struct op {
 };
 
 asmlinkage void vfp_save_state(void *location, u32 fpexc);
+asmlinkage u32 vfp_load_state(const void *location);
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a4610d0..d5a03f3 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -4,12 +4,6 @@
  *
  *  Copyright (C) 2004 ARM Limited.
  *  Written by Deep Blue Solutions Limited.
- *
- * This code is called from the kernel's undefined instruction trap.
- * r1 holds the thread_info pointer
- * r3 holds the return address for successful handling.
- * lr holds the return address for unrecognised instructions.
- * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
  */
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -19,20 +13,6 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 
-	.macro	DBGSTR, str
-#ifdef DEBUG
-	stmfd	sp!, {r0-r3, ip, lr}
-	ldr	r0, =1f
-	bl	_printk
-	ldmfd	sp!, {r0-r3, ip, lr}
-
-	.pushsection .rodata, "a"
-1:	.ascii	KERN_DEBUG "VFP: \str\n"
-	.byte	0
-	.previous
-#endif
-	.endm
-
 	.macro  DBGSTR1, str, arg
 #ifdef DEBUG
 	stmfd	sp!, {r0-r3, ip, lr}
@@ -48,181 +28,25 @@
 #endif
 	.endm
 
-	.macro  DBGSTR3, str, arg1, arg2, arg3
-#ifdef DEBUG
-	stmfd	sp!, {r0-r3, ip, lr}
-	mov	r3, \arg3
-	mov	r2, \arg2
-	mov	r1, \arg1
-	ldr	r0, =1f
-	bl	_printk
-	ldmfd	sp!, {r0-r3, ip, lr}
-
-	.pushsection .rodata, "a"
-1:	.ascii	KERN_DEBUG "VFP: \str\n"
-	.byte	0
-	.previous
-#endif
-	.endm
-
-
-@ VFP hardware support entry point.
-@
-@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@  r1  = thread_info pointer
-@  r2  = PC value to resume execution after successful emulation
-@  r3  = normal "successful" return address
-@  lr  = unrecognised instruction return address
-@  IRQs enabled.
-ENTRY(vfp_support_entry)
-	ldr	r11, [r1, #TI_CPU]	@ CPU number
-	add	r10, r1, #TI_VFPSTATE	@ r10 = workspace
-
-	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10
-
-	.fpu	vfpv2
-	VFPFMRX	r1, FPEXC		@ Is the VFP enabled?
-	DBGSTR1	"fpexc %08x", r1
-	tst	r1, #FPEXC_EN
-	bne	look_for_VFP_exceptions	@ VFP is already enabled
-
-	DBGSTR1 "enable %x", r10
-	ldr	r9, vfp_current_hw_state_address
-	orr	r1, r1, #FPEXC_EN	@ user FPEXC has the enable bit set
-	ldr	r4, [r9, r11, lsl #2]	@ vfp_current_hw_state pointer
-	bic	r5, r1, #FPEXC_EX	@ make sure exceptions are disabled
-	cmp	r4, r10			@ this thread owns the hw context?
-#ifndef CONFIG_SMP
-	@ For UP, checking that this thread owns the hw context is
-	@ sufficient to determine that the hardware state is valid.
-	beq	vfp_hw_state_valid
-
-	@ On UP, we lazily save the VFP context.  As a different
-	@ thread wants ownership of the VFP hardware, save the old
-	@ state if there was a previous (valid) owner.
-
-	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
-					@ exceptions, so we can get at the
-					@ rest of it
-
-	DBGSTR1	"save old state %p", r4
-	cmp	r4, #0			@ if the vfp_current_hw_state is NULL
-	beq	vfp_reload_hw		@ then the hw state needs reloading
-	VFPFSTMIA r4, r5		@ save the working registers
-	VFPFMRX	r5, FPSCR		@ current status
-#ifndef CONFIG_CPU_FEROCEON
-	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	beq	1f
-	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
-	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	beq	1f
-	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
-1:
-#endif
-	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
-vfp_reload_hw:
-
-#else
-	@ For SMP, if this thread does not own the hw context, then we
-	@ need to reload it.  No need to save the old state as on SMP,
-	@ we always save the state when we switch away from a thread.
-	bne	vfp_reload_hw
-
-	@ This thread has ownership of the current hardware context.
-	@ However, it may have been migrated to another CPU, in which
-	@ case the saved state is newer than the hardware context.
-	@ Check this by looking at the CPU number which the state was
-	@ last loaded onto.
-	ldr	ip, [r10, #VFP_CPU]
-	teq	ip, r11
-	beq	vfp_hw_state_valid
-
-vfp_reload_hw:
-	@ We're loading this threads state into the VFP hardware. Update
-	@ the CPU number which contains the most up to date VFP context.
-	str	r11, [r10, #VFP_CPU]
-
-	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
-					@ exceptions, so we can get at the
-					@ rest of it
-#endif
-
-	DBGSTR1	"load state %p", r10
-	str	r10, [r9, r11, lsl #2]	@ update the vfp_current_hw_state pointer
+ENTRY(vfp_load_state)
+	@ Load the current VFP state
+	@ r0 - load location
+	@ returns FPEXC
+	DBGSTR1	"load VFP state %p", r0
 					@ Load the saved state back into the VFP
-	VFPFLDMIA r10, r5		@ reload the working registers while
+	VFPFLDMIA r0, r1		@ reload the working registers while
 					@ FPEXC is in a safe state
-	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
-#ifndef CONFIG_CPU_FEROCEON
-	tst	r1, #FPEXC_EX		@ is there additional state to restore?
+	ldmia	r0, {r0-r3}		@ load FPEXC, FPSCR, FPINST, FPINST2
+	tst	r0, #FPEXC_EX		@ is there additional state to restore?
 	beq	1f
-	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
-	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	VFPFMXR	FPINST, r2		@ restore FPINST (only if FPEXC.EX is set)
+	tst	r0, #FPEXC_FP2V		@ is there an FPINST2 to write?
 	beq	1f
-	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
+	VFPFMXR	FPINST2, r3		@ FPINST2 if needed (and present)
 1:
-#endif
-	VFPFMXR	FPSCR, r5		@ restore status
-
-@ The context stored in the VFP hardware is up to date with this thread
-vfp_hw_state_valid:
-	tst	r1, #FPEXC_EX
-	bne	process_exception	@ might as well handle the pending
-					@ exception before retrying branch
-					@ out before setting an FPEXC that
-					@ stops us reading stuff
-	VFPFMXR	FPEXC, r1		@ Restore FPEXC last
-	mov	sp, r3			@ we think we have handled things
-	pop	{lr}
-	sub	r2, r2, #4		@ Retry current instruction - if Thumb
-	str	r2, [sp, #S_PC]		@ mode it's two 16-bit instructions,
-					@ else it's one 32-bit instruction, so
-					@ always subtract 4 from the following
-					@ instruction address.
-
-local_bh_enable_and_ret:
-	adr	r0, .
-	mov	r1, #SOFTIRQ_DISABLE_OFFSET
-	b	__local_bh_enable_ip	@ tail call
-
-look_for_VFP_exceptions:
-	@ Check for synchronous or asynchronous exception
-	tst	r1, #FPEXC_EX | FPEXC_DEX
-	bne	process_exception
-	@ On some implementations of the VFP subarch 1, setting FPSCR.IXE
-	@ causes all the CDP instructions to be bounced synchronously without
-	@ setting the FPEXC.EX bit
-	VFPFMRX	r5, FPSCR
-	tst	r5, #FPSCR_IXE
-	bne	process_exception
-
-	tst	r5, #FPSCR_LENGTH_MASK
-	beq	skip
-	orr	r1, r1, #FPEXC_DEX
-	b	process_exception
-skip:
-
-	@ Fall into hand on to next handler - appropriate coproc instr
-	@ not recognised by VFP
-
-	DBGSTR	"not VFP"
-	b	local_bh_enable_and_ret
-
-process_exception:
-	DBGSTR	"bounce"
-	mov	sp, r3			@ setup for a return to the user code.
-	pop	{lr}
-	mov	r2, sp			@ nothing stacked - regdump is at TOS
-
-	@ Now call the C code to package up the bounce to the support code
-	@   r0 holds the trigger instruction
-	@   r1 holds the FPEXC value
-	@   r2 pointer to register dump
-	b	VFP_bounce		@ we have handled this - the support
-					@ code will raise an exception if
-					@ required. If not, the user code will
-					@ retry the faulted instruction
-ENDPROC(vfp_support_entry)
+	VFPFMXR	FPSCR, r1		@ restore status
+	ret	lr
+ENDPROC(vfp_load_state)
 
 ENTRY(vfp_save_state)
 	@ Save the current VFP state
@@ -242,10 +66,6 @@
 	ret	lr
 ENDPROC(vfp_save_state)
 
-	.align
-vfp_current_hw_state_address:
-	.word	vfp_current_hw_state
-
 	.macro	tbl_branch, base, tmp, shift
 #ifdef CONFIG_THUMB2_KERNEL
 	adr	\tmp, 1f
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 1ba5078..7e8773a 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/user.h>
 #include <linux/export.h>
+#include <linux/perf_event.h>
 
 #include <asm/cp15.h>
 #include <asm/cputype.h>
@@ -30,11 +31,6 @@
 #include "vfpinstr.h"
 #include "vfp.h"
 
-/*
- * Our undef handlers (in entry.S)
- */
-asmlinkage void vfp_support_entry(u32, void *, u32, u32);
-
 static bool have_vfp __ro_after_init;
 
 /*
@@ -42,7 +38,11 @@ static bool have_vfp __ro_after_init;
  * Used in startup: set to non-zero if VFP checks fail
  * After startup, holds VFP architecture
  */
-static unsigned int __initdata VFP_arch;
+static unsigned int VFP_arch;
+
+#ifdef CONFIG_CPU_FEROCEON
+extern unsigned int VFP_arch_feroceon __alias(VFP_arch);
+#endif
 
 /*
  * The pointer to the vfpstate structure of the thread which currently
@@ -314,13 +314,14 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
 		 * emulate it.
 		 */
 	}
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
 	return exceptions & ~VFP_NAN_FLAG;
 }
 
 /*
  * Package up a bounce condition.
  */
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 {
 	u32 fpscr, orig_fpscr, fpsid, exceptions;
 
@@ -356,14 +357,12 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	}
 
 	if (fpexc & FPEXC_EX) {
-#ifndef CONFIG_CPU_FEROCEON
 		/*
 		 * Asynchronous exception. The instruction is read from FPINST
 		 * and the interrupted instruction has to be restarted.
 		 */
 		trigger = fmrx(FPINST);
 		regs->ARM_pc -= 4;
-#endif
 	} else if (!(fpexc & FPEXC_DEX)) {
 		/*
 		 * Illegal combination of bits. It can be caused by an
@@ -371,7 +370,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 		 * on VFP subarch 1.
 		 */
 		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
-		goto exit;
+		return;
 	}
 
 	/*
@@ -402,7 +401,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
 	 */
 	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
-		goto exit;
+		return;
 
 	/*
 	 * The barrier() here prevents fpinst2 being read
@@ -415,8 +414,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
 	if (exceptions)
 		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
- exit:
-	local_bh_enable();
 }
 
 static void vfp_enable(void *unused)
@@ -645,27 +642,6 @@ static int vfp_starting_cpu(unsigned int unused)
 	return 0;
 }
 
-/*
- * Entered with:
- *
- *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
- *  r1  = thread_info pointer
- *  r2  = PC value to resume execution after successful emulation
- *  r3  = normal "successful" return address
- *  lr  = unrecognised instruction return address
- */
-asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc,
-			  u32 resume_return_address)
-{
-	if (unlikely(!have_vfp))
-		return;
-
-	local_bh_disable();
-	vfp_support_entry(trigger, ti, resume_pc, resume_return_address);
-}
-
-#ifdef CONFIG_KERNEL_MODE_NEON
-
 static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
 {
 	/*
@@ -688,47 +664,151 @@ static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
 	return 1;
 }
 
-static struct undef_hook vfp_kmode_exception_hook[] = {{
+/*
+ * vfp_support_entry - Handle VFP exception
+ *
+ * @regs:	pt_regs structure holding the register state at exception entry
+ * @trigger:	The opcode of the instruction that triggered the exception
+ *
+ * Returns 0 if the exception was handled, or an error code otherwise.
+ */
+static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
+{
+	struct thread_info *ti = current_thread_info();
+	u32 fpexc;
+
+	if (unlikely(!have_vfp))
+		return -ENODEV;
+
+	if (!user_mode(regs))
+		return vfp_kmode_exception(regs, trigger);
+
+	local_bh_disable();
+	fpexc = fmrx(FPEXC);
+
+	/*
+	 * If the VFP unit was not enabled yet, we have to check whether the
+	 * VFP state in the CPU's registers is the most recent VFP state
+	 * associated with the process. On UP systems, we don't save the VFP
+	 * state eagerly on a context switch, so we may need to save the
+	 * VFP state to memory first, as it may belong to another process.
+	 */
+	if (!(fpexc & FPEXC_EN)) {
+		/*
+		 * Enable the VFP unit but mask the FP exception flag for the
+		 * time being, so we can access all the registers.
+		 */
+		fpexc |= FPEXC_EN;
+		fmxr(FPEXC, fpexc & ~FPEXC_EX);
+
+		/*
+		 * Check whether or not the VFP state in the CPU's registers is
+		 * the most recent VFP state associated with this task. On SMP,
+		 * migration may result in multiple CPUs holding VFP states
+		 * that belong to the same task, but only the most recent one
+		 * is valid.
+		 */
+		if (!vfp_state_in_hw(ti->cpu, ti)) {
+			if (!IS_ENABLED(CONFIG_SMP) &&
+			    vfp_current_hw_state[ti->cpu] != NULL) {
+				/*
+				 * This CPU is currently holding the most
+				 * recent VFP state associated with another
+				 * task, and we must save that to memory first.
+				 */
+				vfp_save_state(vfp_current_hw_state[ti->cpu],
+					       fpexc);
+			}
+
+			/*
+			 * We can now proceed with loading the task's VFP state
+			 * from memory into the CPU registers.
+			 */
+			fpexc = vfp_load_state(&ti->vfpstate);
+			vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
+#ifdef CONFIG_SMP
+			/*
+			 * Record that this CPU is now the one holding the most
+			 * recent VFP state of the task.
+			 */
+			ti->vfpstate.hard.cpu = ti->cpu;
+#endif
+		}
+
+		if (fpexc & FPEXC_EX)
+			/*
+			 * Might as well handle the pending exception before
+			 * retrying branch out before setting an FPEXC that
+			 * stops us reading stuff.
+			 */
+			goto bounce;
+
+		/*
+		 * No FP exception is pending: just enable the VFP and
+		 * replay the instruction that trapped.
+		 */
+		fmxr(FPEXC, fpexc);
+	} else {
+		/* Check for synchronous or asynchronous exceptions */
+		if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
+			u32 fpscr = fmrx(FPSCR);
+
+			/*
+			 * On some implementations of the VFP subarch 1,
+			 * setting FPSCR.IXE causes all the CDP instructions to
+			 * be bounced synchronously without setting the
+			 * FPEXC.EX bit
+			 */
+			if (!(fpscr & FPSCR_IXE)) {
+				if (!(fpscr & FPSCR_LENGTH_MASK)) {
+					pr_debug("not VFP\n");
+					local_bh_enable();
+					return -ENOEXEC;
+				}
+				fpexc |= FPEXC_DEX;
+			}
+		}
+bounce:		regs->ARM_pc += 4;
+		VFP_bounce(trigger, fpexc, regs);
+	}
+
+	local_bh_enable();
+	return 0;
+}
+
+static struct undef_hook neon_support_hook[] = {{
 	.instr_mask	= 0xfe000000,
 	.instr_val	= 0xf2000000,
-	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
-	.cpsr_val	= SVC_MODE,
-	.fn		= vfp_kmode_exception,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= 0,
+	.fn		= vfp_support_entry,
 }, {
 	.instr_mask	= 0xff100000,
 	.instr_val	= 0xf4000000,
-	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
-	.cpsr_val	= SVC_MODE,
-	.fn		= vfp_kmode_exception,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= 0,
+	.fn		= vfp_support_entry,
 }, {
 	.instr_mask	= 0xef000000,
 	.instr_val	= 0xef000000,
-	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
-	.cpsr_val	= SVC_MODE | PSR_T_BIT,
-	.fn		= vfp_kmode_exception,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= PSR_T_BIT,
+	.fn		= vfp_support_entry,
 }, {
 	.instr_mask	= 0xff100000,
 	.instr_val	= 0xf9000000,
-	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
-	.cpsr_val	= SVC_MODE | PSR_T_BIT,
-	.fn		= vfp_kmode_exception,
-}, {
-	.instr_mask	= 0x0c000e00,
-	.instr_val	= 0x0c000a00,
-	.cpsr_mask	= MODE_MASK,
-	.cpsr_val	= SVC_MODE,
-	.fn		= vfp_kmode_exception,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= PSR_T_BIT,
+	.fn		= vfp_support_entry,
 }};
 
-static int __init vfp_kmode_exception_hook_init(void)
-{
-	int i;
+static struct undef_hook vfp_support_hook = {
+	.instr_mask	= 0x0c000e00,
+	.instr_val	= 0x0c000a00,
+	.fn		= vfp_support_entry,
+};
 
-	for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++)
-		register_undef_hook(&vfp_kmode_exception_hook[i]);
-	return 0;
-}
-subsys_initcall(vfp_kmode_exception_hook_init);
+#ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
  * Kernel-side NEON support functions
@@ -833,8 +913,11 @@ static int __init vfp_init(void)
 		 * for NEON if the hardware has the MVFR registers.
 		 */
 		if (IS_ENABLED(CONFIG_NEON) &&
-		   (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+		    (fmrx(MVFR1) & 0x000fff00) == 0x00011100) {
 			elf_hwcap |= HWCAP_NEON;
+			for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++)
+				register_undef_hook(&neon_support_hook[i]);
+		}
 
 		if (IS_ENABLED(CONFIG_VFPv3)) {
 			u32 mvfr0 = fmrx(MVFR0);
@@ -903,6 +986,7 @@ static int __init vfp_init(void)
 
 	have_vfp = true;
 
+	register_undef_hook(&vfp_support_hook);
 	thread_register_notifier(&vfp_notifier_block);
 	vfp_pm_init();
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 432932a..7f7d9b1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -181,7 +181,7 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
 	return pmd;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
 	pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -487,7 +487,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
 #define pmd_cont(pmd)		pte_cont(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7ebe74..0e8beb3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -1344,7 +1344,7 @@ void __init minsigstksz_setup(void)
  */
 static_assert(NSIGILL	== 11);
 static_assert(NSIGFPE	== 15);
-static_assert(NSIGSEGV	== 9);
+static_assert(NSIGSEGV	== 10);
 static_assert(NSIGBUS	== 5);
 static_assert(NSIGTRAP	== 6);
 static_assert(NSIGCHLD	== 6);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 4700f85..bbd5427 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -460,7 +460,7 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
  */
 static_assert(NSIGILL	== 11);
 static_assert(NSIGFPE	== 15);
-static_assert(NSIGSEGV	== 9);
+static_assert(NSIGSEGV	== 10);
 static_assert(NSIGBUS	== 5);
 static_assert(NSIGTRAP	== 6);
 static_assert(NSIGCHLD	== 6);
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index e9ad391..7b14df3 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -41,7 +41,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 		 * read only (code, rodata). Clear the RDONLY bit from
 		 * the temporary mappings we use during restore.
 		 */
-		set_pte(dst_ptep, pte_mkwrite(pte));
+		set_pte(dst_ptep, pte_mkwrite_novma(pte));
 	} else if ((debug_pagealloc_enabled() ||
 		   is_kfence_address((void *)addr)) && !pte_none(pte)) {
 		/*
@@ -55,7 +55,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 		 */
 		BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
+		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
 	}
 }
 
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 4240503..a397e17 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -176,7 +176,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	if (pte_val(pte) & _PAGE_MODIFIED)
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index dd05dd7..8c5b7a1 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -300,7 +300,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 }
 
 /* pte_mkwrite - mark page as writable */
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	return pte;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 4e5dd80..9be2d2b 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -269,7 +269,7 @@ ia64_phys_addr_valid (unsigned long addr)
  * access rights:
  */
 #define pte_wrprotect(pte)	(__pte(pte_val(pte) & ~_PAGE_AR_RW))
-#define pte_mkwrite(pte)	(__pte(pte_val(pte) | _PAGE_AR_RW))
+#define pte_mkwrite_novma(pte)	(__pte(pte_val(pte) | _PAGE_AR_RW))
 #define pte_mkold(pte)		(__pte(pte_val(pte) & ~_PAGE_A))
 #define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
 #define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index e7cf25e..06963a1 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -384,7 +384,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	if (pte_val(pte) & _PAGE_MODIFIED)
@@ -493,7 +493,7 @@ static inline int pmd_write(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_WRITE);
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	pmd_val(pmd) |= _PAGE_WRITE;
 	if (pmd_val(pmd) & _PAGE_MODIFIED)
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 772b7e7..48f87a8 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -210,7 +210,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= CF_PAGE_WRITABLE;
 	return pte;
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 38d5e5e..9866c7a 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -156,7 +156,7 @@ static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED;
 static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_RONLY; return pte; }
 static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) &= ~_PAGE_RONLY; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_RONLY; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mknocache(pte_t pte)
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 0cc39a8..30081ae 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -144,7 +144,7 @@ static inline int pte_young(pte_t pte)		{ return pte_val(pte) & SUN3_PAGE_ACCESS
 static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; }
 static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~SUN3_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= SUN3_PAGE_MODIFIED; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= SUN3_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mknocache(pte_t pte)	{ pte_val(pte) |= SUN3_PAGE_NOCACHE; return pte; }
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 6f9b990..3042eb98 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -266,7 +266,7 @@ static inline pte_t pte_mkread(pte_t pte) \
 	{ pte_val(pte) |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkexec(pte_t pte) \
 	{ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) \
+static inline pte_t pte_mkwrite_novma(pte_t pte) \
 	{ pte_val(pte) |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte) \
 	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index cbb93a8..430b208 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -319,7 +319,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte.pte_low |= _PAGE_WRITE;
 	if (pte.pte_low & _PAGE_MODIFIED) {
@@ -374,7 +374,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	if (pte_val(pte) & _PAGE_MODIFIED)
@@ -646,7 +646,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
 	return pmd;
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	pmd_val(pmd) |= _PAGE_WRITE;
 	if (pmd_val(pmd) & _PAGE_MODIFIED)
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index be6bf3e0..5144506 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -129,7 +129,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	return pte;
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 7bdf1bb..60c6ce7 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -250,7 +250,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_WRITE;
 	return pte;
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index ce38bb3..974acca 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -322,7 +322,7 @@ static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; retu
 static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_PAGE_WRITE; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
 static inline pte_t pte_mkspecial(pte_t pte)	{ pte_val(pte) |= _PAGE_SPECIAL; return pte; }
 
 /*
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 21edd66..54b9387 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -188,6 +188,7 @@
 	select DYNAMIC_FTRACE			if FUNCTION_TRACER
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
+	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
 	select GENERIC_ATOMIC64			if PPC32
 	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
 	select GENERIC_CMOS_UPDATE
@@ -195,6 +196,7 @@
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_GETTIMEOFDAY
+	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
@@ -229,8 +231,8 @@
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if MPROFILE_KERNEL || PPC32
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL || PPC32
+	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
 	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FAST_GUP
@@ -258,7 +260,7 @@
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
 	select HAVE_OPTPROBES
-	select HAVE_OBJTOOL			if PPC32 || MPROFILE_KERNEL
+	select HAVE_OBJTOOL			if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
 	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
@@ -275,6 +277,8 @@
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select HOTPLUG_SMT			if HOTPLUG_CPU
+	select SMT_NUM_THREADS_DYNAMIC
 	select HUGETLB_PAGE_SIZE_VARIABLE	if PPC_BOOK3S_64 && HUGETLB_PAGE
 	select IOMMU_HELPER			if PPC64
 	select IRQ_DOMAIN
@@ -554,6 +558,13 @@
 	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN
 	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN
 
+config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+	depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2)
+	depends on $(cc-option,-fpatchable-function-entry=2)
+	def_bool y if PPC32
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
+
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
 	depends on SMP && (PPC_PSERIES || \
@@ -1126,12 +1137,6 @@
 	help
 	  Freescale General-purpose Timers support
 
-config PCI_8260
-	bool
-	depends on PCI && 8260
-	select PPC_INDIRECT_PCI
-	default y
-
 config FSL_RIO
 	bool "Freescale Embedded SRIO Controller support"
 	depends on RAPIDIO = y && HAVE_RAPIDIO
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index dac7ca1..f19dbaa 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -143,18 +143,21 @@
 CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option,-mno-readonly-in-sdata)
 
 ifdef CONFIG_FUNCTION_TRACER
+ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+KBUILD_CPPFLAGS	+= -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+else
 CC_FLAGS_FTRACE := -pg
 ifdef CONFIG_MPROFILE_KERNEL
 CC_FLAGS_FTRACE += -mprofile-kernel
 endif
 endif
+endif
 
 CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
 AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
 
-CFLAGS-$(CONFIG_POWERPC64_CPU) += $(call cc-option,-mtune=power10,	\
-				  $(call cc-option,-mtune=power9,	\
-				  $(call cc-option,-mtune=power8)))
+CFLAGS-y += $(CONFIG_TUNE_CPU)
 
 asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
 
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
index bec0fc3..f208fb8 100644
--- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -124,10 +124,10 @@ crypto@80000 {
 		reg = <0x80000 0x20000>;
 		ranges = <0x0 0x80000 0x20000>;
 
-		jr@1000{
+		jr@1000 {
 			interrupts = <45 2 0 0>;
 		};
-		jr@2000{
+		jr@2000 {
 			interrupts = <57 2 0 0>;
 		};
 	};
@@ -140,10 +140,10 @@ crypto@a0000 {
 		reg = <0xa0000 0x20000>;
 		ranges = <0x0 0xa0000 0x20000>;
 
-		jr@1000{
+		jr@1000 {
 			interrupts = <49 2 0 0>;
 		};
-		jr@2000{
+		jr@2000 {
 			interrupts = <50 2 0 0>;
 		};
 	};
@@ -156,10 +156,10 @@ crypto@c0000 {
 		reg = <0xc0000 0x20000>;
 		ranges = <0x0 0xc0000 0x20000>;
 
-		jr@1000{
+		jr@1000 {
 			interrupts = <55 2 0 0>;
 		};
-		jr@2000{
+		jr@2000 {
 			interrupts = <56 2 0 0>;
 		};
 	};
diff --git a/arch/powerpc/boot/dts/fsl/p1022rdk.dts b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
index 29e8af1..4261c2f 100644
--- a/arch/powerpc/boot/dts/fsl/p1022rdk.dts
+++ b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
@@ -60,23 +60,23 @@ rtc@68 {
 				compatible = "st,m41t62";
 				reg = <0x68>;
 			};
-			adt7461@4c{
+			adt7461@4c {
 				compatible = "adi,adt7461";
 				reg = <0x4c>;
 			};
-			zl6100@21{
+			zl6100@21 {
 				compatible = "isil,zl6100";
 				reg = <0x21>;
 			};
-			zl6100@24{
+			zl6100@24 {
 				compatible = "isil,zl6100";
 				reg = <0x24>;
 			};
-			zl6100@26{
+			zl6100@26 {
 				compatible = "isil,zl6100";
 				reg = <0x26>;
 			};
-			zl6100@29{
+			zl6100@29 {
 				compatible = "isil,zl6100";
 				reg = <0x29>;
 			};
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
index 5f51b7b..093e4e3 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -238,7 +238,7 @@ global-utilities@e0000 {
 		fsl,has-rstcr;
 	};
 
-	power@e0070{
+	power@e0070 {
 		compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
 		reg = <0xe0070 0x20>;
 	};
diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
index 6f5f728..ca0e027 100644
--- a/arch/powerpc/boot/dts/fsl/p3041ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -41,7 +41,7 @@ / {
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
-	aliases{
+	aliases {
 		phy_rgmii_0 = &phy_rgmii_0;
 		phy_rgmii_1 = &phy_rgmii_1;
 		phy_sgmii_1c = &phy_sgmii_1c;
@@ -165,7 +165,7 @@ adt7461@4c {
 			};
 		};
 
-		fman@400000{
+		fman@400000 {
 			ethernet@e0000 {
 				phy-handle = <&phy_sgmii_1c>;
 				phy-connection-type = "sgmii";
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
index 30850b3..5cfc689e 100644
--- a/arch/powerpc/boot/dts/fsl/p5040ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -41,7 +41,7 @@ / {
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
-	aliases{
+	aliases {
 		phy_sgmii_slot2_1c = &phy_sgmii_slot2_1c;
 		phy_sgmii_slot2_1d = &phy_sgmii_slot2_1d;
 		phy_sgmii_slot2_1e = &phy_sgmii_slot2_1e;
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
index c0913ac..128b5798 100644
--- a/arch/powerpc/boot/dts/fsl/t4240qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -41,7 +41,7 @@ / {
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
-	aliases{
+	aliases {
 		phy_rgmii1 = &phyrgmii1;
 		phy_rgmii2 = &phyrgmii2;
 		phy_sgmii3 = &phy3;
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 3f66b91..d3fc806 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -140,7 +140,7 @@ clks: clock@f00 {
 		};
 
 		/* Power Management Controller */
-		pmc@1000{
+		pmc@1000 {
 			compatible = "fsl,mpc5121-pmc";
 			reg = <0x1000 0x100>;
 			interrupts = <83 0x8>;
diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index 0bd2acc..ee09070 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -104,7 +104,7 @@ clks: clock@f00 {	// Clock control
 			clock-names = "osc";
 		};
 
-		pmc@1000{  // Power Management Controller
+		pmc@1000 {  // Power Management Controller
 			compatible = "fsl,mpc5121-pmc";
 			reg = <0x1000 0x100>;
 			interrupts = <83 0x2>;
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 05ed585..a205da9 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -176,8 +176,9 @@
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIAL_8250=m
-CONFIG_SERIAL_PMACZILOG=m
+CONFIG_SERIAL_PMACZILOG=y
 CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
 CONFIG_NVRAM=y
 CONFIG_I2C_CHARDEV=m
 CONFIG_APM_POWER=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c0f4bbc..6e7b9e8 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -390,8 +390,11 @@
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
+CONFIG_CRYPTO_VPMSUM_TESTER=m
 CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_SHA1_PPC=m
+CONFIG_CRYPTO_AES_GCM_P10=m
 CONFIG_CRYPTO_DEV_NX=y
 CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_CRYPTO_DEV_VMX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 1034aea..eaf3273 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -183,7 +183,6 @@
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 71cfb990a..8d3eacb 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -289,7 +289,6 @@
 # CONFIG_XZ_DEC_SPARC is not set
 CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_SLUB_DEBUG_ON=y
 CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_PANIC_ON_OOPS=y
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index f25024a..803da4a 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -100,7 +100,7 @@
 	select CRYPTO_LIB_AES
 	select CRYPTO_ALGAPI
 	select CRYPTO_AEAD
-	default m
+	select CRYPTO_SKCIPHER
 	help
 	  AEAD cipher: AES cipher algorithms (FIPS-197)
 	  GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
index bdf0563..f9cac46 100644
--- a/arch/powerpc/include/asm/8xx_immap.h
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -560,5 +560,7 @@ typedef struct immap {
 	cpm8xx_t	im_cpm;		/* Communication processor */
 } immap_t;
 
+extern immap_t __iomem *mpc8xx_immr;
+
 #endif /* __IMMAP_8XX__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 419319c..61a8d55 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -3,7 +3,6 @@
 generated-y += syscall_table_64.h
 generated-y += syscall_table_spu.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_types.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 678f9c9..4e14a54 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -9,79 +9,53 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/jump_label.h>
-
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuep_is_disabled(void)
-{
-	return !IS_ENABLED(CONFIG_PPC_KUEP);
-}
-
 #ifdef CONFIG_PPC_KUAP
 
 #include <linux/sched.h>
 
 #define KUAP_NONE	(~0UL)
-#define KUAP_ALL	(~1UL)
 
-static __always_inline bool kuap_is_disabled(void)
-{
-	return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void kuap_lock_one(unsigned long addr)
+static __always_inline void kuap_lock_one(unsigned long addr)
 {
 	mtsr(mfsr(addr) | SR_KS, addr);
 	isync();	/* Context sync required after mtsr() */
 }
 
-static inline void kuap_unlock_one(unsigned long addr)
+static __always_inline void kuap_unlock_one(unsigned long addr)
 {
 	mtsr(mfsr(addr) & ~SR_KS, addr);
 	isync();	/* Context sync required after mtsr() */
 }
 
-static inline void kuap_lock_all(void)
+static __always_inline void uaccess_begin_32s(unsigned long addr)
 {
-	update_user_segments(mfsr(0) | SR_KS);
-	isync();	/* Context sync required after mtsr() */
+	unsigned long tmp;
+
+	asm volatile(ASM_MMU_FTR_IFSET(
+		"mfsrin %0, %1;"
+		"rlwinm %0, %0, 0, %2;"
+		"mtsrin %0, %1;"
+		"isync", "", %3)
+		: "=&r"(tmp)
+		: "r"(addr), "i"(~SR_KS), "i"(MMU_FTR_KUAP)
+		: "memory");
 }
 
-static inline void kuap_unlock_all(void)
+static __always_inline void uaccess_end_32s(unsigned long addr)
 {
-	update_user_segments(mfsr(0) & ~SR_KS);
-	isync();	/* Context sync required after mtsr() */
+	unsigned long tmp;
+
+	asm volatile(ASM_MMU_FTR_IFSET(
+		"mfsrin %0, %1;"
+		"oris %0, %0, %2;"
+		"mtsrin %0, %1;"
+		"isync", "", %3)
+		: "=&r"(tmp)
+		: "r"(addr), "i"(SR_KS >> 16), "i"(MMU_FTR_KUAP)
+		: "memory");
 }
 
-void kuap_lock_all_ool(void);
-void kuap_unlock_all_ool(void);
-
-static inline void kuap_lock_addr(unsigned long addr, bool ool)
-{
-	if (likely(addr != KUAP_ALL))
-		kuap_lock_one(addr);
-	else if (!ool)
-		kuap_lock_all();
-	else
-		kuap_lock_all_ool();
-}
-
-static inline void kuap_unlock(unsigned long addr, bool ool)
-{
-	if (likely(addr != KUAP_ALL))
-		kuap_unlock_one(addr);
-	else if (!ool)
-		kuap_unlock_all();
-	else
-		kuap_unlock_all_ool();
-}
-
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
 {
 	unsigned long kuap = current->thread.kuap;
 
@@ -90,18 +64,19 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs)
 		return;
 
 	current->thread.kuap = KUAP_NONE;
-	kuap_lock_addr(kuap, false);
+	kuap_lock_one(kuap);
 }
+#define __kuap_save_and_lock __kuap_save_and_lock
 
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
 {
 }
 
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
 {
 	if (unlikely(kuap != KUAP_NONE)) {
 		current->thread.kuap = KUAP_NONE;
-		kuap_lock_addr(kuap, false);
+		kuap_lock_one(kuap);
 	}
 
 	if (likely(regs->kuap == KUAP_NONE))
@@ -109,10 +84,10 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua
 
 	current->thread.kuap = regs->kuap;
 
-	kuap_unlock(regs->kuap, false);
+	kuap_unlock_one(regs->kuap);
 }
 
-static inline unsigned long __kuap_get_and_assert_locked(void)
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
 {
 	unsigned long kuap = current->thread.kuap;
 
@@ -120,9 +95,10 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
 
 	return kuap;
 }
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
 
-static __always_inline void __allow_user_access(void __user *to, const void __user *from,
-						u32 size, unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      u32 size, unsigned long dir)
 {
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 
@@ -130,10 +106,10 @@ static __always_inline void __allow_user_access(void __user *to, const void __us
 		return;
 
 	current->thread.kuap = (__force u32)to;
-	kuap_unlock_one((__force u32)to);
+	uaccess_begin_32s((__force u32)to);
 }
 
-static __always_inline void __prevent_user_access(unsigned long dir)
+static __always_inline void prevent_user_access(unsigned long dir)
 {
 	u32 kuap = current->thread.kuap;
 
@@ -143,42 +119,51 @@ static __always_inline void __prevent_user_access(unsigned long dir)
 		return;
 
 	current->thread.kuap = KUAP_NONE;
-	kuap_lock_addr(kuap, true);
+	uaccess_end_32s(kuap);
 }
 
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline unsigned long prevent_user_access_return(void)
 {
 	unsigned long flags = current->thread.kuap;
 
 	if (flags != KUAP_NONE) {
 		current->thread.kuap = KUAP_NONE;
-		kuap_lock_addr(flags, true);
+		uaccess_end_32s(flags);
 	}
 
 	return flags;
 }
 
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
 {
 	if (flags != KUAP_NONE) {
 		current->thread.kuap = flags;
-		kuap_unlock(flags, true);
+		uaccess_begin_32s(flags);
 	}
 }
 
-static inline bool
+static __always_inline bool
 __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	unsigned long kuap = regs->kuap;
 
-	if (!is_write || kuap == KUAP_ALL)
+	if (!is_write)
 		return false;
 	if (kuap == KUAP_NONE)
 		return true;
 
-	/* If faulting address doesn't match unlocked segment, unlock all */
-	if ((kuap ^ address) & 0xf0000000)
-		regs->kuap = KUAP_ALL;
+	/*
+	 * If faulting address doesn't match unlocked segment, change segment.
+	 * In case of unaligned store crossing two segments, emulate store.
+	 */
+	if ((kuap ^ address) & 0xf0000000) {
+		if (!(kuap & 0x0fffffff) && address > kuap - 4 && fix_alignment(regs)) {
+			regs_add_return_ip(regs, 4);
+			emulate_single_step(regs);
+		} else {
+			regs->kuap = address;
+		}
+	}
 
 	return false;
 }
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 5f12b93..9b13eb1 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -493,7 +493,7 @@ static inline pte_t pte_mkpte(pte_t pte)
 	return pte;
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_RW);
 }
@@ -536,58 +536,43 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 
 /* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
+ * Setting the PTE depends on the MMU type and other factors.
+ *
+ * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve
+ * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+ * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+ * and see we need to keep track that this PTE needs invalidating.
+ *
+ * Second case is 32-bit with 64-bit PTE.  In this case, we
+ * can just store as long as we do the two halves in the right order
+ * with a barrier in between. This is possible because we take care,
+ * in the hash code, to pre-invalidate if the PTE was already hashed,
+ * which synchronizes us with any concurrent invalidation.
+ * In the percpu case, we fallback to the simple update preserving
+ * the hash bits (ie, same as the non-SMP case).
+ *
+ * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the
+ * helper pte_update() which does an atomic update. We need to do that
+ * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+ * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving
+ * the hash bits instead.
  */
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, pte_t pte, int percpu)
 {
-#if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-	 * helper pte_update() which does an atomic update. We need to do that
-	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-	 * the hash bits instead (ie, same as the non-SMP case)
-	 */
-	if (percpu)
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-	else
+	if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
+			      (pte_val(pte) & ~_PAGE_HASHPTE));
+	} else if (IS_ENABLED(CONFIG_PTE_64BIT)) {
+		if (pte_val(*ptep) & _PAGE_HASHPTE)
+			flush_hash_entry(mm, ptep, addr);
+
+		asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" :
+			     "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) :
+			     "r" (pte) : "memory");
+	} else {
 		pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
-
-#elif defined(CONFIG_PTE_64BIT)
-	/* Second case is 32-bit with 64-bit PTE.  In this case, we
-	 * can just store as long as we do the two halves in the right order
-	 * with a barrier in between. This is possible because we take care,
-	 * in the hash code, to pre-invalidate if the PTE was already hashed,
-	 * which synchronizes us with any concurrent invalidation.
-	 * In the percpu case, we also fallback to the simple update preserving
-	 * the hash bits
-	 */
-	if (percpu) {
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-		return;
 	}
-	if (pte_val(*ptep) & _PAGE_HASHPTE)
-		flush_hash_entry(mm, ptep, addr);
-	__asm__ __volatile__("\
-		stw%X0 %2,%0\n\
-		eieio\n\
-		stw%X1 %L2,%1"
-	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-	: "r" (pte) : "memory");
-
-#else
-	/* Third case is 32-bit hash table in UP mode, we need to preserve
-	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-	 * and see we need to keep track that this PTE needs invalidating
-	 */
-	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-		      | (pte_val(pte) & ~_PAGE_HASHPTE));
-#endif
 }
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
index f1e60d5..6c5564c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
@@ -24,7 +24,7 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
 		    ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
 		    ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL));
 
-	if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) ||
+	if (mmu_has_feature(MMU_FTR_KUAP) ||
 	    mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
 		if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY))
 			return HASH_DEFAULT_KERNEL_KEY;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 84c09e5..497a7bd 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -31,7 +31,7 @@
 	mfspr	\gpr2, SPRN_AMR
 	cmpd	\gpr1, \gpr2
 	beq	99f
-	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68)
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_KUAP, 68)
 
 	isync
 	mtspr	SPRN_AMR, \gpr1
@@ -78,7 +78,7 @@
 	 * No need to restore IAMR when returning to kernel space.
 	 */
 100:
-	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67)
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
 #endif
 .endm
 
@@ -91,7 +91,7 @@
 	LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
 999:	tdne	\gpr1, \gpr2
 	EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67)
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
 #endif
 .endm
 #endif
@@ -130,7 +130,7 @@
 	 */
 	BEGIN_MMU_FTR_SECTION_NESTED(68)
 	b	100f  // skip_save_amr
-	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68)
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_KUAP, 68)
 
 	/*
 	 * if pkey is disabled and we are entering from userspace
@@ -166,7 +166,7 @@
 	mtspr	SPRN_AMR, \gpr2
 	isync
 102:
-	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69)
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 69)
 
 	/*
 	 * if entering from kernel we don't need save IAMR
@@ -213,14 +213,14 @@ extern u64 __ro_after_init default_iamr;
  * access restrictions. Because of this ignore AMR value when accessing
  * userspace via kernel thread.
  */
-static inline u64 current_thread_amr(void)
+static __always_inline u64 current_thread_amr(void)
 {
 	if (current->thread.regs)
 		return current->thread.regs->amr;
 	return default_amr;
 }
 
-static inline u64 current_thread_iamr(void)
+static __always_inline u64 current_thread_iamr(void)
 {
 	if (current->thread.regs)
 		return current->thread.regs->iamr;
@@ -230,12 +230,7 @@ static inline u64 current_thread_iamr(void)
 
 #ifdef CONFIG_PPC_KUAP
 
-static __always_inline bool kuap_is_disabled(void)
-{
-	return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP);
-}
-
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
 {
 	bool restore_amr = false, restore_iamr = false;
 	unsigned long amr, iamr;
@@ -243,7 +238,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
 	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 
-	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+	if (!mmu_has_feature(MMU_FTR_KUAP)) {
 		amr = mfspr(SPRN_AMR);
 		if (amr != regs->amr)
 			restore_amr = true;
@@ -274,7 +269,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
 	 */
 }
 
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
 {
 	if (likely(regs->amr == amr))
 		return;
@@ -290,7 +285,7 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr
 	 */
 }
 
-static inline unsigned long __kuap_get_and_assert_locked(void)
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
 {
 	unsigned long amr = mfspr(SPRN_AMR);
 
@@ -298,22 +293,16 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
 		WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED);
 	return amr;
 }
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
 
-/* Do nothing, book3s/64 does that in ASM */
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
-{
-}
+/* __kuap_lock() not required, book3s/64 does that in ASM */
 
 /*
  * We support individually allowing read or write, but we don't support nesting
  * because that would require an expensive read/modify write of the AMR.
  */
 
-static inline unsigned long get_kuap(void)
+static __always_inline unsigned long get_kuap(void)
 {
 	/*
 	 * We return AMR_KUAP_BLOCKED when we don't support KUAP because
@@ -323,7 +312,7 @@ static inline unsigned long get_kuap(void)
 	 * This has no effect in terms of actually blocking things on hash,
 	 * so it doesn't break anything.
 	 */
-	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+	if (!mmu_has_feature(MMU_FTR_KUAP))
 		return AMR_KUAP_BLOCKED;
 
 	return mfspr(SPRN_AMR);
@@ -331,7 +320,7 @@ static inline unsigned long get_kuap(void)
 
 static __always_inline void set_kuap(unsigned long value)
 {
-	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+	if (!mmu_has_feature(MMU_FTR_KUAP))
 		return;
 
 	/*
@@ -343,7 +332,8 @@ static __always_inline void set_kuap(unsigned long value)
 	isync();
 }
 
-static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	/*
 	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
@@ -386,12 +376,12 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 
 #else /* CONFIG_PPC_KUAP */
 
-static inline unsigned long get_kuap(void)
+static __always_inline unsigned long get_kuap(void)
 {
 	return AMR_KUAP_BLOCKED;
 }
 
-static inline void set_kuap(unsigned long value) { }
+static __always_inline void set_kuap(unsigned long value) { }
 
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
 					      unsigned long size, unsigned long dir)
@@ -406,7 +396,7 @@ static __always_inline void prevent_user_access(unsigned long dir)
 		do_uaccess_flush();
 }
 
-static inline unsigned long prevent_user_access_return(void)
+static __always_inline unsigned long prevent_user_access_return(void)
 {
 	unsigned long flags = get_kuap();
 
@@ -417,7 +407,7 @@ static inline unsigned long prevent_user_access_return(void)
 	return flags;
 }
 
-static inline void restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
 {
 	set_kuap(flags);
 	if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 570a496..fedbc5d 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -71,10 +71,7 @@ extern unsigned int mmu_pid_bits;
 /* Base PID to allocate from */
 extern unsigned int mmu_base_pid;
 
-/*
- * memory block size used with radix translation.
- */
-extern unsigned long __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init memory_block_size;
 
 #define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
 #define PRTB_ENTRIES	(1ul << mmu_pid_bits)
@@ -261,7 +258,7 @@ static inline void radix_init_pseries(void) { }
 #define arch_clear_mm_cpumask_cpu(cpu, mm)				\
 	do {								\
 		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {		\
-			atomic_dec(&(mm)->context.active_cpus);		\
+			dec_mm_active_cpus(mm);				\
 			cpumask_clear_cpu(cpu, mm_cpumask(mm));		\
 		}							\
 	} while (0)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8269b23..5c497c8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -596,7 +596,7 @@ static inline pte_t pte_mkexec(pte_t pte)
 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	/*
 	 * write implies read, hence set both
@@ -931,7 +931,7 @@ static inline pte_t *pudp_ptep(pud_t *pud)
 #define pud_mkdirty(pud)	pte_pud(pte_mkdirty(pud_pte(pud)))
 #define pud_mkclean(pud)	pte_pud(pte_mkclean(pud_pte(pud)))
 #define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
-#define pud_mkwrite(pud)	pte_pud(pte_mkwrite(pud_pte(pud)))
+#define pud_mkwrite(pud)	pte_pud(pte_mkwrite_novma(pud_pte(pud)))
 #define pud_write(pud)		pte_write(pud_pte(pud))
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -1088,7 +1088,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 00c6b0b..1db485a 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -120,6 +120,7 @@
 struct pt_regs;
 void hash__do_page_fault(struct pt_regs *);
 void bad_page_fault(struct pt_regs *, int);
+void emulate_single_step(struct pt_regs *regs);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index 9ee192a..249d43c 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -1080,6 +1080,9 @@ typedef struct im_idma {
 #define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1)
 #define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2)
 
+/* Pipeline Maximum Depth */
+#define MPC82XX_BCR_PLDP 0x00800000
+
 /* Clocks and GRG's */
 
 enum cpm_clk_dir {
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 443a9d48..8765d51 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -252,7 +252,7 @@ static inline void cpu_feature_keys_init(void) { }
  * This is also required by 52xx family.
  */
 #if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
-	|| defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \
+	|| defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_82xx) \
 	|| defined(CONFIG_PPC_MPC52xx)
 #define CPU_FTR_COMMON                  CPU_FTR_NEED_COHERENT
 #else
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
index 4bcb9f9..d6f43d1 100644
--- a/arch/powerpc/include/asm/dtl.h
+++ b/arch/powerpc/include/asm/dtl.h
@@ -39,6 +39,5 @@ extern rwlock_t dtl_access_lock;
 
 extern void register_dtl_buffer(int cpu);
 extern void alloc_dtl_buffers(unsigned long *time_limit);
-extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
 
 #endif /* _ASM_POWERPC_DTL_H */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index ac605fc..77824bd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -292,6 +292,7 @@ extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
 extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
 
 void apply_feature_fixups(void);
+void update_mmu_feature_fixups(unsigned long mask);
 void setup_feature_keys(void);
 #endif
 
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
index 8def56e..d530f68 100644
--- a/arch/powerpc/include/asm/fs_pd.h
+++ b/arch/powerpc/include/asm/fs_pd.h
@@ -14,28 +14,6 @@
 #include <sysdev/fsl_soc.h>
 #include <asm/time.h>
 
-#ifdef CONFIG_CPM2
-#include <asm/cpm2.h>
-
-#if defined(CONFIG_8260)
-#include <asm/mpc8260.h>
-#endif
-
-#define cpm2_map(member) (&cpm2_immr->member)
-#define cpm2_map_size(member, size) (&cpm2_immr->member)
-#define cpm2_unmap(addr) do {} while(0)
-#endif
-
-#ifdef CONFIG_PPC_8xx
-#include <asm/8xx_immap.h>
-
-extern immap_t __iomem *mpc8xx_immr;
-
-#define immr_map(member) (&mpc8xx_immr->member)
-#define immr_map_size(member, size) (&mpc8xx_immr->member)
-#define immr_unmap(addr) do {} while (0)
-#endif
-
 static inline int uart_baudrate(void)
 {
         return get_baudrate();
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 91c049d..9e5a39b 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -11,8 +11,8 @@
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
 /* Ignore unused weak functions which will have larger offsets */
-#ifdef CONFIG_MPROFILE_KERNEL
-#define FTRACE_MCOUNT_MAX_OFFSET	12
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+#define FTRACE_MCOUNT_MAX_OFFSET	16
 #elif defined(CONFIG_PPC32)
 #define FTRACE_MCOUNT_MAX_OFFSET	8
 #endif
@@ -22,18 +22,26 @@ extern void _mcount(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
-       /* relocation of mcount call site is the same as the address */
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
+		addr += MCOUNT_INSN_SIZE;
+
        return addr;
 }
 
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
 				    unsigned long sp);
 
+struct module;
+struct dyn_ftrace;
 struct dyn_arch_ftrace {
 	struct module *mod;
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define ftrace_need_init_nop()	(true)
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
 struct ftrace_regs {
 	struct pt_regs regs;
 };
@@ -124,15 +132,19 @@ static inline u8 this_cpu_get_ftrace_enabled(void)
 {
 	return get_paca()->ftrace_enabled;
 }
-
-void ftrace_free_init_tramp(void);
 #else /* CONFIG_PPC64 */
 static inline void this_cpu_disable_ftrace(void) { }
 static inline void this_cpu_enable_ftrace(void) { }
 static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
 static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
-static inline void ftrace_free_init_tramp(void) { }
 #endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_FUNCTION_TRACER
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+void ftrace_free_init_tramp(void);
+#else
+static inline void ftrace_free_init_tramp(void) { }
+#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_FTRACE */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 84d39fd..66db014 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@ struct arch_hw_breakpoint {
 	u16		len; /* length of the target data symbol */
 	u16		hw_len; /* length programmed in hw */
 	u8		flags;
+	bool		perf_single_step; /* temporarily uninstalled for a perf single step */
 };
 
 /* Note: Don't change the first 6 bits below as they are in the same order
diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h
index 088f95b..6f33253 100644
--- a/arch/powerpc/include/asm/ibmebus.h
+++ b/arch/powerpc/include/asm/ibmebus.h
@@ -46,6 +46,8 @@
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
+struct platform_driver;
+
 extern struct bus_type ibmebus_bus_type;
 
 int ibmebus_register_driver(struct platform_driver *drv);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 34e14df..0266959 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -28,6 +28,9 @@
 #define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1))
 #define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr))
 
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
+
 /* Boot time flags */
 extern int iommu_is_off;
 extern int iommu_force_on;
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 6fd2b4d..424ceef 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -23,7 +23,7 @@ static inline bool arch_kfence_init_pool(void)
 #ifdef CONFIG_PPC64
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
-	struct page *page = virt_to_page(addr);
+	struct page *page = virt_to_page((void *)addr);
 
 	__kernel_map_pages(page, 1, !protect);
 
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index d751ddd..ad7e8c5a 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -6,6 +6,12 @@
 #define KUAP_WRITE	2
 #define KUAP_READ_WRITE	(KUAP_READ | KUAP_WRITE)
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+static __always_inline bool kuap_is_disabled(void);
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kup.h>
 #endif
@@ -41,26 +47,24 @@ void setup_kuep(bool disabled);
 
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled);
+
+static __always_inline bool kuap_is_disabled(void)
+{
+	return !mmu_has_feature(MMU_FTR_KUAP);
+}
 #else
 static inline void setup_kuap(bool disabled) { }
 
 static __always_inline bool kuap_is_disabled(void) { return true; }
 
-static inline bool
+static __always_inline bool
 __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return false;
 }
 
-static inline void __kuap_lock(void) { }
-static inline void __kuap_save_and_lock(struct pt_regs *regs) { }
-static inline void kuap_user_restore(struct pt_regs *regs) { }
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
-
-static inline unsigned long __kuap_get_and_assert_locked(void)
-{
-	return 0;
-}
+static __always_inline void kuap_user_restore(struct pt_regs *regs) { }
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
@@ -68,11 +72,11 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
  * platforms.
  */
 #ifndef CONFIG_PPC_BOOK3S_64
-static inline void __allow_user_access(void __user *to, const void __user *from,
-				       unsigned long size, unsigned long dir) { }
-static inline void __prevent_user_access(unsigned long dir) { }
-static inline unsigned long __prevent_user_access_return(void) { return 0UL; }
-static inline void __restore_user_access(unsigned long flags) { }
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir) { }
+static __always_inline void prevent_user_access(unsigned long dir) { }
+static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static __always_inline void restore_user_access(unsigned long flags) { }
 #endif /* CONFIG_PPC_BOOK3S_64 */
 #endif /* CONFIG_PPC_KUAP */
 
@@ -85,29 +89,24 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 	return __bad_kuap_fault(regs, address, is_write);
 }
 
-static __always_inline void kuap_assert_locked(void)
-{
-	if (kuap_is_disabled())
-		return;
-
-	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
-		__kuap_get_and_assert_locked();
-}
-
 static __always_inline void kuap_lock(void)
 {
+#ifdef __kuap_lock
 	if (kuap_is_disabled())
 		return;
 
 	__kuap_lock();
+#endif
 }
 
 static __always_inline void kuap_save_and_lock(struct pt_regs *regs)
 {
+#ifdef __kuap_save_and_lock
 	if (kuap_is_disabled())
 		return;
 
 	__kuap_save_and_lock(regs);
+#endif
 }
 
 static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
@@ -120,47 +119,19 @@ static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned l
 
 static __always_inline unsigned long kuap_get_and_assert_locked(void)
 {
-	if (kuap_is_disabled())
-		return 0;
-
-	return __kuap_get_and_assert_locked();
+#ifdef __kuap_get_and_assert_locked
+	if (!kuap_is_disabled())
+		return __kuap_get_and_assert_locked();
+#endif
+	return 0;
 }
 
-#ifndef CONFIG_PPC_BOOK3S_64
-static __always_inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size, unsigned long dir)
+static __always_inline void kuap_assert_locked(void)
 {
-	if (kuap_is_disabled())
-		return;
-
-	__allow_user_access(to, from, size, dir);
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		kuap_get_and_assert_locked();
 }
 
-static __always_inline void prevent_user_access(unsigned long dir)
-{
-	if (kuap_is_disabled())
-		return;
-
-	__prevent_user_access(dir);
-}
-
-static __always_inline unsigned long prevent_user_access_return(void)
-{
-	if (kuap_is_disabled())
-		return 0;
-
-	return __prevent_user_access_return();
-}
-
-static __always_inline void restore_user_access(unsigned long flags)
-{
-	if (kuap_is_disabled())
-		return;
-
-	__restore_user_access(flags);
-}
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
 {
 	barrier_nospec();
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 34d44cb..61ec244 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -6,28 +6,6 @@
 #ifndef _ASM_POWERPC_LPPACA_H
 #define _ASM_POWERPC_LPPACA_H
 
-/*
- * The below VPHN macros are outside the __KERNEL__ check since these are
- * used for compiling the vphn selftest in userspace
- */
-
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
-#define VPHN_REGISTER_COUNT 6
-
-/*
- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
- * form the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
-
-/*
- * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
- * 1 for retrieving associativity information for a guest cpu
- * 2 for retrieving associativity information for a host/hypervisor cpu
- */
-#define VPHN_FLAG_VCPU	1
-#define VPHN_FLAG_PCPU	2
-
 #ifdef __KERNEL__
 
 /*
@@ -45,6 +23,7 @@
 #include <asm/types.h>
 #include <asm/mmu.h>
 #include <asm/firmware.h>
+#include <asm/paca.h>
 
 /*
  * The lppaca is the "virtual processor area" registered with the hypervisor,
@@ -127,13 +106,23 @@ struct lppaca {
  */
 #define LPPACA_OLD_SHARED_PROC		2
 
-static inline bool lppaca_shared_proc(struct lppaca *l)
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * All CPUs should have the same shared proc value, so directly access the PACA
+ * to avoid false positives from DEBUG_PREEMPT.
+ */
+static inline bool lppaca_shared_proc(void)
 {
+	struct lppaca *l = local_paca->lppaca_ptr;
+
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return false;
 	return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
 }
 
+#define get_lppaca()	(get_paca()->lppaca_ptr)
+#endif
+
 /*
  * SLB shadow buffer structure as defined in the PAPR.  The save_area
  * contains adjacent ESID and VSID pairs for each shadowed SLB.  The
@@ -149,8 +138,6 @@ struct slb_shadow {
 	} save_area[SLB_NUM_BOLTED];
 } ____cacheline_aligned;
 
-extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
-
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
index ff5fd82..3a07c62 100644
--- a/arch/powerpc/include/asm/macio.h
+++ b/arch/powerpc/include/asm/macio.h
@@ -3,7 +3,8 @@
 #define __MACIO_ASIC_H__
 #ifdef __KERNEL__
 
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 extern struct bus_type macio_bus_type;
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 94b9811..52cc258 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -33,7 +33,7 @@
  * key 0 controlling userspace addresses on radix
  * Key 3 on hash
  */
-#define MMU_FTR_BOOK3S_KUAP		ASM_CONST(0x00000200)
+#define MMU_FTR_KUAP		ASM_CONST(0x00000200)
 
 /*
  * Supports KUEP feature
@@ -144,11 +144,6 @@
 
 typedef pte_t *pgtable_t;
 
-#ifdef CONFIG_PPC_E500
-#include <asm/percpu.h>
-DECLARE_PER_CPU(int, next_tlbcam_idx);
-#endif
-
 enum {
 	MMU_FTRS_POSSIBLE =
 #if defined(CONFIG_PPC_BOOK3S_604)
@@ -188,7 +183,7 @@ enum {
 #endif /* CONFIG_PPC_RADIX_MMU */
 #endif
 #ifdef CONFIG_PPC_KUAP
-	MMU_FTR_BOOK3S_KUAP |
+	MMU_FTR_KUAP |
 #endif /* CONFIG_PPC_KUAP */
 #ifdef CONFIG_PPC_MEM_KEYS
 	MMU_FTR_PKEY |
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 57f5017..37bffa0 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -127,6 +127,7 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm)
 
 static inline void dec_mm_active_cpus(struct mm_struct *mm)
 {
+	VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0);
 	atomic_dec(&mm->context.active_cpus);
 }
 
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index ac53606..a8e2e833 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -75,10 +75,6 @@ struct mod_arch_specific {
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-#    ifdef MODULE
-	asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
-#    endif	/* MODULE */
-
 int module_trampoline_target(struct module *mod, unsigned long trampoline,
 			     unsigned long *target);
 int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs);
diff --git a/arch/powerpc/include/asm/mpc8260.h b/arch/powerpc/include/asm/mpc8260.h
deleted file mode 100644
index 155114b..0000000
--- a/arch/powerpc/include/asm/mpc8260.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Since there are many different boards and no standard configuration,
- * we have a unique include file for each.  Rather than change every
- * file that has to include MPC8260 configuration, they all include
- * this one and the configuration switching is done here.
- */
-#ifdef __KERNEL__
-#ifndef __ASM_POWERPC_MPC8260_H__
-#define __ASM_POWERPC_MPC8260_H__
-
-#define MPC82XX_BCR_PLDP 0x00800000 /* Pipeline Maximum Depth */
-
-#ifdef CONFIG_8260
-
-#ifdef CONFIG_PCI_8260
-#include <platforms/82xx/m82xx_pci.h>
-#endif
-
-#endif /* CONFIG_8260 */
-#endif /* !__ASM_POWERPC_MPC8260_H__ */
-#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index c44d977..46bc592 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -9,76 +9,74 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/jump_label.h>
-
 #include <asm/reg.h>
 
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuap_is_disabled(void)
-{
-	return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
 {
 	regs->kuap = mfspr(SPRN_MD_AP);
 	mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
+#define __kuap_save_and_lock __kuap_save_and_lock
 
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
 {
 }
 
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
 {
 	mtspr(SPRN_MD_AP, regs->kuap);
 }
 
-static inline unsigned long __kuap_get_and_assert_locked(void)
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
 {
-	unsigned long kuap;
+	WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16);
 
-	kuap = mfspr(SPRN_MD_AP);
+	return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
 
-	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
-		WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
-
-	return kuap;
+static __always_inline void uaccess_begin_8xx(unsigned long val)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+	    "i"(SPRN_MD_AP), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
 }
 
-static inline void __allow_user_access(void __user *to, const void __user *from,
-				       unsigned long size, unsigned long dir)
+static __always_inline void uaccess_end_8xx(void)
 {
-	mtspr(SPRN_MD_AP, MD_APG_INIT);
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+	    "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory");
 }
 
-static inline void __prevent_user_access(unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
 {
-	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+	uaccess_begin_8xx(MD_APG_INIT);
 }
 
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	uaccess_end_8xx();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
 {
 	unsigned long flags;
 
 	flags = mfspr(SPRN_MD_AP);
 
-	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+	uaccess_end_8xx();
 
 	return flags;
 }
 
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
 {
-	mtspr(SPRN_MD_AP, flags);
+	uaccess_begin_8xx(flags);
 }
 
-static inline bool
+static __always_inline bool
 __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index fec56d9..f99c53a 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -170,8 +170,8 @@ void unmap_kernel_page(unsigned long va);
 #define pte_clear(mm, addr, ptep) \
 	do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0)
 
-#ifndef pte_mkwrite
-static inline pte_t pte_mkwrite(pte_t pte)
+#ifndef pte_mkwrite_novma
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_RW);
 }
@@ -355,7 +355,7 @@ static inline int pte_young(pte_t pte)
 #define pmd_pfn(pmd)		(pmd_val(pmd) >> PAGE_SHIFT)
 #else
 #define pmd_page_vaddr(pmd)	\
-	((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+	((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
 #define pmd_pfn(pmd)		(__pa(pmd_val(pmd)) >> PAGE_SHIFT)
 #endif
 
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 1a89ebd..21f681e 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -101,12 +101,12 @@ static inline int pte_write(pte_t pte)
 
 #define pte_write pte_write
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) & ~_PAGE_RO);
 }
 
-#define pte_mkwrite pte_mkwrite
+#define pte_mkwrite_novma pte_mkwrite_novma
 
 static inline bool pte_user(pte_t pte)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 287e258..5cd9acf 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -85,7 +85,7 @@
 #ifndef __ASSEMBLY__
 /* pte_clear moved to later in this file */
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_RW);
 }
@@ -127,7 +127,7 @@ static inline pte_t pmd_pte(pmd_t pmd)
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
 #define	pmd_present(pmd)	(!pmd_none(pmd))
-#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_page_vaddr(pmd)	((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS))
 extern struct page *pmd_page(pmd_t pmd);
 #define pmd_pfn(pmd)		(page_to_pfn(pmd_page(pmd)))
 
diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h
index 49bb41e..0c7c325 100644
--- a/arch/powerpc/include/asm/nohash/kup-booke.h
+++ b/arch/powerpc/include/asm/nohash/kup-booke.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_KUP_BOOKE_H_
 
 #include <asm/bug.h>
+#include <asm/mmu.h>
 
 #ifdef CONFIG_PPC_KUAP
 
@@ -13,32 +14,26 @@
 
 #else
 
-#include <linux/jump_label.h>
 #include <linux/sched.h>
 
 #include <asm/reg.h>
 
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuap_is_disabled(void)
-{
-	return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void __kuap_lock(void)
+static __always_inline void __kuap_lock(void)
 {
 	mtspr(SPRN_PID, 0);
 	isync();
 }
+#define __kuap_lock __kuap_lock
 
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
 {
 	regs->kuap = mfspr(SPRN_PID);
 	mtspr(SPRN_PID, 0);
 	isync();
 }
+#define __kuap_save_and_lock __kuap_save_and_lock
 
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
 {
 	if (kuap_is_disabled())
 		return;
@@ -48,7 +43,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
 	/* Context synchronisation is performed by rfi */
 }
 
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
 {
 	if (regs->kuap)
 		mtspr(SPRN_PID, current->thread.pid);
@@ -56,48 +51,55 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua
 	/* Context synchronisation is performed by rfi */
 }
 
-static inline unsigned long __kuap_get_and_assert_locked(void)
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
 {
-	unsigned long kuap = mfspr(SPRN_PID);
+	WARN_ON_ONCE(mfspr(SPRN_PID));
 
-	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
-		WARN_ON_ONCE(kuap);
+	return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
 
-	return kuap;
+static __always_inline void uaccess_begin_booke(unsigned long val)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+	    "i"(SPRN_PID), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
 }
 
-static inline void __allow_user_access(void __user *to, const void __user *from,
-				       unsigned long size, unsigned long dir)
+static __always_inline void uaccess_end_booke(void)
 {
-	mtspr(SPRN_PID, current->thread.pid);
-	isync();
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+	    "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory");
 }
 
-static inline void __prevent_user_access(unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
 {
-	mtspr(SPRN_PID, 0);
-	isync();
+	uaccess_begin_booke(current->thread.pid);
 }
 
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	uaccess_end_booke();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
 {
 	unsigned long flags = mfspr(SPRN_PID);
 
-	mtspr(SPRN_PID, 0);
-	isync();
+	uaccess_end_booke();
 
 	return flags;
 }
 
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
 {
-	if (flags) {
-		mtspr(SPRN_PID, current->thread.pid);
-		isync();
-	}
+	if (flags)
+		uaccess_begin_booke(current->thread.pid);
 }
 
-static inline bool
+static __always_inline bool
 __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return !regs->kuap;
diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
index e43a418..6ddced0 100644
--- a/arch/powerpc/include/asm/nohash/mmu-e500.h
+++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
@@ -319,6 +319,9 @@ extern int book3e_htw_mode;
 
 #endif
 
+#include <asm/percpu.h>
+DECLARE_PER_CPU(int, next_tlbcam_idx);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index cb32593..e667d45 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -15,7 +15,6 @@
 #include <linux/cache.h>
 #include <linux/string.h>
 #include <asm/types.h>
-#include <asm/lppaca.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -47,14 +46,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
 #define get_paca()	local_paca
 #endif
 
-#ifdef CONFIG_PPC_PSERIES
-#define get_lppaca()	(get_paca()->lppaca_ptr)
-#endif
-
 #define get_slb_shadow()	(get_paca()->slb_shadow_ptr)
 
 struct task_struct;
 struct rtas_args;
+struct lppaca;
 
 /*
  * Defines the layout of the paca.
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f2b6bf5..e5fcc79 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -9,6 +9,7 @@
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/bug.h>
 #else
 #include <asm/types.h>
 #endif
@@ -119,16 +120,6 @@ extern long long virt_phys_offset;
 #define ARCH_PFN_OFFSET		((unsigned long)(MEMORY_START >> PAGE_SHIFT))
 #endif
 
-#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
-#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
-
-#define virt_addr_valid(vaddr)	({					\
-	unsigned long _addr = (unsigned long)vaddr;			\
-	_addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&	\
-	pfn_valid(virt_to_pfn(_addr));					\
-})
-
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
  * determine MEMORY_START until then.  However we can determine PHYSICAL_START
@@ -233,6 +224,25 @@ extern long long virt_phys_offset;
 #endif
 #endif
 
+#ifndef __ASSEMBLY__
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline const void *pfn_to_kaddr(unsigned long pfn)
+{
+	return __va(pfn << PAGE_SHIFT);
+}
+#endif
+
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr)	({					\
+	unsigned long _addr = (unsigned long)vaddr;			\
+	_addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&	\
+	pfn_valid(virt_to_pfn((void *)_addr));				\
+})
+
 /*
  * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
  * and needs to be executable.  This means the whole heap ends
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index f5ba1a3..e08513d 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -6,6 +6,7 @@
 #include <asm/smp.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
+#include <asm/lppaca.h>
 #include <asm/hvcall.h>
 #endif
 
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 289f1ec..f5078a7 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -82,7 +82,8 @@ extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 				      struct vm_area_struct *vma,
 				      enum pci_mmap_state mmap_state);
-
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+				   enum pci_mmap_state mmap_type);
 #define HAVE_PCI_LEGACY	1
 
 extern void pcibios_claim_one_bus(struct pci_bus *b);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b2e9bc4..d0ee46d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -72,9 +72,9 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 }
 
 #ifndef pmd_page_vaddr
-static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+static inline const void *pmd_page_vaddr(pmd_t pmd)
 {
-	return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS));
+	return __va(pmd_val(pmd) & ~PMD_MASKED_BITS);
 }
 #define pmd_page_vaddr pmd_page_vaddr
 #endif
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 8239c0a..fe3d0ea 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -9,6 +9,7 @@
 
 #include <asm/hvcall.h>
 #include <asm/paca.h>
+#include <asm/lppaca.h>
 #include <asm/page.h>
 
 static inline long poll_pending(void)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ef6972aa..0056012 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -397,6 +397,7 @@
 #define PPC_RAW_RFCI			(0x4c000066)
 #define PPC_RAW_RFDI			(0x4c00004e)
 #define PPC_RAW_RFMCI			(0x4c00004c)
+#define PPC_RAW_TLBILX_LPID		(0x7c000024)
 #define PPC_RAW_TLBILX(t, a, b)		(0x7c000024 | __PPC_T_TLB(t) | 	__PPC_RA0(a) | __PPC_RB(b))
 #define PPC_RAW_WAIT_v203		(0x7c00007c)
 #define PPC_RAW_WAIT(w, p)		(0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
@@ -616,6 +617,7 @@
 #define PPC_TLBILX(t, a, b)	stringify_in_c(.long PPC_RAW_TLBILX(t, a, b))
 #define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
 #define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
+#define PPC_TLBILX_LPID		stringify_in_c(.long PPC_RAW_TLBILX_LPID)
 #define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
 #define PPC_WAIT_v203		stringify_in_c(.long PPC_RAW_WAIT_v203)
 #define PPC_WAIT(w, p)		stringify_in_c(.long PPC_RAW_WAIT(w, p))
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a6c7069..b2c51d3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -172,11 +172,6 @@ struct thread_struct {
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	struct perf_event *ptrace_bps[HBP_NUM_MAX];
-	/*
-	 * Helps identify source of single-step exception and subsequent
-	 * hw-breakpoint enablement
-	 */
-	struct perf_event *last_hit_ubp[HBP_NUM_MAX];
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 	struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
 	unsigned long	trap_nr;	/* last trap # on this thread */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bb01212..4ae4ab9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1414,11 +1414,9 @@ static inline void mtmsr_isync(unsigned long val)
 #define mfspr(rn)	({unsigned long rval; \
 			asm volatile("mfspr %0," __stringify(rn) \
 				: "=r" (rval)); rval;})
-#ifndef mtspr
 #define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : \
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
-#endif
 #define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
 
 static inline void wrtee(unsigned long val)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3abe15ac..c697c3c 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -202,7 +202,9 @@ typedef struct {
 #define RTAS_USER_REGION_SIZE (64 * 1024)
 
 /* RTAS return status codes */
+#define RTAS_HARDWARE_ERROR	-1    /* Hardware Error */
 #define RTAS_BUSY		-2    /* RTAS Busy */
+#define RTAS_INVALID_PARAMETER	-3    /* Invalid indicator/domain/sensor etc. */
 #define RTAS_EXTENDED_DELAY_MIN	9900
 #define RTAS_EXTENDED_DELAY_MAX	9905
 
@@ -425,6 +427,7 @@ extern int rtas_set_indicator(int indicator, int index, int new_value);
 extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
 int rtas_ibm_suspend_me(int *fw_status);
+int rtas_error_rc(int rtas_rc);
 
 struct rtc_time;
 extern time64_t rtas_get_boot_time(void);
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 4e1f548..ea26665 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -74,6 +74,8 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
 		(unsigned long)_stext < end;
 }
 
+#else
+static inline unsigned long kernel_toc_addr(void) { BUILD_BUG(); return -1UL; }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index e29e83f..eed74c1 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -8,7 +8,6 @@
 extern void ppc_printk_progress(char *s, unsigned short hex);
 
 extern unsigned long long memory_limit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 struct device_node;
 
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8a4d4f4..f4e6f2d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -143,5 +143,20 @@ static inline int cpu_to_coregroup_id(int cpu)
 #endif
 #endif
 
+#ifdef CONFIG_HOTPLUG_SMT
+#include <linux/cpu_smt.h>
+#include <asm/cputhreads.h>
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+	return cpu == cpu_first_thread_sibling(cpu);
+}
+
+static inline bool topology_smt_thread_allowed(unsigned int cpu)
+{
+	return cpu_thread_in_core(cpu) < cpu_smt_num_threads;
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_TOPOLOGY_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a2d255aa..fb725ec 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -386,7 +386,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n)
 extern long __copy_from_user_flushcache(void *dst, const void __user *src,
 		unsigned size);
 
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
 {
 	if (unlikely(!access_ok(ptr, len)))
 		return false;
@@ -401,7 +401,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 #define user_access_save	prevent_user_access_return
 #define user_access_restore	restore_user_access
 
-static __must_check inline bool
+static __must_check __always_inline bool
 user_read_access_begin(const void __user *ptr, size_t len)
 {
 	if (unlikely(!access_ok(ptr, len)))
@@ -415,7 +415,7 @@ user_read_access_begin(const void __user *ptr, size_t len)
 #define user_read_access_begin	user_read_access_begin
 #define user_read_access_end		prevent_current_read_from_user
 
-static __must_check inline bool
+static __must_check __always_inline bool
 user_write_access_begin(const void __user *ptr, size_t len)
 {
 	if (unlikely(!access_ok(ptr, len)))
diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h
index b054a85..6f250fe 100644
--- a/arch/powerpc/include/asm/vermagic.h
+++ b/arch/powerpc/include/asm/vermagic.h
@@ -2,7 +2,9 @@
 #ifndef _ASM_VERMAGIC_H
 #define _ASM_VERMAGIC_H
 
-#ifdef CONFIG_MPROFILE_KERNEL
+#ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+#define MODULE_ARCH_VERMAGIC_FTRACE	"patchable-function-entry "
+#elif defined(CONFIG_MPROFILE_KERNEL)
 #define MODULE_ARCH_VERMAGIC_FTRACE	"mprofile-kernel "
 #else
 #define MODULE_ARCH_VERMAGIC_FTRACE	""
diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h
new file mode 100644
index 0000000..8c2f795
--- /dev/null
+++ b/arch/powerpc/include/asm/vphn.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_VPHN_H
+#define _ASM_POWERPC_VPHN_H
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU	1
+#define VPHN_FLAG_PCPU	2
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif // _ASM_POWERPC_VPHN_H
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index 1bcfca5f..92298d6 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -4,6 +4,8 @@
 #include <linux/audit.h>
 #include <asm/unistd.h>
 
+#include "audit_32.h"
+
 static unsigned dir_class[] = {
 #include <asm-generic/audit_dir_write.h>
 ~0U
@@ -41,7 +43,6 @@ int audit_classify_arch(int arch)
 int audit_classify_syscall(int abi, unsigned syscall)
 {
 #ifdef CONFIG_PPC64
-	extern int ppc32_classify_syscall(unsigned);
 	if (abi == AUDIT_ARCH_PPC)
 		return ppc32_classify_syscall(syscall);
 #endif
diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h
new file mode 100644
index 0000000..c6c79c3
--- /dev/null
+++ b/arch/powerpc/kernel/audit_32.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __AUDIT_32_H__
+#define __AUDIT_32_H__
+
+extern int ppc32_classify_syscall(unsigned);
+
+#endif
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index d92ffe4..57b38c5 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -3,6 +3,8 @@
 #include <linux/audit_arch.h>
 #include <asm/unistd.h>
 
+#include "audit_32.h"
+
 unsigned ppc32_dir_class[] = {
 #include <asm-generic/audit_dir_write.h>
 ~0U
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 8a32bff..e97a0fd 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -75,6 +75,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
 		t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
 	}
 
+	/* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+	if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+		t->mmu_features |= MMU_FTR_KUAP;
+
 	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
 
 	/*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index fe27d41..9692acb 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -29,7 +29,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 #include <asm/barrier.h>
 #include <asm/kup.h>
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 033116e..1a9b5ae 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -3,6 +3,7 @@
  * Copyright (C) 2012 Freescale Semiconductor, Inc.
  */
 
+#include <linux/export.h>
 #include <linux/threads.h>
 #include <asm/epapr_hcalls.h>
 #include <asm/reg.h>
@@ -12,7 +13,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 
 #ifndef CONFIG_PPC64
 /* epapr_ev_idle() was derived from e500_idle() */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ea0a073..3ff2da7 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -654,6 +654,7 @@ int __init fadump_reserve_mem(void)
 	return ret;
 error_out:
 	fw_dump.fadump_enabled = 0;
+	fw_dump.reserve_dump_area_size = 0;
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index f71f2bb..6a9acfb 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -9,6 +9,7 @@
  *    Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
  */
 
+#include <linux/export.h>
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -18,7 +19,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
 
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 3f68a16..b32e7b2 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -38,7 +38,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 
 #include "head_32.h"
 
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 63a85c1..a3197c9 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -35,7 +35,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/synch.h>
-#include <asm/export.h>
 #include <asm/code-patching-asm.h>
 #include "head_booke.h"
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 6440b1b..4690c21 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -40,7 +40,6 @@
 #include <asm/hw_irq.h>
 #include <asm/cputhreads.h>
 #include <asm/ppc-opcode.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/exception-64s.h>
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index fdbee10..97e9ea0 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -40,7 +40,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 #include "head_booke.h"
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a79751e..647b0b4 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -29,7 +29,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/code-patching-asm.h>
 #include <asm/interrupt.h>
 
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index c51f28b..6764b98 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -31,7 +31,6 @@
 #include <asm/ptrace.h>
 #include <asm/bug.h>
 #include <asm/kvm_book3s_asm.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 #include <asm/interrupt.h>
 
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index e1b4e70..b8513dc 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -43,16 +43,6 @@ int hw_breakpoint_slots(int type)
 	return 0;		/* no instruction breakpoints available */
 }
 
-static bool single_step_pending(void)
-{
-	int i;
-
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (current->thread.last_hit_ubp[i])
-			return true;
-	}
-	return false;
-}
 
 /*
  * Install a perf counter breakpoint.
@@ -84,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 	 * Do not install DABR values if the instruction must be single-stepped.
 	 * If so, DABR will be populated in single_step_dabr_instruction().
 	 */
-	if (!single_step_pending())
+	if (!info->perf_single_step)
 		__set_breakpoint(i, info);
 
 	return 0;
@@ -124,275 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp)
 	return bp->overflow_handler == ptrace_triggered;
 }
 
-struct breakpoint {
-	struct list_head list;
-	struct perf_event *bp;
-	bool ptrace_bp;
-};
-
-/*
- * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
- * rely on it safely synchronizing internals here; however, we can rely on it
- * not requesting more breakpoints than available.
- */
-static DEFINE_SPINLOCK(cpu_bps_lock);
-static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
-static DEFINE_SPINLOCK(task_bps_lock);
-static LIST_HEAD(task_bps);
-
-static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-
-	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
-		return ERR_PTR(-ENOMEM);
-	tmp->bp = bp;
-	tmp->ptrace_bp = is_ptrace_bp(bp);
-	return tmp;
-}
-
-static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
-{
-	__u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
-
-	bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
-	bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
-	bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
-	bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
-
-	return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
-}
-
-static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
-{
-	return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
-}
-
-static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
-{
-	return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
-}
-
-static int task_bps_add(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-
-	tmp = alloc_breakpoint(bp);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	spin_lock(&task_bps_lock);
-	list_add(&tmp->list, &task_bps);
-	spin_unlock(&task_bps_lock);
-	return 0;
-}
-
-static void task_bps_remove(struct perf_event *bp)
-{
-	struct list_head *pos, *q;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_safe(pos, q, &task_bps) {
-		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
-
-		if (tmp->bp == bp) {
-			list_del(&tmp->list);
-			kfree(tmp);
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-}
-
-/*
- * If any task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool all_task_bps_check(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-	bool ret = false;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_entry(tmp, &task_bps, list) {
-		if (!can_co_exist(tmp, bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-	return ret;
-}
-
-/*
- * If same task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool same_task_bps_check(struct perf_event *bp)
-{
-	struct breakpoint *tmp;
-	bool ret = false;
-
-	spin_lock(&task_bps_lock);
-	list_for_each_entry(tmp, &task_bps, list) {
-		if (tmp->bp->hw.target == bp->hw.target &&
-		    !can_co_exist(tmp, bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&task_bps_lock);
-	return ret;
-}
-
-static int cpu_bps_add(struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	struct breakpoint *tmp;
-	int i = 0;
-
-	tmp = alloc_breakpoint(bp);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!cpu_bp[i]) {
-			cpu_bp[i] = tmp;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-	return 0;
-}
-
-static void cpu_bps_remove(struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	int i = 0;
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!cpu_bp[i])
-			continue;
-
-		if (cpu_bp[i]->bp == bp) {
-			kfree(cpu_bp[i]);
-			cpu_bp[i] = NULL;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-}
-
-static bool cpu_bps_check(int cpu, struct perf_event *bp)
-{
-	struct breakpoint **cpu_bp;
-	bool ret = false;
-	int i;
-
-	spin_lock(&cpu_bps_lock);
-	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
-	for (i = 0; i < nr_wp_slots(); i++) {
-		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
-			ret = true;
-			break;
-		}
-	}
-	spin_unlock(&cpu_bps_lock);
-	return ret;
-}
-
-static bool all_cpu_bps_check(struct perf_event *bp)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (cpu_bps_check(cpu, bp))
-			return true;
-	}
-	return false;
-}
-
-int arch_reserve_bp_slot(struct perf_event *bp)
-{
-	int ret;
-
-	/* ptrace breakpoint */
-	if (is_ptrace_bp(bp)) {
-		if (all_cpu_bps_check(bp))
-			return -ENOSPC;
-
-		if (same_task_bps_check(bp))
-			return -ENOSPC;
-
-		return task_bps_add(bp);
-	}
-
-	/* perf breakpoint */
-	if (is_kernel_addr(bp->attr.bp_addr))
-		return 0;
-
-	if (bp->hw.target && bp->cpu == -1) {
-		if (same_task_bps_check(bp))
-			return -ENOSPC;
-
-		return task_bps_add(bp);
-	} else if (!bp->hw.target && bp->cpu != -1) {
-		if (all_task_bps_check(bp))
-			return -ENOSPC;
-
-		return cpu_bps_add(bp);
-	}
-
-	if (same_task_bps_check(bp))
-		return -ENOSPC;
-
-	ret = cpu_bps_add(bp);
-	if (ret)
-		return ret;
-	ret = task_bps_add(bp);
-	if (ret)
-		cpu_bps_remove(bp);
-
-	return ret;
-}
-
-void arch_release_bp_slot(struct perf_event *bp)
-{
-	if (!is_kernel_addr(bp->attr.bp_addr)) {
-		if (bp->hw.target)
-			task_bps_remove(bp);
-		if (bp->cpu != -1)
-			cpu_bps_remove(bp);
-	}
-}
-
-/*
- * Perform cleanup of arch-specific counters during unregistration
- * of the perf-event
- */
-void arch_unregister_hw_breakpoint(struct perf_event *bp)
-{
-	/*
-	 * If the breakpoint is unregistered between a hw_breakpoint_handler()
-	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
-	 * restoration variables to prevent dangling pointers.
-	 * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
-	 */
-	if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) {
-		int i;
-
-		for (i = 0; i < nr_wp_slots(); i++) {
-			if (bp->ctx->task->thread.last_hit_ubp[i] == bp)
-				bp->ctx->task->thread.last_hit_ubp[i] = NULL;
-		}
-	}
-}
-
 /*
  * Check for virtual address in kernel space.
  */
@@ -499,6 +220,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
  * Restores the breakpoint on the debug registers.
  * Invoke this function if it is known that the execution context is
  * about to change to cause loss of MSR_SE settings.
+ *
+ * The perf watchpoint will simply re-trigger once the thread is started again,
+ * and the watchpoint handler will set up MSR_SE and perf_single_step as
+ * needed.
  */
 void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 {
@@ -506,7 +231,9 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 	int i;
 
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (unlikely(tsk->thread.last_hit_ubp[i]))
+		struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
+
+		if (unlikely(bp && counter_arch_bp(bp)->perf_single_step))
 			goto reset;
 	}
 	return;
@@ -516,7 +243,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 	for (i = 0; i < nr_wp_slots(); i++) {
 		info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
 		__set_breakpoint(i, info);
-		tsk->thread.last_hit_ubp[i] = NULL;
+		info->perf_single_step = false;
 	}
 }
 
@@ -534,23 +261,22 @@ static bool is_octword_vsx_instr(int type, int size)
  * We've failed in reliably handling the hw-breakpoint. Unregister
  * it and throw a warning message to let the user know about it.
  */
-static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info)
+static void handler_error(struct perf_event *bp)
 {
 	WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
-	     info->address);
+	     counter_arch_bp(bp)->address);
 	perf_event_disable_inatomic(bp);
 }
 
-static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info)
+static void larx_stcx_err(struct perf_event *bp)
 {
 	printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
-			   info->address);
+			   counter_arch_bp(bp)->address);
 	perf_event_disable_inatomic(bp);
 }
 
 static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
-			     struct arch_hw_breakpoint **info, int *hit,
-			     ppc_inst_t instr)
+			     int *hit, ppc_inst_t instr)
 {
 	int i;
 	int stepped;
@@ -560,8 +286,9 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
 		for (i = 0; i < nr_wp_slots(); i++) {
 			if (!hit[i])
 				continue;
-			current->thread.last_hit_ubp[i] = bp[i];
-			info[i] = NULL;
+
+			counter_arch_bp(bp[i])->perf_single_step = true;
+			bp[i] = NULL;
 		}
 		regs_set_return_msr(regs, regs->msr | MSR_SE);
 		return false;
@@ -572,15 +299,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
 		for (i = 0; i < nr_wp_slots(); i++) {
 			if (!hit[i])
 				continue;
-			handler_error(bp[i], info[i]);
-			info[i] = NULL;
+			handler_error(bp[i]);
+			bp[i] = NULL;
 		}
 		return false;
 	}
 	return true;
 }
 
-static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
+static void handle_p10dd1_spurious_exception(struct perf_event **bp,
 					     int *hit, unsigned long ea)
 {
 	int i;
@@ -592,10 +319,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
 	 * spurious exception.
 	 */
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!info[i])
+		struct arch_hw_breakpoint *info;
+
+		if (!bp[i])
 			continue;
 
-		hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE);
+		info = counter_arch_bp(bp[i]);
+
+		hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
 
 		/*
 		 * Ending address of DAWR range is less than starting
@@ -625,9 +356,9 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
 		return;
 
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (info[i]) {
+		if (bp[i]) {
 			hit[i] = 1;
-			info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+			counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
 		}
 	}
 }
@@ -638,7 +369,6 @@ int hw_breakpoint_handler(struct die_args *args)
 	int rc = NOTIFY_STOP;
 	struct perf_event *bp[HBP_NUM_MAX] = { NULL };
 	struct pt_regs *regs = args->regs;
-	struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL };
 	int i;
 	int hit[HBP_NUM_MAX] = {0};
 	int nr_hit = 0;
@@ -663,18 +393,20 @@ int hw_breakpoint_handler(struct die_args *args)
 		wp_get_instr_detail(regs, &instr, &type, &size, &ea);
 
 	for (i = 0; i < nr_wp_slots(); i++) {
+		struct arch_hw_breakpoint *info;
+
 		bp[i] = __this_cpu_read(bp_per_reg[i]);
 		if (!bp[i])
 			continue;
 
-		info[i] = counter_arch_bp(bp[i]);
-		info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+		info = counter_arch_bp(bp[i]);
+		info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
 
-		if (wp_check_constraints(regs, instr, ea, type, size, info[i])) {
+		if (wp_check_constraints(regs, instr, ea, type, size, info)) {
 			if (!IS_ENABLED(CONFIG_PPC_8xx) &&
 			    ppc_inst_equal(instr, ppc_inst(0))) {
-				handler_error(bp[i], info[i]);
-				info[i] = NULL;
+				handler_error(bp[i]);
+				bp[i] = NULL;
 				err = 1;
 				continue;
 			}
@@ -693,7 +425,7 @@ int hw_breakpoint_handler(struct die_args *args)
 		/* Workaround for Power10 DD1 */
 		if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
 		    is_octword_vsx_instr(type, size)) {
-			handle_p10dd1_spurious_exception(info, hit, ea);
+			handle_p10dd1_spurious_exception(bp, hit, ea);
 		} else {
 			rc = NOTIFY_DONE;
 			goto out;
@@ -708,10 +440,10 @@ int hw_breakpoint_handler(struct die_args *args)
 	 */
 	if (ptrace_bp) {
 		for (i = 0; i < nr_wp_slots(); i++) {
-			if (!hit[i])
+			if (!hit[i] || !is_ptrace_bp(bp[i]))
 				continue;
 			perf_bp_event(bp[i], regs);
-			info[i] = NULL;
+			bp[i] = NULL;
 		}
 		rc = NOTIFY_DONE;
 		goto reset;
@@ -722,13 +454,13 @@ int hw_breakpoint_handler(struct die_args *args)
 			for (i = 0; i < nr_wp_slots(); i++) {
 				if (!hit[i])
 					continue;
-				larx_stcx_err(bp[i], info[i]);
-				info[i] = NULL;
+				larx_stcx_err(bp[i]);
+				bp[i] = NULL;
 			}
 			goto reset;
 		}
 
-		if (!stepping_handler(regs, bp, info, hit, instr))
+		if (!stepping_handler(regs, bp, hit, instr))
 			goto reset;
 	}
 
@@ -739,15 +471,15 @@ int hw_breakpoint_handler(struct die_args *args)
 	for (i = 0; i < nr_wp_slots(); i++) {
 		if (!hit[i])
 			continue;
-		if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+		if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 			perf_bp_event(bp[i], regs);
 	}
 
 reset:
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (!info[i])
+		if (!bp[i])
 			continue;
-		__set_breakpoint(i, info[i]);
+		__set_breakpoint(i, counter_arch_bp(bp[i]));
 	}
 
 out:
@@ -762,24 +494,28 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler);
 static int single_step_dabr_instruction(struct die_args *args)
 {
 	struct pt_regs *regs = args->regs;
-	struct perf_event *bp = NULL;
-	struct arch_hw_breakpoint *info;
-	int i;
 	bool found = false;
 
 	/*
 	 * Check if we are single-stepping as a result of a
 	 * previous HW Breakpoint exception
 	 */
-	for (i = 0; i < nr_wp_slots(); i++) {
-		bp = current->thread.last_hit_ubp[i];
+	for (int i = 0; i < nr_wp_slots(); i++) {
+		struct perf_event *bp;
+		struct arch_hw_breakpoint *info;
+
+		bp = __this_cpu_read(bp_per_reg[i]);
 
 		if (!bp)
 			continue;
 
-		found = true;
 		info = counter_arch_bp(bp);
 
+		if (!info->perf_single_step)
+			continue;
+
+		found = true;
+
 		/*
 		 * We shall invoke the user-defined callback function in the
 		 * single stepping handler to confirm to 'trigger-after-execute'
@@ -787,26 +523,16 @@ static int single_step_dabr_instruction(struct die_args *args)
 		 */
 		if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
 			perf_bp_event(bp, regs);
-		current->thread.last_hit_ubp[i] = NULL;
-	}
 
-	if (!found)
-		return NOTIFY_DONE;
-
-	for (i = 0; i < nr_wp_slots(); i++) {
-		bp = __this_cpu_read(bp_per_reg[i]);
-		if (!bp)
-			continue;
-
-		info = counter_arch_bp(bp);
-		__set_breakpoint(i, info);
+		info->perf_single_step = false;
+		__set_breakpoint(i, counter_arch_bp(bp));
 	}
 
 	/*
 	 * If the process was being single-stepped by ptrace, let the
 	 * other single-step actions occur (e.g. generate SIGTRAP).
 	 */
-	if (test_thread_flag(TIF_SINGLESTEP))
+	if (!found || test_thread_flag(TIF_SINGLESTEP))
 		return NOTIFY_DONE;
 
 	return NOTIFY_STOP;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c52449ae..14251bc 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -172,17 +172,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
 	return 0;
 }
 
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes.  Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
 	.notifier_call = fail_iommu_bus_notify
 };
 
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
+	.notifier_call = fail_iommu_bus_notify
+};
+#endif
+
 static int __init fail_iommu_setup(void)
 {
 #ifdef CONFIG_PCI
-	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+	bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
 #endif
 #ifdef CONFIG_IBMVIO
-	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+	bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
 #endif
 
 	return 0;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 6ee6574..1da2f6e 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -5,8 +5,8 @@
 #include <linux/serial_core.h>
 #include <linux/console.h>
 #include <linux/pci.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/serial_reg.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index fb7de354..29e1440 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -10,11 +10,11 @@
  *
  * setjmp/longjmp code by Paul Mackerras.
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
 #include <asm/unistd.h>
 #include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index daf8f87..2eabb15 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/export.h>
 #include <linux/sys.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
@@ -22,7 +23,6 @@
 #include <asm/processor.h>
 #include <asm/bug.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 
 	.text
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 2c9ac70a..1a8cdaf 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -9,6 +9,7 @@
  * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <asm/unistd.h>
@@ -23,7 +24,6 @@
 #include <asm/kexec.h>
 #include <asm/ptrace.h>
 #include <asm/mmu.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 
 	.text
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 9257028..7112adc 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -465,7 +465,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 	return 0;
 }
 
-#ifdef CONFIG_MPROFILE_KERNEL
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
 
 static u32 stub_insns[] = {
 #ifdef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index f89376f..adc76fa 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -13,9 +13,7 @@
 #include <linux/export.h>
 #include <linux/mod_devicetable.h>
 #include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/atomic.h>
 
 #include <asm/errno.h>
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e88d7c9..040255d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -125,7 +125,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 {
 	struct pci_controller *phb;
 
-	phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+	phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL);
 	if (phb == NULL)
 		return NULL;
 
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 15414c8..9fabb4d 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -74,7 +74,7 @@ void release_pmc_hardware(void)
 }
 EXPORT_SYMBOL_GPL(release_pmc_hardware);
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 void power4_enable_pmcs(void)
 {
 	unsigned long hid0;
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 3910cd7..584cf5c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -716,69 +716,86 @@ int gpr32_get_common(struct task_struct *target,
 	return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32));
 }
 
-int gpr32_set_common(struct task_struct *target,
-		     const struct user_regset *regset,
-		     unsigned int pos, unsigned int count,
-		     const void *kbuf, const void __user *ubuf,
-		     unsigned long *regs)
+static int gpr32_set_common_kernel(struct task_struct *target,
+				   const struct user_regset *regset,
+				   unsigned int pos, unsigned int count,
+				   const void *kbuf, unsigned long *regs)
 {
 	const compat_ulong_t *k = kbuf;
+
+	pos /= sizeof(compat_ulong_t);
+	count /= sizeof(compat_ulong_t);
+
+	for (; count > 0 && pos < PT_MSR; --count)
+		regs[pos++] = *k++;
+
+	if (count > 0 && pos == PT_MSR) {
+		set_user_msr(target, *k++);
+		++pos;
+		--count;
+	}
+
+	for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+		regs[pos++] = *k++;
+	for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+		++k;
+
+	if (count > 0 && pos == PT_TRAP) {
+		set_user_trap(target, *k++);
+		++pos;
+		--count;
+	}
+
+	kbuf = k;
+	pos *= sizeof(compat_ulong_t);
+	count *= sizeof(compat_ulong_t);
+	user_regset_copyin_ignore(&pos, &count, &kbuf, NULL,
+				  (PT_TRAP + 1) * sizeof(compat_ulong_t), -1);
+	return 0;
+}
+
+static int gpr32_set_common_user(struct task_struct *target,
+				 const struct user_regset *regset,
+				 unsigned int pos, unsigned int count,
+				 const void __user *ubuf, unsigned long *regs)
+{
 	const compat_ulong_t __user *u = ubuf;
+	const void *kbuf = NULL;
 	compat_ulong_t reg;
 
-	if (!kbuf && !user_read_access_begin(u, count))
+	if (!user_read_access_begin(u, count))
 		return -EFAULT;
 
 	pos /= sizeof(reg);
 	count /= sizeof(reg);
 
-	if (kbuf)
-		for (; count > 0 && pos < PT_MSR; --count)
-			regs[pos++] = *k++;
-	else
-		for (; count > 0 && pos < PT_MSR; --count) {
-			unsafe_get_user(reg, u++, Efault);
-			regs[pos++] = reg;
-		}
-
+	for (; count > 0 && pos < PT_MSR; --count) {
+		unsafe_get_user(reg, u++, Efault);
+		regs[pos++] = reg;
+	}
 
 	if (count > 0 && pos == PT_MSR) {
-		if (kbuf)
-			reg = *k++;
-		else
-			unsafe_get_user(reg, u++, Efault);
+		unsafe_get_user(reg, u++, Efault);
 		set_user_msr(target, reg);
 		++pos;
 		--count;
 	}
 
-	if (kbuf) {
-		for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
-			regs[pos++] = *k++;
-		for (; count > 0 && pos < PT_TRAP; --count, ++pos)
-			++k;
-	} else {
-		for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
-			unsafe_get_user(reg, u++, Efault);
-			regs[pos++] = reg;
-		}
-		for (; count > 0 && pos < PT_TRAP; --count, ++pos)
-			unsafe_get_user(reg, u++, Efault);
+	for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+		unsafe_get_user(reg, u++, Efault);
+		regs[pos++] = reg;
 	}
+	for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+		unsafe_get_user(reg, u++, Efault);
 
 	if (count > 0 && pos == PT_TRAP) {
-		if (kbuf)
-			reg = *k++;
-		else
-			unsafe_get_user(reg, u++, Efault);
+		unsafe_get_user(reg, u++, Efault);
 		set_user_trap(target, reg);
 		++pos;
 		--count;
 	}
-	if (!kbuf)
-		user_read_access_end();
+	user_read_access_end();
 
-	kbuf = k;
 	ubuf = u;
 	pos *= sizeof(reg);
 	count *= sizeof(reg);
@@ -791,6 +808,18 @@ int gpr32_set_common(struct task_struct *target,
 	return -EFAULT;
 }
 
+int gpr32_set_common(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned long *regs)
+{
+	if (kbuf)
+		return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs);
+	else
+		return gpr32_set_common_user(target, regset, pos, count, ubuf, regs);
+}
+
 static int gpr32_get(struct task_struct *target,
 		     const struct user_regset *regset,
 		     struct membuf to)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c087eee..eddc031 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1330,33 +1330,34 @@ bool __ref rtas_busy_delay(int status)
 }
 EXPORT_SYMBOL_GPL(rtas_busy_delay);
 
-static int rtas_error_rc(int rtas_rc)
+int rtas_error_rc(int rtas_rc)
 {
 	int rc;
 
 	switch (rtas_rc) {
-		case -1: 		/* Hardware Error */
-			rc = -EIO;
-			break;
-		case -3:		/* Bad indicator/domain/etc */
-			rc = -EINVAL;
-			break;
-		case -9000:		/* Isolation error */
-			rc = -EFAULT;
-			break;
-		case -9001:		/* Outstanding TCE/PTE */
-			rc = -EEXIST;
-			break;
-		case -9002:		/* No usable slot */
-			rc = -ENODEV;
-			break;
-		default:
-			pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
-			rc = -ERANGE;
-			break;
+	case RTAS_HARDWARE_ERROR:	/* Hardware Error */
+		rc = -EIO;
+		break;
+	case RTAS_INVALID_PARAMETER:	/* Bad indicator/domain/etc */
+		rc = -EINVAL;
+		break;
+	case -9000:			/* Isolation error */
+		rc = -EFAULT;
+		break;
+	case -9001:			/* Outstanding TCE/PTE */
+		rc = -EEXIST;
+		break;
+	case -9002:			/* No usable slot */
+		rc = -ENODEV;
+		break;
+	default:
+		pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
+		rc = -ERANGE;
+		break;
 	}
 	return rc;
 }
+EXPORT_SYMBOL_GPL(rtas_error_rc);
 
 int rtas_get_power_level(int powerdomain, int *level)
 {
@@ -1587,6 +1588,7 @@ static bool ibm_extended_os_term;
 void rtas_os_term(char *str)
 {
 	s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
+	static struct rtas_args args;
 	int status;
 
 	/*
@@ -1607,7 +1609,8 @@ void rtas_os_term(char *str)
 	 * schedules.
 	 */
 	do {
-		status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf));
+		rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf));
+		status = be32_to_cpu(args.rets[0]);
 	} while (rtas_busy_delay_time(status));
 
 	if (status != 0)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d2a4462..2f1026f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -31,9 +31,9 @@
 #include <linux/serial_8250.h>
 #include <linux/percpu.h>
 #include <linux/memblock.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
+#include <linux/of_irq.h>
 #include <linux/hugetlb.h>
 #include <linux/pgtable.h>
 #include <asm/io.h>
@@ -969,8 +969,12 @@ void __init setup_arch(char **cmdline_p)
 	klp_init_thread_info(&init_task);
 
 	setup_initial_init_mm(_stext, _etext, _edata, _end);
-
+	/* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
 	mm_iommu_init(&init_mm);
+
 	irqstack_early_init();
 	exc_lvl_early_init();
 	emergency_stack_init();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index fbbb695..5826f51 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -47,6 +47,7 @@
 #include <asm/smp.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
+#include <asm/mmu_context.h>
 #include <asm/cputhreads.h>
 #include <asm/cputable.h>
 #include <asm/mpic.h>
@@ -1087,7 +1088,7 @@ static int __init init_big_cores(void)
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	unsigned int cpu;
+	unsigned int cpu, num_threads;
 
 	DBG("smp_prepare_cpus\n");
 
@@ -1154,6 +1155,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 
 	if (smp_ops && smp_ops->probe)
 		smp_ops->probe();
+
+	// Initalise the generic SMT topology support
+	num_threads = 1;
+	if (smt_enabled_at_boot)
+		num_threads = smt_enabled_at_boot;
+	cpu_smt_set_num_threads(num_threads, threads_per_core);
 }
 
 void smp_prepare_boot_cpu(void)
@@ -1616,6 +1623,9 @@ void start_secondary(void *unused)
 
 	mmgrab_lazy_tlb(&init_mm);
 	current->active_mm = &init_mm;
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
 
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
@@ -1751,6 +1761,14 @@ int __cpu_disable(void)
 
 void __cpu_die(unsigned int cpu)
 {
+	/*
+	 * This could perhaps be a generic call in idlea_task_dead(), but
+	 * that requires testing from all archs, so first put it here to
+	 */
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+	dec_mm_active_cpus(&init_mm);
+	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
 	if (smp_ops->cpu_die)
 		smp_ops->cpu_die(cpu);
 }
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 18b9d32..77fedb1 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -46,7 +46,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 		iamr = mfspr(SPRN_IAMR);
 		regs->amr  = amr;
 		regs->iamr = iamr;
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+		if (mmu_has_feature(MMU_FTR_KUAP)) {
 			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
 			flush_needed = true;
 		}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 9feab5e..a9cd650 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -6,13 +6,13 @@
  * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
  */
 
+#include <linux/export.h>
 #include <asm/asm-offsets.h>
 #include <asm/ppc_asm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/bug.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 
 #ifdef CONFIG_VSX
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index b16a9f9..125f4ca 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -6,15 +6,15 @@
 ifdef CONFIG_FUNCTION_TRACER
 # do not trace tracer code
 CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE)
 endif
 
-obj32-$(CONFIG_FUNCTION_TRACER)		+= ftrace_mprofile.o
+obj32-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o ftrace_entry.o
 ifdef CONFIG_MPROFILE_KERNEL
-obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_mprofile.o
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o ftrace_entry.o
 else
-obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64_pg.o
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64_pg.o ftrace_64_pg_entry.o
 endif
-obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace_low.o ftrace.o
 obj-$(CONFIG_TRACING)			+= trace_clock.o
 
 obj-$(CONFIG_PPC64)			+= $(obj64-y)
@@ -25,3 +25,7 @@
 KCOV_INSTRUMENT_ftrace.o := n
 KCSAN_SANITIZE_ftrace.o := n
 UBSAN_SANITIZE_ftrace.o := n
+GCOV_PROFILE_ftrace_64_pg.o := n
+KCOV_INSTRUMENT_ftrace_64_pg.o := n
+KCSAN_SANITIZE_ftrace_64_pg.o := n
+UBSAN_SANITIZE_ftrace_64_pg.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index a47f303..8201062 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -28,64 +28,52 @@
 #include <asm/syscall.h>
 #include <asm/inst.h>
 
-/*
- * We generally only have a single long_branch tramp and at most 2 or 3 plt
- * tramps generated. But, we don't use the plt tramps currently. We also allot
- * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
- * tramps in total. Set aside 8 just to be sure.
- */
-#define	NUM_FTRACE_TRAMPS	8
+#define	NUM_FTRACE_TRAMPS	2
 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
 
-static ppc_inst_t
-ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
 {
 	ppc_inst_t op;
 
-	addr = ppc_function_entry((void *)addr);
-
-	/* if (link) set op to 'bl' else 'b' */
+	WARN_ON(!is_offset_in_branch_range(addr - ip));
 	create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
 
 	return op;
 }
 
-static inline int
-ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op)
 {
-	ppc_inst_t replaced;
-
-	/*
-	 * Note:
-	 * We are paranoid about modifying text, as if a bug was to happen, it
-	 * could cause us to read or write to someplace that could cause harm.
-	 * Carefully read and modify the code with probe_kernel_*(), and make
-	 * sure what we read is what we expected it to be before modifying it.
-	 */
-
-	/* read the text we want to modify */
-	if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+	if (copy_inst_from_kernel_nofault(op, (void *)ip)) {
+		pr_err("0x%lx: fetching instruction failed\n", ip);
 		return -EFAULT;
-
-	/* Make sure it is what we expect it to be */
-	if (!ppc_inst_equal(replaced, old)) {
-		pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
-		       ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
-		return -EINVAL;
 	}
 
-	/* replace the text with the new text */
-	return patch_instruction((u32 *)ip, new);
+	return 0;
 }
 
-/*
- * Helper functions that are the same for both PPC64 and PPC32.
- */
-static int test_24bit_addr(unsigned long ip, unsigned long addr)
+static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst)
 {
-	addr = ppc_function_entry((void *)addr);
+	ppc_inst_t op;
+	int ret;
 
-	return is_offset_in_branch_range(addr - ip);
+	ret = ftrace_read_inst(ip, &op);
+	if (!ret && !ppc_inst_equal(op, inst)) {
+		pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+		       ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op));
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	int ret = ftrace_validate_inst(ip, old);
+
+	if (!ret)
+		ret = patch_instruction((u32 *)ip, new);
+
+	return ret;
 }
 
 static int is_bl_op(ppc_inst_t op)
@@ -93,138 +81,11 @@ static int is_bl_op(ppc_inst_t op)
 	return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
 }
 
-static int is_b_op(ppc_inst_t op)
-{
-	return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0);
-}
-
-static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
-{
-	int offset;
-
-	offset = PPC_LI(ppc_inst_val(op));
-	/* make it signed */
-	if (offset & 0x02000000)
-		offset |= 0xfe000000;
-
-	return ip + (long)offset;
-}
-
-#ifdef CONFIG_MODULES
-static int
-__ftrace_make_nop(struct module *mod,
-		  struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long entry, ptr, tramp;
-	unsigned long ip = rec->ip;
-	ppc_inst_t op, pop;
-
-	/* read where this goes */
-	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
-		pr_err("Fetching opcode failed.\n");
-		return -EFAULT;
-	}
-
-	/* Make sure that this is still a 24bit jump */
-	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
-		return -EINVAL;
-	}
-
-	/* lets find where the pointer goes */
-	tramp = find_bl_target(ip, op);
-
-	pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
-	if (module_trampoline_target(mod, tramp, &ptr)) {
-		pr_err("Failed to get trampoline target\n");
-		return -EFAULT;
-	}
-
-	pr_devel("trampoline target %lx", ptr);
-
-	entry = ppc_global_function_entry((void *)addr);
-	/* This should match what was called */
-	if (ptr != entry) {
-		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
-		return -EINVAL;
-	}
-
-	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
-		if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
-			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
-			return -EFAULT;
-		}
-
-		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
-		if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
-		    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
-			pr_err("Unexpected instruction %08lx around bl _mcount\n",
-			       ppc_inst_as_ulong(op));
-			return -EINVAL;
-		}
-	} else if (IS_ENABLED(CONFIG_PPC64)) {
-		/*
-		 * Check what is in the next instruction. We can see ld r2,40(r1), but
-		 * on first pass after boot we will see mflr r0.
-		 */
-		if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
-			pr_err("Fetching op failed.\n");
-			return -EFAULT;
-		}
-
-		if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
-			pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
-			       ppc_inst_as_ulong(op));
-			return -EINVAL;
-		}
-	}
-
-	/*
-	 * When using -mprofile-kernel or PPC32 there is no load to jump over.
-	 *
-	 * Otherwise our original call site looks like:
-	 *
-	 * bl <tramp>
-	 * ld r2,XX(r1)
-	 *
-	 * Milton Miller pointed out that we can not simply nop the branch.
-	 * If a task was preempted when calling a trace function, the nops
-	 * will remove the way to restore the TOC in r2 and the r2 TOC will
-	 * get corrupted.
-	 *
-	 * Use a b +8 to jump over the load.
-	 * XXX: could make PCREL depend on MPROFILE_KERNEL
-	 * XXX: check PCREL && MPROFILE_KERNEL calling sequence
-	 */
-	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
-		pop = ppc_inst(PPC_RAW_NOP());
-	else
-		pop = ppc_inst(PPC_RAW_BRANCH(8));	/* b +8 */
-
-	if (patch_instruction((u32 *)ip, pop)) {
-		pr_err("Patching NOP failed.\n");
-		return -EPERM;
-	}
-
-	return 0;
-}
-#else
-static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
-{
-	return 0;
-}
-#endif /* CONFIG_MODULES */
-
 static unsigned long find_ftrace_tramp(unsigned long ip)
 {
 	int i;
 
-	/*
-	 * We have the compiler generated long_branch tramps at the end
-	 * and we prefer those
-	 */
-	for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
 		if (!ftrace_tramps[i])
 			continue;
 		else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
@@ -233,449 +94,195 @@ static unsigned long find_ftrace_tramp(unsigned long ip)
 	return 0;
 }
 
-static int add_ftrace_tramp(unsigned long tramp)
-{
-	int i;
-
-	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
-		if (!ftrace_tramps[i]) {
-			ftrace_tramps[i] = tramp;
-			return 0;
-		}
-
-	return -1;
-}
-
-/*
- * If this is a compiler generated long_branch trampoline (essentially, a
- * trampoline that has a branch to _mcount()), we re-write the branch to
- * instead go to ftrace_[regs_]caller() and note down the location of this
- * trampoline.
- */
-static int setup_mcount_compiler_tramp(unsigned long tramp)
-{
-	int i;
-	ppc_inst_t op;
-	unsigned long ptr;
-
-	/* Is this a known long jump tramp? */
-	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
-		if (ftrace_tramps[i] == tramp)
-			return 0;
-
-	/* New trampoline -- read where this goes */
-	if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
-		pr_debug("Fetching opcode failed.\n");
-		return -1;
-	}
-
-	/* Is this a 24 bit branch? */
-	if (!is_b_op(op)) {
-		pr_debug("Trampoline is not a long branch tramp.\n");
-		return -1;
-	}
-
-	/* lets find where the pointer goes */
-	ptr = find_bl_target(tramp, op);
-
-	if (ptr != ppc_global_function_entry((void *)_mcount)) {
-		pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
-		return -1;
-	}
-
-	/* Let's re-write the tramp to go to ftrace_[regs_]caller */
-	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
-		ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
-	else
-		ptr = ppc_global_function_entry((void *)ftrace_caller);
-
-	if (patch_branch((u32 *)tramp, ptr, 0)) {
-		pr_debug("REL24 out of range!\n");
-		return -1;
-	}
-
-	if (add_ftrace_tramp(tramp)) {
-		pr_debug("No tramp locations left\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long tramp, ip = rec->ip;
-	ppc_inst_t op;
-
-	/* Read where this goes */
-	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
-		pr_err("Fetching opcode failed.\n");
-		return -EFAULT;
-	}
-
-	/* Make sure that this is still a 24bit jump */
-	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
-		return -EINVAL;
-	}
-
-	/* Let's find where the pointer goes */
-	tramp = find_bl_target(ip, op);
-
-	pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
-	if (setup_mcount_compiler_tramp(tramp)) {
-		/* Are other trampolines reachable? */
-		if (!find_ftrace_tramp(ip)) {
-			pr_err("No ftrace trampolines reachable from %ps\n",
-					(void *)ip);
-			return -EINVAL;
-		}
-	}
-
-	if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
-		pr_err("Patching NOP failed.\n");
-		return -EPERM;
-	}
-
-	return 0;
-}
-
-int ftrace_make_nop(struct module *mod,
-		    struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst)
 {
 	unsigned long ip = rec->ip;
-	ppc_inst_t old, new;
+	unsigned long stub;
 
-	/*
-	 * If the calling address is more that 24 bits away,
-	 * then we had to use a trampoline to make the call.
-	 * Otherwise just update the call site.
-	 */
-	if (test_24bit_addr(ip, addr)) {
-		/* within range */
-		old = ftrace_call_replace(ip, addr, 1);
-		new = ppc_inst(PPC_RAW_NOP());
-		return ftrace_modify_code(ip, old, new);
-	} else if (core_kernel_text(ip)) {
-		return __ftrace_make_nop_kernel(rec, addr);
-	} else if (!IS_ENABLED(CONFIG_MODULES)) {
-		return -EINVAL;
-	}
-
-	/*
-	 * Out of range jumps are called from modules.
-	 * We should either already have a pointer to the module
-	 * or it has been passed in.
-	 */
-	if (!rec->arch.mod) {
-		if (!mod) {
-			pr_err("No module loaded addr=%lx\n", addr);
-			return -EFAULT;
-		}
-		rec->arch.mod = mod;
-	} else if (mod) {
-		if (mod != rec->arch.mod) {
-			pr_err("Record mod %p not equal to passed in mod %p\n",
-			       rec->arch.mod, mod);
-			return -EINVAL;
-		}
-		/* nothing to do if mod == rec->arch.mod */
-	} else
-		mod = rec->arch.mod;
-
-	return __ftrace_make_nop(mod, rec, addr);
-}
-
+	if (is_offset_in_branch_range(addr - ip)) {
+		/* Within range */
+		stub = addr;
 #ifdef CONFIG_MODULES
-/*
- * Examine the existing instructions for __ftrace_make_call.
- * They should effectively be a NOP, and follow formal constraints,
- * depending on the ABI. Return false if they don't.
- */
-static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
-{
-	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
-		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
-	else
-		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
-		       ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
-}
-
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	ppc_inst_t op[2];
-	void *ip = (void *)rec->ip;
-	unsigned long entry, ptr, tramp;
-	struct module *mod = rec->arch.mod;
-
-	/* read where this goes */
-	if (copy_inst_from_kernel_nofault(op, ip))
-		return -EFAULT;
-
-	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
-	    copy_inst_from_kernel_nofault(op + 1, ip + 4))
-		return -EFAULT;
-
-	if (!expected_nop_sequence(ip, op[0], op[1])) {
-		pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
-		       ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
-		return -EINVAL;
-	}
-
-	/* If we never set up ftrace trampoline(s), then bail */
-	if (!mod->arch.tramp ||
-	    (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
-		pr_err("No ftrace trampoline\n");
-		return -EINVAL;
-	}
-
-	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
-		tramp = mod->arch.tramp_regs;
-	else
-		tramp = mod->arch.tramp;
-
-	if (module_trampoline_target(mod, tramp, &ptr)) {
-		pr_err("Failed to get trampoline target\n");
-		return -EFAULT;
-	}
-
-	pr_devel("trampoline target %lx", ptr);
-
-	entry = ppc_global_function_entry((void *)addr);
-	/* This should match what was called */
-	if (ptr != entry) {
-		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
-		return -EINVAL;
-	}
-
-	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
-		pr_err("REL24 out of range!\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-#else
-static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	return 0;
-}
-#endif /* CONFIG_MODULES */
-
-static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
-{
-	ppc_inst_t op;
-	void *ip = (void *)rec->ip;
-	unsigned long tramp, entry, ptr;
-
-	/* Make sure we're being asked to patch branch to a known ftrace addr */
-	entry = ppc_global_function_entry((void *)ftrace_caller);
-	ptr = ppc_global_function_entry((void *)addr);
-
-	if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
-		entry = ppc_global_function_entry((void *)ftrace_regs_caller);
-
-	if (ptr != entry) {
-		pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
-		return -EINVAL;
-	}
-
-	/* Make sure we have a nop */
-	if (copy_inst_from_kernel_nofault(&op, ip)) {
-		pr_err("Unable to read ftrace location %p\n", ip);
-		return -EFAULT;
-	}
-
-	if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
-		pr_err("Unexpected call sequence at %p: %08lx\n",
-		       ip, ppc_inst_as_ulong(op));
-		return -EINVAL;
-	}
-
-	tramp = find_ftrace_tramp((unsigned long)ip);
-	if (!tramp) {
-		pr_err("No ftrace trampolines reachable from %ps\n", ip);
-		return -EINVAL;
-	}
-
-	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
-		pr_err("Error patching branch to ftrace tramp!\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	ppc_inst_t old, new;
-
-	/*
-	 * If the calling address is more that 24 bits away,
-	 * then we had to use a trampoline to make the call.
-	 * Otherwise just update the call site.
-	 */
-	if (test_24bit_addr(ip, addr)) {
-		/* within range */
-		old = ppc_inst(PPC_RAW_NOP());
-		new = ftrace_call_replace(ip, addr, 1);
-		return ftrace_modify_code(ip, old, new);
+	} else if (rec->arch.mod) {
+		/* Module code would be going to one of the module stubs */
+		stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp :
+							       rec->arch.mod->arch.tramp_regs);
+#endif
 	} else if (core_kernel_text(ip)) {
-		return __ftrace_make_call_kernel(rec, addr);
-	} else if (!IS_ENABLED(CONFIG_MODULES)) {
-		/* We should not get here without modules */
+		/* We would be branching to one of our ftrace stubs */
+		stub = find_ftrace_tramp(ip);
+		if (!stub) {
+			pr_err("0x%lx: No ftrace stubs reachable\n", ip);
+			return -EINVAL;
+		}
+	} else {
 		return -EINVAL;
 	}
 
-	/*
-	 * Out of range jumps are called from modules.
-	 * Being that we are converting from nop, it had better
-	 * already have a module defined.
-	 */
-	if (!rec->arch.mod) {
-		pr_err("No module loaded\n");
-		return -EINVAL;
-	}
-
-	return __ftrace_make_call(rec, addr);
+	*call_inst = ftrace_create_branch_inst(ip, stub, 1);
+	return 0;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-#ifdef CONFIG_MODULES
-static int
-__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
-					unsigned long addr)
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
 {
-	ppc_inst_t op;
-	unsigned long ip = rec->ip;
-	unsigned long entry, ptr, tramp;
-	struct module *mod = rec->arch.mod;
+	/* This should never be called since we override ftrace_replace_code() */
+	WARN_ON(1);
+	return -EINVAL;
+}
+#endif
 
-	/* If we never set up ftrace trampolines, then bail */
-	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
-		pr_err("No ftrace trampoline\n");
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t old, new;
+	int ret;
+
+	/* This can only ever be called during module load */
+	if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip)))
+		return -EINVAL;
+
+	old = ppc_inst(PPC_RAW_NOP());
+	ret = ftrace_get_call_inst(rec, addr, &new);
+	if (ret)
+		return ret;
+
+	return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	/*
+	 * This should never be called since we override ftrace_replace_code(),
+	 * as well as ftrace_init_nop()
+	 */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+void ftrace_replace_code(int enable)
+{
+	ppc_inst_t old, new, call_inst, new_call_inst;
+	ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP());
+	unsigned long ip, new_addr, addr;
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+	int ret = 0, update;
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+		ip = rec->ip;
+
+		if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED))
+			continue;
+
+		addr = ftrace_get_addr_curr(rec);
+		new_addr = ftrace_get_addr_new(rec);
+		update = ftrace_update_record(rec, enable);
+
+		switch (update) {
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
+		case FTRACE_UPDATE_MODIFY_CALL:
+			ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst);
+			ret |= ftrace_get_call_inst(rec, addr, &call_inst);
+			old = call_inst;
+			new = new_call_inst;
+			break;
+		case FTRACE_UPDATE_MAKE_NOP:
+			ret = ftrace_get_call_inst(rec, addr, &call_inst);
+			old = call_inst;
+			new = nop_inst;
+			break;
+		case FTRACE_UPDATE_MAKE_CALL:
+			ret = ftrace_get_call_inst(rec, new_addr, &call_inst);
+			old = nop_inst;
+			new = call_inst;
+			break;
+		}
+
+		if (!ret)
+			ret = ftrace_modify_code(ip, old, new);
+		if (ret)
+			goto out;
+	}
+
+out:
+	if (ret)
+		ftrace_bug(ret, rec);
+	return;
+}
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	unsigned long addr, ip = rec->ip;
+	ppc_inst_t old, new;
+	int ret = 0;
+
+	/* Verify instructions surrounding the ftrace location */
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+		/* Expect nops */
+		ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP()));
+		if (!ret)
+			ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP()));
+	} else if (IS_ENABLED(CONFIG_PPC32)) {
+		/* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
+		ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+		if (!ret)
+			ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)));
+	} else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+		/* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
+		ret = ftrace_read_inst(ip - 4, &old);
+		if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
+			ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+			ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)));
+		}
+	} else {
 		return -EINVAL;
 	}
 
-	/* read where this goes */
-	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
-		pr_err("Fetching opcode failed.\n");
-		return -EFAULT;
-	}
+	if (ret)
+		return ret;
 
-	/* Make sure that this is still a 24bit jump */
-	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
-		return -EINVAL;
-	}
-
-	/* lets find where the pointer goes */
-	tramp = find_bl_target(ip, op);
-	entry = ppc_global_function_entry((void *)old_addr);
-
-	pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
-	if (tramp != entry) {
-		/* old_addr is not within range, so we must have used a trampoline */
-		if (module_trampoline_target(mod, tramp, &ptr)) {
-			pr_err("Failed to get trampoline target\n");
+	if (!core_kernel_text(ip)) {
+		if (!mod) {
+			pr_err("0x%lx: No module provided for non-kernel address\n", ip);
 			return -EFAULT;
 		}
-
-		pr_devel("trampoline target %lx", ptr);
-
-		/* This should match what was called */
-		if (ptr != entry) {
-			pr_err("addr %lx does not match expected %lx\n", ptr, entry);
-			return -EINVAL;
-		}
+		rec->arch.mod = mod;
 	}
 
-	/* The new target may be within range */
-	if (test_24bit_addr(ip, addr)) {
-		/* within range */
-		if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
-			pr_err("REL24 out of range!\n");
-			return -EINVAL;
-		}
-
-		return 0;
-	}
-
-	if (rec->flags & FTRACE_FL_REGS)
-		tramp = mod->arch.tramp_regs;
-	else
-		tramp = mod->arch.tramp;
-
-	if (module_trampoline_target(mod, tramp, &ptr)) {
-		pr_err("Failed to get trampoline target\n");
-		return -EFAULT;
-	}
-
-	pr_devel("trampoline target %lx", ptr);
-
-	entry = ppc_global_function_entry((void *)addr);
-	/* This should match what was called */
-	if (ptr != entry) {
-		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
-		return -EINVAL;
-	}
-
-	if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
-		pr_err("REL24 out of range!\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-#else
-static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
-{
-	return 0;
-}
-#endif
-
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
-			unsigned long addr)
-{
-	unsigned long ip = rec->ip;
-	ppc_inst_t old, new;
-
-	/*
-	 * If the calling address is more that 24 bits away,
-	 * then we had to use a trampoline to make the call.
-	 * Otherwise just update the call site.
-	 */
-	if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
-		/* within range */
-		old = ftrace_call_replace(ip, old_addr, 1);
-		new = ftrace_call_replace(ip, addr, 1);
-		return ftrace_modify_code(ip, old, new);
-	} else if (core_kernel_text(ip)) {
+	/* Nop-out the ftrace location */
+	new = ppc_inst(PPC_RAW_NOP());
+	addr = MCOUNT_ADDR;
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+		/* we instead patch-in the 'mflr r0' */
+		old = ppc_inst(PPC_RAW_NOP());
+		new = ppc_inst(PPC_RAW_MFLR(_R0));
+		ret = ftrace_modify_code(ip - 4, old, new);
+	} else if (is_offset_in_branch_range(addr - ip)) {
+		/* Within range */
+		old = ftrace_create_branch_inst(ip, addr, 1);
+		ret = ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) {
 		/*
-		 * We always patch out of range locations to go to the regs
-		 * variant, so there is nothing to do here
+		 * We would be branching to a linker-generated stub, or to the module _mcount
+		 * stub. Let's just confirm we have a 'bl' here.
 		 */
-		return 0;
-	} else if (!IS_ENABLED(CONFIG_MODULES)) {
-		/* We should not get here without modules */
+		ret = ftrace_read_inst(ip, &old);
+		if (ret)
+			return ret;
+		if (!is_bl_op(old)) {
+			pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old));
+			return -EINVAL;
+		}
+		ret = patch_instruction((u32 *)ip, new);
+	} else {
 		return -EINVAL;
 	}
 
-	/*
-	 * Out of range jumps are called from modules.
-	 */
-	if (!rec->arch.mod) {
-		pr_err("No module loaded\n");
-		return -EINVAL;
-	}
-
-	return __ftrace_modify_call(rec, old_addr, addr);
+	return ret;
 }
-#endif
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
@@ -684,14 +291,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	int ret;
 
 	old = ppc_inst_read((u32 *)&ftrace_call);
-	new = ftrace_call_replace(ip, (unsigned long)func, 1);
+	new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
 	ret = ftrace_modify_code(ip, old, new);
 
 	/* Also update the regs callback function */
 	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
 		ip = (unsigned long)(&ftrace_regs_call);
 		old = ppc_inst_read((u32 *)&ftrace_regs_call);
-		new = ftrace_call_replace(ip, (unsigned long)func, 1);
+		new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
 		ret = ftrace_modify_code(ip, old, new);
 	}
 
@@ -707,11 +314,6 @@ void arch_ftrace_update_code(int command)
 	ftrace_modify_all_code(command);
 }
 
-#ifdef CONFIG_PPC64
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
-
 void ftrace_free_init_tramp(void)
 {
 	int i;
@@ -723,35 +325,43 @@ void ftrace_free_init_tramp(void)
 		}
 }
 
-int __init ftrace_dyn_arch_init(void)
+static void __init add_ftrace_tramp(unsigned long tramp)
 {
 	int i;
+
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+		if (!ftrace_tramps[i]) {
+			ftrace_tramps[i] = tramp;
+			return;
+		}
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
 	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
-#ifdef CONFIG_PPC_KERNEL_PCREL
+	unsigned long addr = FTRACE_REGS_ADDR;
+	long reladdr;
+	int i;
 	u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
 		/* pla r12,addr */
 		PPC_PREFIX_MLS | __PPC_PRFX_R(1),
 		PPC_INST_PADDI | ___PPC_RT(_R12),
 		PPC_RAW_MTCTR(_R12),
 		PPC_RAW_BCTR()
-	};
-#else
-	u32 stub_insns[] = {
-		PPC_RAW_LD(_R12, _R13, PACATOC),
+#elif defined(CONFIG_PPC64)
+		PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
 		PPC_RAW_ADDIS(_R12, _R12, 0),
 		PPC_RAW_ADDI(_R12, _R12, 0),
 		PPC_RAW_MTCTR(_R12),
 		PPC_RAW_BCTR()
-	};
+#else
+		PPC_RAW_LIS(_R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
 #endif
-
-	unsigned long addr;
-	long reladdr;
-
-	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
-		addr = ppc_global_function_entry((void *)ftrace_regs_caller);
-	else
-		addr = ppc_global_function_entry((void *)ftrace_caller);
+	};
 
 	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
 		for (i = 0; i < 2; i++) {
@@ -768,10 +378,10 @@ int __init ftrace_dyn_arch_init(void)
 			tramp[i][1] |= IMM_L(reladdr);
 			add_ftrace_tramp((unsigned long)tramp[i]);
 		}
-	} else {
+	} else if (IS_ENABLED(CONFIG_PPC64)) {
 		reladdr = addr - kernel_toc_addr();
 
-		if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) {
+		if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) {
 			pr_err("Address of %ps out of range of kernel_toc.\n",
 				(void *)addr);
 			return -1;
@@ -783,51 +393,23 @@ int __init ftrace_dyn_arch_init(void)
 			tramp[i][2] |= PPC_LO(reladdr);
 			add_ftrace_tramp((unsigned long)tramp[i]);
 		}
+	} else {
+		for (i = 0; i < 2; i++) {
+			memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+			tramp[i][0] |= PPC_HA(addr);
+			tramp[i][1] |= PPC_LO(addr);
+			add_ftrace_tramp((unsigned long)tramp[i]);
+		}
 	}
 
 	return 0;
 }
-#endif
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-extern void ftrace_graph_call(void);
-extern void ftrace_graph_stub(void);
-
-static int ftrace_modify_ftrace_graph_caller(bool enable)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	unsigned long ip = (unsigned long)(&ftrace_graph_call);
-	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
-	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
-	ppc_inst_t old, new;
-
-	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
-		return 0;
-
-	old = ftrace_call_replace(ip, enable ? stub : addr, 0);
-	new = ftrace_call_replace(ip, enable ? addr : stub, 0);
-
-	return ftrace_modify_code(ip, old, new);
-}
-
-int ftrace_enable_ftrace_graph_caller(void)
-{
-	return ftrace_modify_ftrace_graph_caller(true);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
-	return ftrace_modify_ftrace_graph_caller(false);
-}
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info. Return the address we want to divert to.
- */
-static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
-{
-	unsigned long return_hooker;
+	unsigned long sp = fregs->regs.gpr[1];
 	int bit;
 
 	if (unlikely(ftrace_graph_is_dead()))
@@ -836,41 +418,15 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
 
-	bit = ftrace_test_recursion_trylock(ip, parent);
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		goto out;
 
-	return_hooker = ppc_function_entry(return_to_handler);
-
-	if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
-		parent = return_hooker;
+	if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+		parent_ip = ppc_function_entry(return_to_handler);
 
 	ftrace_test_recursion_unlock(bit);
 out:
-	return parent;
+	fregs->regs.link = parent_ip;
 }
-
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
-		       struct ftrace_ops *op, struct ftrace_regs *fregs)
-{
-	fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
-}
-#else
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
-				    unsigned long sp)
-{
-	return __prepare_ftrace_return(parent, ip, sp);
-}
-#endif
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_PPC64_ELF_ABI_V1
-char *arch_ftrace_match_adjust(char *str, const char *search)
-{
-	if (str[0] == '.' && search[0] != '.')
-		return str + 1;
-	else
-		return str;
-}
-#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
deleted file mode 100644
index 6708e24..0000000
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from ftrace_64.S
- */
-
-#include <linux/magic.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/ppc-opcode.h>
-#include <asm/export.h>
-
-_GLOBAL_TOC(ftrace_caller)
-	lbz	r3, PACA_FTRACE_ENABLED(r13)
-	cmpdi	r3, 0
-	beqlr
-
-	/* Taken from output of objdump from lib64/glibc */
-	mflr	r3
-	ld	r11, 0(r1)
-	stdu	r1, -112(r1)
-	std	r3, 128(r1)
-	ld	r4, 16(r11)
-	subi	r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
-	bl	ftrace_stub
-	nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
-	b	ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-
-_GLOBAL(ftrace_stub)
-	blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
-	addi	r5, r1, 112
-	/* load r4 with local address */
-	ld	r4, 128(r1)
-	subi	r4, r4, MCOUNT_INSN_SIZE
-
-	/* Grab the LR out of the caller stack frame */
-	ld	r11, 112(r1)
-	ld	r3, 16(r11)
-
-	bl	prepare_ftrace_return
-	nop
-
-	/*
-	 * prepare_ftrace_return gives us the address we divert to.
-	 * Change the LR in the callers stack frame to this.
-	 */
-	ld	r11, 112(r1)
-	std	r3, 16(r11)
-
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-	blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 0000000..7b85c3b
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,846 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define	NUM_FTRACE_TRAMPS	8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+	ppc_inst_t op;
+
+	addr = ppc_function_entry((void *)addr);
+
+	/* if (link) set op to 'bl' else 'b' */
+	create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
+
+	return op;
+}
+
+static inline int
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	ppc_inst_t replaced;
+
+	/*
+	 * Note:
+	 * We are paranoid about modifying text, as if a bug was to happen, it
+	 * could cause us to read or write to someplace that could cause harm.
+	 * Carefully read and modify the code with probe_kernel_*(), and make
+	 * sure what we read is what we expected it to be before modifying it.
+	 */
+
+	/* read the text we want to modify */
+	if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (!ppc_inst_equal(replaced, old)) {
+		pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
+		       ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
+		return -EINVAL;
+	}
+
+	/* replace the text with the new text */
+	return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+	addr = ppc_function_entry((void *)addr);
+
+	return is_offset_in_branch_range(addr - ip);
+}
+
+static int is_bl_op(ppc_inst_t op)
+{
+	return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
+}
+
+static int is_b_op(ppc_inst_t op)
+{
+	return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0);
+}
+
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
+{
+	int offset;
+
+	offset = PPC_LI(ppc_inst_val(op));
+	/* make it signed */
+	if (offset & 0x02000000)
+		offset |= 0xfe000000;
+
+	return ip + (long)offset;
+}
+
+#ifdef CONFIG_MODULES
+static int
+__ftrace_make_nop(struct module *mod,
+		  struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long entry, ptr, tramp;
+	unsigned long ip = rec->ip;
+	ppc_inst_t op, pop;
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
+			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+			return -EFAULT;
+		}
+
+		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+		if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
+		    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
+			pr_err("Unexpected instruction %08lx around bl _mcount\n",
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	} else if (IS_ENABLED(CONFIG_PPC64)) {
+		/*
+		 * Check what is in the next instruction. We can see ld r2,40(r1), but
+		 * on first pass after boot we will see mflr r0.
+		 */
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
+			pr_err("Fetching op failed.\n");
+			return -EFAULT;
+		}
+
+		if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
+			pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * When using -mprofile-kernel or PPC32 there is no load to jump over.
+	 *
+	 * Otherwise our original call site looks like:
+	 *
+	 * bl <tramp>
+	 * ld r2,XX(r1)
+	 *
+	 * Milton Miller pointed out that we can not simply nop the branch.
+	 * If a task was preempted when calling a trace function, the nops
+	 * will remove the way to restore the TOC in r2 and the r2 TOC will
+	 * get corrupted.
+	 *
+	 * Use a b +8 to jump over the load.
+	 */
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
+		pop = ppc_inst(PPC_RAW_NOP());
+	else
+		pop = ppc_inst(PPC_RAW_BRANCH(8));	/* b +8 */
+
+	if (patch_instruction((u32 *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+	int i;
+
+	/*
+	 * We have the compiler generated long_branch tramps at the end
+	 * and we prefer those
+	 */
+	for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+		if (!ftrace_tramps[i])
+			continue;
+		else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
+			return ftrace_tramps[i];
+
+	return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+	int i;
+
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+		if (!ftrace_tramps[i]) {
+			ftrace_tramps[i] = tramp;
+			return 0;
+		}
+
+	return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+	int i;
+	ppc_inst_t op;
+	unsigned long ptr;
+
+	/* Is this a known long jump tramp? */
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+		if (ftrace_tramps[i] == tramp)
+			return 0;
+
+	/* New trampoline -- read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
+		pr_debug("Fetching opcode failed.\n");
+		return -1;
+	}
+
+	/* Is this a 24 bit branch? */
+	if (!is_b_op(op)) {
+		pr_debug("Trampoline is not a long branch tramp.\n");
+		return -1;
+	}
+
+	/* lets find where the pointer goes */
+	ptr = find_bl_target(tramp, op);
+
+	if (ptr != ppc_global_function_entry((void *)_mcount)) {
+		pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+		return -1;
+	}
+
+	/* Let's re-write the tramp to go to ftrace_[regs_]caller */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+	else
+		ptr = ppc_global_function_entry((void *)ftrace_caller);
+
+	if (patch_branch((u32 *)tramp, ptr, 0)) {
+		pr_debug("REL24 out of range!\n");
+		return -1;
+	}
+
+	if (add_ftrace_tramp(tramp)) {
+		pr_debug("No tramp locations left\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long tramp, ip = rec->ip;
+	ppc_inst_t op;
+
+	/* Read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* Let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (setup_mcount_compiler_tramp(tramp)) {
+		/* Are other trampolines reachable? */
+		if (!find_ftrace_tramp(ip)) {
+			pr_err("No ftrace trampolines reachable from %ps\n",
+					(void *)ip);
+			return -EINVAL;
+		}
+	}
+
+	if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * If the calling address is more that 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, addr, 1);
+		new = ppc_inst(PPC_RAW_NOP());
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		return __ftrace_make_nop_kernel(rec, addr);
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 * We should either already have a pointer to the module
+	 * or it has been passed in.
+	 */
+	if (!rec->arch.mod) {
+		if (!mod) {
+			pr_err("No module loaded addr=%lx\n", addr);
+			return -EFAULT;
+		}
+		rec->arch.mod = mod;
+	} else if (mod) {
+		if (mod != rec->arch.mod) {
+			pr_err("Record mod %p not equal to passed in mod %p\n",
+			       rec->arch.mod, mod);
+			return -EINVAL;
+		}
+		/* nothing to do if mod == rec->arch.mod */
+	} else
+		mod = rec->arch.mod;
+
+	return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
+{
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+	else
+		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
+		       ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
+}
+
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t op[2];
+	void *ip = (void *)rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(op, ip))
+		return -EFAULT;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
+	    copy_inst_from_kernel_nofault(op + 1, ip + 4))
+		return -EFAULT;
+
+	if (!expected_nop_sequence(ip, op[0], op[1])) {
+		pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
+		       ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
+		return -EINVAL;
+	}
+
+	/* If we never set up ftrace trampoline(s), then bail */
+	if (!mod->arch.tramp ||
+	    (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t op;
+	void *ip = (void *)rec->ip;
+	unsigned long tramp, entry, ptr;
+
+	/* Make sure we're being asked to patch branch to a known ftrace addr */
+	entry = ppc_global_function_entry((void *)ftrace_caller);
+	ptr = ppc_global_function_entry((void *)addr);
+
+	if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+
+	if (ptr != entry) {
+		pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+		return -EINVAL;
+	}
+
+	/* Make sure we have a nop */
+	if (copy_inst_from_kernel_nofault(&op, ip)) {
+		pr_err("Unable to read ftrace location %p\n", ip);
+		return -EFAULT;
+	}
+
+	if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Unexpected call sequence at %p: %08lx\n",
+		       ip, ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	tramp = find_ftrace_tramp((unsigned long)ip);
+	if (!tramp) {
+		pr_err("No ftrace trampolines reachable from %ps\n", ip);
+		return -EINVAL;
+	}
+
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("Error patching branch to ftrace tramp!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * If the calling address is more that 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ppc_inst(PPC_RAW_NOP());
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		return __ftrace_make_call_kernel(rec, addr);
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		/* We should not get here without modules */
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 * Being that we are converting from nop, it had better
+	 * already have a module defined.
+	 */
+	if (!rec->arch.mod) {
+		pr_err("No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_make_call(rec, addr);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+					unsigned long addr)
+{
+	ppc_inst_t op;
+	unsigned long ip = rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
+
+	/* If we never set up ftrace trampolines, then bail */
+	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	entry = ppc_global_function_entry((void *)old_addr);
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (tramp != entry) {
+		/* old_addr is not within range, so we must have used a trampoline */
+		if (module_trampoline_target(mod, tramp, &ptr)) {
+			pr_err("Failed to get trampoline target\n");
+			return -EFAULT;
+		}
+
+		pr_devel("trampoline target %lx", ptr);
+
+		/* This should match what was called */
+		if (ptr != entry) {
+			pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+			return -EINVAL;
+		}
+	}
+
+	/* The new target may be within range */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
+			pr_err("REL24 out of range!\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	if (rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * If the calling address is more that 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, old_addr, 1);
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		/*
+		 * We always patch out of range locations to go to the regs
+		 * variant, so there is nothing to do here
+		 */
+		return 0;
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		/* We should not get here without modules */
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 */
+	if (!rec->arch.mod) {
+		pr_err("No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_modify_call(rec, old_addr, addr);
+}
+#endif
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	ppc_inst_t old, new;
+	int ret;
+
+	old = ppc_inst_read((u32 *)&ftrace_call);
+	new = ftrace_call_replace(ip, (unsigned long)func, 1);
+	ret = ftrace_modify_code(ip, old, new);
+
+	/* Also update the regs callback function */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
+		ip = (unsigned long)(&ftrace_regs_call);
+		old = ppc_inst_read((u32 *)&ftrace_regs_call);
+		new = ftrace_call_replace(ip, (unsigned long)func, 1);
+		ret = ftrace_modify_code(ip, old, new);
+	}
+
+	return ret;
+}
+
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+void ftrace_free_init_tramp(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
+		if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
+			ftrace_tramps[i] = 0;
+			return;
+		}
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	int i;
+	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+	u32 stub_insns[] = {
+		PPC_RAW_LD(_R12, _R13, PACATOC),
+		PPC_RAW_ADDIS(_R12, _R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+	};
+	unsigned long addr;
+	long reladdr;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+	else
+		addr = ppc_global_function_entry((void *)ftrace_caller);
+
+	reladdr = addr - kernel_toc_addr();
+
+	if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
+		pr_err("Address of %ps out of range of kernel_toc.\n",
+				(void *)addr);
+		return -1;
+	}
+
+	for (i = 0; i < 2; i++) {
+		memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+		tramp[i][1] |= PPC_HA(reladdr);
+		tramp[i][2] |= PPC_LO(reladdr);
+		add_ftrace_tramp((unsigned long)tramp[i]);
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+static int ftrace_modify_ftrace_graph_caller(bool enable)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+	ppc_inst_t old, new;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+		return 0;
+
+	old = ftrace_call_replace(ip, enable ? stub : addr, 0);
+	new = ftrace_call_replace(ip, enable ? addr : stub, 0);
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_ftrace_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_ftrace_graph_caller(false);
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info. Return the address we want to divert to.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
+{
+	unsigned long return_hooker;
+	int bit;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		goto out;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		goto out;
+
+	bit = ftrace_test_recursion_trylock(ip, parent);
+	if (bit < 0)
+		goto out;
+
+	return_hooker = ppc_function_entry(return_to_handler);
+
+	if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+		parent = return_hooker;
+
+	ftrace_test_recursion_unlock(bit);
+out:
+	return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
+}
+#else
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+				    unsigned long sp)
+{
+	return __prepare_ftrace_return(parent, ip, sp);
+}
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+	if (str[0] == '.' && search[0] != '.')
+		return str + 1;
+	else
+		return str;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
new file mode 100644
index 0000000..a8a7f28
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+
+_GLOBAL_TOC(ftrace_caller)
+	lbz	r3, PACA_FTRACE_ENABLED(r13)
+	cmpdi	r3, 0
+	beqlr
+
+	/* Taken from output of objdump from lib64/glibc */
+	mflr	r3
+	ld	r11, 0(r1)
+	stdu	r1, -112(r1)
+	std	r3, 128(r1)
+	ld	r4, 16(r11)
+	subi	r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+	blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	addi	r5, r1, 112
+	/* load r4 with local address */
+	ld	r4, 128(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* Grab the LR out of the caller stack frame */
+	ld	r11, 112(r1)
+	ld	r3, 16(r11)
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR in the callers stack frame to this.
+	 */
+	ld	r11, 112(r1)
+	std	r3, 16(r11)
+
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+	.space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+	.space 32
+.popsection
+
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12
+	mtctr	r12
+	mtlr	r0
+	bctr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+#ifdef CONFIG_PPC64
+	std	r4,  -32(r1)
+	std	r3,  -24(r1)
+	/* save TOC */
+	std	r2,  -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
+	 */
+	LOAD_PACA_TOC()
+#else
+	stwu	r1, -16(r1)
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+#endif
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+#ifdef CONFIG_PPC64
+	ld	r1, 0(r1)
+	ld	r4,  -32(r1)
+	ld	r3,  -24(r1)
+	ld	r2,  -16(r1)
+	ld	r31, -8(r1)
+#else
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	addi	r1, r1, 16
+#endif
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_entry.S
similarity index 83%
rename from arch/powerpc/kernel/trace/ftrace_mprofile.S
rename to arch/powerpc/kernel/trace/ftrace_entry.S
index 1f7d86d..9070188 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -3,12 +3,12 @@
  * Split from ftrace_64.S
  */
 
+#include <linux/export.h>
 #include <linux/magic.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ftrace.h>
 #include <asm/ppc-opcode.h>
-#include <asm/export.h>
 #include <asm/thread_info.h>
 #include <asm/bug.h>
 #include <asm/ptrace.h>
@@ -254,3 +254,70 @@
 	/* Return to original caller of live patched function */
 	blr
 #endif /* CONFIG_LIVEPATCH */
+
+#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12
+	mtctr	r12
+	mtlr	r0
+	bctr
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+#ifdef CONFIG_PPC64
+	std	r4,  -32(r1)
+	std	r3,  -24(r1)
+	/* save TOC */
+	std	r2,  -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
+	 */
+	LOAD_PACA_TOC()
+#else
+	stwu	r1, -16(r1)
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+#endif
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+#ifdef CONFIG_PPC64
+	ld	r1, 0(r1)
+	ld	r4,  -32(r1)
+	ld	r3,  -24(r1)
+	ld	r2,  -16(r1)
+	ld	r31, -8(r1)
+#else
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	addi	r1, r1, 16
+#endif
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+	.space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+	.space 32
+.popsection
diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S
deleted file mode 100644
index 294d1e0..0000000
--- a/arch/powerpc/kernel/trace/ftrace_low.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from entry_64.S
- */
-
-#include <linux/magic.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/ppc-opcode.h>
-#include <asm/export.h>
-
-#ifdef CONFIG_PPC64
-.pushsection ".tramp.ftrace.text","aw",@progbits;
-.globl ftrace_tramp_text
-ftrace_tramp_text:
-	.space 64
-.popsection
-
-.pushsection ".tramp.ftrace.init","aw",@progbits;
-.globl ftrace_tramp_init
-ftrace_tramp_init:
-	.space 64
-.popsection
-#endif
-
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
-	mflr	r12
-	mtctr	r12
-	mtlr	r0
-	bctr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(return_to_handler)
-	/* need to save return values */
-#ifdef CONFIG_PPC64
-	std	r4,  -32(r1)
-	std	r3,  -24(r1)
-	/* save TOC */
-	std	r2,  -16(r1)
-	std	r31, -8(r1)
-	mr	r31, r1
-	stdu	r1, -112(r1)
-
-	/*
-	 * We might be called from a module.
-	 * Switch to our TOC to run inside the core kernel.
-	 */
-	LOAD_PACA_TOC()
-#else
-	stwu	r1, -16(r1)
-	stw	r3, 8(r1)
-	stw	r4, 12(r1)
-#endif
-
-	bl	ftrace_return_to_handler
-	nop
-
-	/* return value has real return address */
-	mtlr	r3
-
-#ifdef CONFIG_PPC64
-	ld	r1, 0(r1)
-	ld	r4,  -32(r1)
-	ld	r3,  -24(r1)
-	ld	r2,  -16(r1)
-	ld	r31, -8(r1)
-#else
-	lwz	r3, 8(r1)
-	lwz	r4, 12(r1)
-	addi	r1, r1, 16
-#endif
-
-	/* Jump back to real return address */
-	blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 7ef147e..eeff136 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1158,7 +1158,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
  * pretend we got a single-step exception.  This was pointed out
  * by Kumar Gala.  -- paulus
  */
-static void emulate_single_step(struct pt_regs *regs)
+void emulate_single_step(struct pt_regs *regs)
 {
 	if (single_stepping(regs))
 		__single_step_exception(regs);
@@ -2225,21 +2225,10 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs)
 }
 
 #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
-/*
- * Default handler for a Watchdog exception,
- * spins until a reboot occurs
- */
-void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
-{
-	/* Generic WatchdogHandler, implement your own */
-	mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
-	return;
-}
-
 DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
 {
 	printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
-	WatchdogHandler(regs);
+	mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
 	return 0;
 }
 #endif
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
index 07296bc..80a1f9a4 100644
--- a/arch/powerpc/kernel/ucall.S
+++ b/arch/powerpc/kernel/ucall.S
@@ -5,8 +5,8 @@
  * Copyright 2019, IBM Corporation.
  *
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 
 _GLOBAL(ucall_norets)
 EXPORT_SYMBOL_GPL(ucall_norets)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index fcc0ad6..4094e4c 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
@@ -8,7 +9,6 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 #include <asm/asm-compat.h>
 
 /*
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 13614f0..1c5970d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -107,9 +107,7 @@
 #endif
 		/* careful! __ftr_alt_* sections need to be close to .text */
 		*(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
-#ifdef CONFIG_PPC64
 		*(.tramp.ftrace.text);
-#endif
 		NOINSTR_TEXT
 		SCHED_TEXT
 		LOCK_TEXT
@@ -276,9 +274,7 @@
 		 */
 		. = ALIGN(PAGE_SIZE);
 		_einittext = .;
-#ifdef CONFIG_PPC64
 		*(.tramp.ftrace.init);
-#endif
 	} :text
 
 	/* .exit.text is discarded at runtime, not link time,
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 252724e..ef5c2d2 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -350,7 +350,7 @@ EXPORT_SYMBOL(crash_shutdown_unregister);
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
-	unsigned int i;
+	volatile unsigned int i;
 	int (*old_handler)(struct pt_regs *regs);
 
 	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 110d28b..a3de536 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,7 +17,7 @@
 #include <linux/kexec.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -27,6 +27,7 @@
 #include <asm/kexec_ranges.h>
 #include <asm/crashdump-ppc64.h>
 #include <asm/mmzone.h>
+#include <asm/iommu.h>
 #include <asm/prom.h>
 #include <asm/plpks.h>
 
@@ -933,9 +934,9 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
 }
 
 /**
- * get_cpu_node_size - Compute the size of a CPU node in the FDT.
- *                     This should be done only once and the value is stored in
- *                     a static variable.
+ * cpu_node_size - Compute the size of a CPU node in the FDT.
+ *                 This should be done only once and the value is stored in
+ *                 a static variable.
  * Returns the max size of a CPU node in the FDT.
  */
 static unsigned int cpu_node_size(void)
@@ -1208,8 +1209,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 	if (ret < 0)
 		goto out;
 
-#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
-#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
 	ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
 	if (ret < 0)
 		goto out;
@@ -1217,8 +1216,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 	ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
 	if (ret < 0)
 		goto out;
-#undef DMA64_PROPNAME
-#undef DIRECT64_PROPNAME
 
 	/* Update memory reserve map */
 	ret = get_reserved_memory_ranges(&rmem);
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 5fc53a5..fb3e12f 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -18,7 +18,7 @@
 
 #include <linux/sort.h>
 #include <linux/kexec.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <asm/sections.h>
 #include <asm/kexec_ranges.h>
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 6c2b1d1..3b361af 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/export.h>
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/code-patching-asm.h>
 #include <asm/exception-64s.h>
-#include <asm/export.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f765d5..efd0ebf 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -182,7 +182,7 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info)
 	vfree(info->rev);
 	info->rev = NULL;
 	if (info->cma)
-		kvm_free_hpt_cma(virt_to_page(info->virt),
+		kvm_free_hpt_cma(virt_to_page((void *)info->virt),
 				 1 << (info->order - PAGE_SHIFT));
 	else if (info->virt)
 		free_pages(info->virt, info->order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index ccfd969..82be6d8 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -9,6 +9,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/kernel.h>
+#include <asm/lppaca.h>
 #include <asm/opal.h>
 #include <asm/mce.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 870110e..ea7ad20 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -10,6 +10,7 @@
  * Authors: Alexander Graf <agraf@suse.de>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <linux/objtool.h>
 #include <asm/ppc_asm.h>
@@ -24,7 +25,6 @@
 #include <asm/exception-64s.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
-#include <asm/export.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
 #include <asm/thread_info.h>
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index d58df71..e476e10 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -20,6 +20,7 @@
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
 
 #include "booke.h"
 #include "e500.h"
@@ -92,7 +93,11 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	local_irq_save(flags);
 	mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
-	asm volatile("tlbilxlpid");
+	/*
+	 * clang-17 and older could not assemble tlbilxlpid.
+	 * https://github.com/ClangBuiltLinux/linux/issues/1891
+	 */
+	asm volatile (PPC_TLBILX_LPID);
 	mtspr(SPRN_MAS5, 0);
 	local_irq_restore(flags);
 }
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 2158f61..b506c4d 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -6,10 +6,10 @@
  * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  */
 
+#include <linux/export.h>
 #include <asm/reg.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/tm.h>
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 9aa8286..51ad039 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -27,7 +27,7 @@
 CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
+obj-y += code-patching.o feature-fixups.o pmem.o
 
 obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
 
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
deleted file mode 100644
index ce18087..0000000
--- a/arch/powerpc/lib/alloc.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/memblock.h>
-#include <linux/string.h>
-#include <asm/setup.h>
-
-
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	void *p;
-
-	if (slab_is_available())
-		p = kzalloc(size, mask);
-	else {
-		p = memblock_alloc(size, SMP_CACHE_BYTES);
-		if (!p)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      size);
-	}
-	return p;
-}
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 4541e8e..cd00b9b 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -8,12 +8,12 @@
  * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
  */
 
+#include <linux/export.h>
 #include <linux/sys.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 98ff51b..d53d8f0 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -8,11 +8,11 @@
  * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
  */
 
+#include <linux/export.h>
 #include <linux/sys.h>
 #include <asm/processor.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 
 /*
  * Computes the checksum of a memory block at buff, length len,
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 3e9c27c..933b685 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -4,11 +4,11 @@
  *
  * Copyright (C) 1996-2005 Paul Mackerras.
  */
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/code-patching-asm.h>
 #include <asm/kasan.h>
 
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
index 88d46c4..bf1014b 100644
--- a/arch/powerpc/lib/copy_mc_64.S
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -4,9 +4,9 @@
  * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
  * Author - Balbir Singh <bsingharora@gmail.com>
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 	.macro err1
 100:
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 5d09a02..f33a2e6 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -2,11 +2,11 @@
 /*
  * Copyright (C) 2008 Mark Nelson, IBM Corp.
  */
+#include <linux/export.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 
 _GLOBAL_TOC(copy_page)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index db8719a..9af969d 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -2,9 +2,9 @@
 /*
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  */
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
 
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 80def1c..4f82581 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -67,7 +67,8 @@ static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_e
 	return 0;
 }
 
-static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+				      struct fixup_entry *fcur)
 {
 	u32 *start, *end, *alt_start, *alt_end, *src, *dest;
 
@@ -79,7 +80,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
 	if ((alt_end - alt_start) > (end - start))
 		return 1;
 
-	if ((value & fcur->mask) == fcur->value)
+	if ((value & fcur->mask & mask) == (fcur->value & mask))
 		return 0;
 
 	src = alt_start;
@@ -97,7 +98,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
 	return 0;
 }
 
-void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+				   void *fixup_start, void *fixup_end)
 {
 	struct fixup_entry *fcur, *fend;
 
@@ -105,7 +107,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 	fend = fixup_end;
 
 	for (; fcur < fend; fcur++) {
-		if (patch_feature_section(value, fcur)) {
+		if (patch_feature_section_mask(value, mask, fcur)) {
 			WARN_ON(1);
 			printk("Unable to patch feature section at %p - %p" \
 				" with %p - %p\n",
@@ -117,6 +119,11 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 	}
 }
 
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+	do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);
+}
+
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
 static bool is_fixup_addr_valid(void *dest, size_t size)
 {
@@ -651,6 +658,17 @@ void __init apply_feature_fixups(void)
 	do_final_fixups();
 }
 
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+	saved_mmu_features &= ~mask;
+	saved_mmu_features |= cur_cpu_spec->mmu_features & mask;
+
+	do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+			       PTRRELOC(&__start___mmu_ftr_fixup),
+			       PTRRELOC(&__stop___mmu_ftr_fixup));
+	mmu_feature_keys_init();
+}
+
 void __init setup_feature_keys(void)
 {
 	/*
@@ -683,6 +701,11 @@ late_initcall(check_features);
 #define check(x)	\
 	if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
 
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+	return patch_feature_section_mask(value, ~0, fcur);
+}
+
 /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
 static struct fixup_entry fixup;
 
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 09af295..1518750 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -5,9 +5,9 @@
  *
  * Author: Anton Blanchard <anton@au.ibm.com>
  */
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 
 /* Note: This code relies on -mminimal-toc */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 9351ffa..6fd06cd 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -4,10 +4,10 @@
  *
  * Copyright (C) 1996 Paul Mackerras.
  */
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/kasan.h>
 
 #ifndef CONFIG_KASAN
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
index 5010e37..f6fca56 100644
--- a/arch/powerpc/lib/memcmp_32.S
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -7,8 +7,8 @@
  *
  */
 
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 
 	.text
 
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 0b9b168..142c666 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -3,8 +3,8 @@
  * Author: Anton Blanchard <anton@au.ibm.com>
  * Copyright 2015 IBM Corporation.
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/ppc-opcode.h>
 
 #define off8	r6
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 016c91e..b5a67e2 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -2,9 +2,9 @@
 /*
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  */
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
 #include <asm/kasan.h>
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 38158b7..a4ab862 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -485,7 +485,7 @@ write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *r
  * Copy from a buffer to userspace, using the largest possible
  * aligned accesses, up to sizeof(long).
  */
-static nokprobe_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
 {
 	int c;
 
@@ -1043,7 +1043,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
 }
 #endif /* CONFIG_VSX */
 
-static int __emulate_dcbz(unsigned long ea)
+static __always_inline int __emulate_dcbz(unsigned long ea)
 {
 	unsigned long i;
 	unsigned long size = l1_dcache_bytes();
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 2752b1c..daa7206 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -4,8 +4,8 @@
  *
  * Copyright (C) 1996 Paul Mackerras.
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/cache.h>
 
 	.text
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index 1ddb263..3ee4561 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -7,8 +7,8 @@
  *
  */
 
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/cache.h>
 
 	.text
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index df41ce0..a25eb85 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -6,10 +6,10 @@
  * Author: Anton Blanchard <anton@au.ibm.com>
  */
 
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
 #include <asm/linkage.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 
 /**
  * __arch_clear_user: - Zero a block of memory in user space, with less checking.
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
index 0a8d3f6..bbd24fe 100644
--- a/arch/powerpc/lib/strlen_32.S
+++ b/arch/powerpc/lib/strlen_32.S
@@ -6,8 +6,8 @@
  *
  * Inspired from glibc implementation
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
-#include <asm/export.h>
 #include <asm/cache.h>
 
 	.text
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index a5a21d4..8b804e1 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -14,6 +14,7 @@
  *  hash table, so this file is not used on them.)
  */
 
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <linux/init.h>
 #include <asm/reg.h>
@@ -22,7 +23,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/feature-fixups.h>
 #include <asm/code-patching-asm.h>
 
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 28676ca..3a88155 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -3,25 +3,11 @@
 #include <asm/kup.h>
 #include <asm/smp.h>
 
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
-void kuap_lock_all_ool(void)
-{
-	kuap_lock_all();
-}
-EXPORT_SYMBOL(kuap_lock_all_ool);
-
-void kuap_unlock_all_ool(void)
-{
-	kuap_unlock_all();
-}
-EXPORT_SYMBOL(kuap_unlock_all_ool);
-
 void setup_kuap(bool disabled)
 {
 	if (!disabled) {
-		kuap_lock_all_ool();
+		update_user_segments(mfsr(0) | SR_KS);
+		isync();        /* Context sync required after mtsr() */
 		init_mm.context.sr0 |= SR_KS;
 		current->thread.sr0 |= SR_KS;
 	}
@@ -30,7 +16,7 @@ void setup_kuap(bool disabled)
 		return;
 
 	if (disabled)
-		static_branch_enable(&disable_kuap_key);
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
 	else
 		pr_info("Activating Kernel Userspace Access Protection\n");
 }
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 269a3eb2..1922f9a 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -71,7 +71,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 	mm->context.id = __init_new_context();
 	mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
 
-	if (!kuep_is_disabled())
+	if (IS_ENABLED(CONFIG_PPC_KUEP))
 		mm->context.sr0 |= SR_NX;
 	if (!kuap_is_disabled())
 		mm->context.sr0 |= SR_KS;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 1498ccd..8f8a62d3 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -9,6 +9,7 @@
 #include <linux/memremap.h>
 #include <linux/pkeys.h>
 #include <linux/debugfs.h>
+#include <linux/proc_fs.h>
 #include <misc/cxl-base.h>
 
 #include <asm/pgalloc.h>
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 1d2675a..1257339 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -291,7 +291,7 @@ void setup_kuap(bool disabled)
 
 	if (smp_processor_id() == boot_cpuid) {
 		pr_info("Activating Kernel Userspace Access Prevention\n");
-		cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP;
+		cur_cpu_spec->mmu_features |= MMU_FTR_KUAP;
 	}
 
 	/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 9667901..c6a4ac7 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -37,7 +37,6 @@
 #include <mm/mmu_decl.h>
 
 unsigned int mmu_base_pid;
-unsigned long radix_mem_block_size __ro_after_init;
 
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
 			unsigned long region_start, unsigned long region_end)
@@ -300,7 +299,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 	bool prev_exec, exec = false;
 	pgprot_t prot;
 	int psize;
-	unsigned long max_mapping_size = radix_mem_block_size;
+	unsigned long max_mapping_size = memory_block_size;
 
 	if (debug_pagealloc_enabled_or_kfence())
 		max_mapping_size = PAGE_SIZE;
@@ -502,58 +501,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 	return 1;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int __init probe_memory_block_size(unsigned long node, const char *uname, int
-					  depth, void *data)
-{
-	unsigned long *mem_block_size = (unsigned long *)data;
-	const __be32 *prop;
-	int len;
-
-	if (depth != 1)
-		return 0;
-
-	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
-		return 0;
-
-	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-
-	if (!prop || len < dt_root_size_cells * sizeof(__be32))
-		/*
-		 * Nothing in the device tree
-		 */
-		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-	else
-		*mem_block_size = of_read_number(prop, dt_root_size_cells);
-	return 1;
-}
-
-static unsigned long __init radix_memory_block_size(void)
-{
-	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-
-	/*
-	 * OPAL firmware feature is set by now. Hence we are ok
-	 * to test OPAL feature.
-	 */
-	if (firmware_has_feature(FW_FEATURE_OPAL))
-		mem_block_size = 1UL * 1024 * 1024 * 1024;
-	else
-		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-
-	return mem_block_size;
-}
-
-#else   /* CONFIG_MEMORY_HOTPLUG */
-
-static unsigned long __init radix_memory_block_size(void)
-{
-	return 1UL * 1024 * 1024 * 1024;
-}
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-
 void __init radix__early_init_devtree(void)
 {
 	int rc;
@@ -577,16 +524,6 @@ void __init radix__early_init_devtree(void)
 		mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
 			psize_to_rpti_pgsize(MMU_PAGE_64K);
 	}
-
-	/*
-	 * Max mapping size used when mapping pages. We don't use
-	 * ppc_md.memory_block_size() here because this get called
-	 * early and we don't have machine probe called yet. Also
-	 * the pseries implementation only check for ibm,lmb-size.
-	 * All hypervisor supporting radix do expose that device
-	 * tree node.
-	 */
-	radix_mem_block_size = radix_memory_block_size();
 	return;
 }
 
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 3020a8b..39acc2c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static __always_inline void __tlbie_pid_lpid(unsigned long pid,
-					     unsigned long lpid,
-					     unsigned long ric)
-{
-	unsigned long rb, rs, prs, r;
-
-	rb = PPC_BIT(53); /* IS = 1 */
-	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-	prs = 1; /* process scoped */
-	r = 1;   /* radix format */
-
-	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-	trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
-					    unsigned long lpid,
-					    unsigned long ap, unsigned long ric)
-{
-	unsigned long rb, rs, prs, r;
-
-	rb = va & ~(PPC_BITMASK(52, 63));
-	rb |= ap << PPC_BITLSHIFT(58);
-	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-	prs = 1; /* process scoped */
-	r = 1;   /* radix format */
-
-	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-	trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
 					    unsigned long ap, unsigned long ric)
 {
@@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
 	}
 }
 
-static inline void fixup_tlbie_va_range_lpid(unsigned long va,
-					     unsigned long pid,
-					     unsigned long lpid,
-					     unsigned long ap)
-{
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-		asm volatile("ptesync" : : : "memory");
-		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-	}
-
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-		asm volatile("ptesync" : : : "memory");
-		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
-	}
-}
-
 static inline void fixup_tlbie_pid(unsigned long pid)
 {
 	/*
@@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid)
 	}
 }
 
-static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
-{
-	/*
-	 * We can use any address for the invalidation, pick one which is
-	 * probably unused as an optimisation.
-	 */
-	unsigned long va = ((1UL << 52) - 1);
-
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-		asm volatile("ptesync" : : : "memory");
-		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-	}
-
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-		asm volatile("ptesync" : : : "memory");
-		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
-				RIC_FLUSH_TLB);
-	}
-}
-
 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
 				       unsigned long ap)
 {
@@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
-static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
-				   unsigned long ric)
-{
-	asm volatile("ptesync" : : : "memory");
-
-	/*
-	 * Workaround the fact that the "ric" argument to __tlbie_pid
-	 * must be a compile-time contraint to match the "i" constraint
-	 * in the asm statement.
-	 */
-	switch (ric) {
-	case RIC_FLUSH_TLB:
-		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
-		fixup_tlbie_pid_lpid(pid, lpid);
-		break;
-	case RIC_FLUSH_PWC:
-		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-		break;
-	case RIC_FLUSH_ALL:
-	default:
-		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
-		fixup_tlbie_pid_lpid(pid, lpid);
-	}
-	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
 struct tlbiel_pid {
 	unsigned long pid;
 	unsigned long ric;
@@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 	fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
-static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
-					 unsigned long pid, unsigned long lpid,
-					 unsigned long page_size,
-					 unsigned long psize)
-{
-	unsigned long addr;
-	unsigned long ap = mmu_get_ap(psize);
-
-	for (addr = start; addr < end; addr += page_size)
-		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
-
-	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
-}
-
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 				      unsigned long psize, unsigned long ric)
 {
@@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
-static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
-					unsigned long pid, unsigned long lpid,
-					unsigned long page_size,
-					unsigned long psize, bool also_pwc)
-{
-	asm volatile("ptesync" : : : "memory");
-	if (also_pwc)
-		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
-	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-
 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
 				unsigned long start, unsigned long end,
 				unsigned long pid, unsigned long page_size,
@@ -820,7 +701,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 	 * that's what the caller expects.
 	 */
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
-		atomic_dec(&mm->context.active_cpus);
+		dec_mm_active_cpus(mm);
 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
 		always_flush = true;
 	}
@@ -1316,7 +1197,35 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 * See the comment for radix in arch_exit_mmap().
 	 */
 	if (tlb->fullmm) {
-		__flush_all_mm(mm, true);
+		if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+			/*
+			 * Shootdown based lazy tlb mm refcounting means we
+			 * have to IPI everyone in the mm_cpumask anyway soon
+			 * when the mm goes away, so might as well do it as
+			 * part of the final flush now.
+			 *
+			 * If lazy shootdown was improved to reduce IPIs (e.g.,
+			 * by batching), then it may end up being better to use
+			 * tlbies here instead.
+			 */
+			preempt_disable();
+
+			smp_mb(); /* see radix__flush_tlb_mm */
+			exit_flush_lazy_tlbs(mm);
+			_tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+
+			/*
+			 * It should not be possible to have coprocessors still
+			 * attached here.
+			 */
+			if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+				__flush_all_mm(mm, true);
+
+			preempt_enable();
+		} else {
+			__flush_all_mm(mm, true);
+		}
+
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->freed_tables)
 			radix__flush_tlb_mm(mm);
@@ -1497,6 +1406,127 @@ void radix__flush_tlb_all(void)
 }
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+					    unsigned long lpid,
+					    unsigned long ap, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+				RIC_FLUSH_TLB);
+	}
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+				   unsigned long ric)
+{
+	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_pid
+	 * must be a compile-time contraint to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid_lpid(pid, lpid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid_lpid(pid, lpid);
+	}
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+					     unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					 unsigned long pid, unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long psize)
+{
+	unsigned long addr;
+	unsigned long ap = mmu_get_ap(psize);
+
+	for (addr = start; addr < end; addr += page_size)
+		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					unsigned long pid, unsigned long lpid,
+					unsigned long page_size,
+					unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync" : : : "memory");
+	if (also_pwc)
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 /*
  * Performs process-scoped invalidations for a given LPID
  * as part of H_RPT_INVALIDATE hcall.
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 6956f63..f2708c8 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -13,6 +13,7 @@
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/paca.h>
+#include <asm/lppaca.h>
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index d4cc374..d8adc45 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -126,6 +126,8 @@ void __init MMU_init(void)
 
 	setup_kup();
 
+	update_mmu_feature_fixups(MMU_FTR_KUAP);
+
 	/* Shortly after that, the entire linear mapping will be available */
 	memblock_set_current_limit(lowmem_end_addr);
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e0208cb..d96bbc0 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,6 +40,7 @@
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/memremap.h>
+#include <linux/memory.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -493,6 +494,130 @@ static int __init dt_scan_mmu_pid_width(unsigned long node,
 	return 1;
 }
 
+/*
+ * Outside hotplug the kernel uses this value to map the kernel direct map
+ * with radix. To be compatible with older kernels, let's keep this value
+ * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+ * things with 1GB size in the case where we don't support hotplug.
+ */
+#ifndef CONFIG_MEMORY_HOTPLUG
+#define DEFAULT_MEMORY_BLOCK_SIZE	SZ_16M
+#else
+#define DEFAULT_MEMORY_BLOCK_SIZE	MIN_MEMORY_BLOCK_SIZE
+#endif
+
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+	unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+
+	for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+		if ((mem_size & *block_size) == 0)
+			break;
+	}
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+					  depth, void *data)
+{
+	const char *type;
+	unsigned long *block_size = (unsigned long *)data;
+	const __be32 *reg, *endp;
+	int l;
+
+	if (depth != 1)
+		return 0;
+	/*
+	 * If we have dynamic-reconfiguration-memory node, use the
+	 * lmb value.
+	 */
+	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+		const __be32 *prop;
+
+		prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+		if (!prop || l < dt_root_size_cells * sizeof(__be32))
+			/*
+			 * Nothing in the device tree
+			 */
+			*block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+		else
+			*block_size = of_read_number(prop, dt_root_size_cells);
+		/*
+		 * We have found the final value. Don't probe further.
+		 */
+		return 1;
+	}
+	/*
+	 * Find all the device tree nodes of memory type and make sure
+	 * the area can be mapped using the memory block size value
+	 * we end up using. We start with 1G value and keep reducing
+	 * it such that we can map the entire area using memory_block_size.
+	 * This will be used on powernv and older pseries that don't
+	 * have ibm,lmb-size node.
+	 * For ex: with P5 we can end up with
+	 * memory@0 -> 128MB
+	 * memory@128M -> 64M
+	 * This will end up using 64MB  memory block size value.
+	 */
+	type = of_get_flat_dt_prop(node, "device_type", NULL);
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+	if (!reg)
+		reg = of_get_flat_dt_prop(node, "reg", &l);
+	if (!reg)
+		return 0;
+
+	endp = reg + (l / sizeof(__be32));
+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+		const char *compatible;
+		u64 size;
+
+		dt_mem_next_cell(dt_root_addr_cells, &reg);
+		size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+		if (size) {
+			update_memory_block_size(block_size, size);
+			continue;
+		}
+		/*
+		 * ibm,coherent-device-memory with linux,usable-memory = 0
+		 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+		 * linux,usable-memory == 0 implies driver managed memory and
+		 * we can't use large memory block size due to hotplug/unplug
+		 * limitations.
+		 */
+		compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+		if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+			if (*block_size > SZ_256M)
+				*block_size = SZ_256M;
+			/*
+			 * We keep 256M as the upper limit with GPU present.
+			 */
+			return 0;
+		}
+	}
+	/* continue looking for other memory device types */
+	return 0;
+}
+
+/*
+ * start with 1G memory block size. Early init will
+ * fix this with correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
+{
+	/*
+	 * We need to do memory_block_size probe early so that
+	 * radix__early_init_mmu() can use this as limit for
+	 * mapping page size.
+	 */
+	of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
 void __init mmu_early_init_devtree(void)
 {
 	bool hvmode = !!(mfmsr() & MSR_HV);
@@ -526,6 +651,8 @@ void __init mmu_early_init_devtree(void)
 	if (!hvmode)
 		early_check_vec5();
 
+	early_init_memory_block_size();
+
 	if (early_radix_enabled()) {
 		radix__early_init_devtree();
 
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 1fb9c99..b24c190 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -43,11 +43,13 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
+	int cpu = smp_processor_id();
 	bool new_on_cpu = false;
 
 	/* Mark this context has been used on the new CPU */
-	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+		VM_WARN_ON_ONCE(next == &init_mm);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
 		inc_mm_active_cpus(next);
 
 		/*
@@ -100,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures. Out of line for now
 	 */
 	switch_mmu_context(prev, next, tsk);
+
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c6dccb4..7f9ff064 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,6 +110,7 @@ extern void MMU_init_hw(void);
 void MMU_init_hw_patch(void);
 unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
 #endif
+void mmu_init_secondary(int cpu);
 
 #ifdef CONFIG_PPC_E500
 extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
index 552becf..e1f7de2 100644
--- a/arch/powerpc/mm/nohash/kup.c
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -5,7 +5,6 @@
 
 #include <linux/export.h>
 #include <linux/init.h>
-#include <linux/jump_label.h>
 #include <linux/printk.h>
 #include <linux/smp.h>
 
@@ -13,21 +12,18 @@
 #include <asm/smp.h>
 
 #ifdef CONFIG_PPC_KUAP
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
 void setup_kuap(bool disabled)
 {
 	if (disabled) {
 		if (IS_ENABLED(CONFIG_40x))
 			disable_kuep = true;
 		if (smp_processor_id() == boot_cpuid)
-			static_branch_enable(&disable_kuap_key);
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
 		return;
 	}
 
 	pr_info("Activating Kernel Userspace Access Protection\n");
 
-	__prevent_user_access(KUAP_READ_WRITE);
+	prevent_user_access(KUAP_READ_WRITE);
 }
 #endif
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index a903b30..5ffa0af 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -318,17 +318,6 @@ EXPORT_SYMBOL(flush_tlb_page);
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_PPC_47x
-void __init early_init_mmu_47x(void)
-{
-#ifdef CONFIG_SMP
-	unsigned long root = of_get_flat_dt_root();
-	if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
-		mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
-#endif /* CONFIG_SMP */
-}
-#endif /* CONFIG_PPC_47x */
-
 /*
  * Flush kernel TLB entries in the given range
  */
@@ -746,8 +735,10 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 #else /* ! CONFIG_PPC64 */
 void __init early_init_mmu(void)
 {
-#ifdef CONFIG_PPC_47x
-	early_init_mmu_47x();
-#endif
+	unsigned long root = of_get_flat_dt_root();
+
+	if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) &&
+	    of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+		mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
 }
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9f73d08..f6c4ace 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -34,6 +34,7 @@
 #include <asm/hvcall.h>
 #include <asm/setup.h>
 #include <asm/vdso.h>
+#include <asm/vphn.h>
 #include <asm/drmem.h>
 
 static int numa_enabled = 1;
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index ee721f4..1a53ab0 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -645,7 +645,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *event;
 	unsigned long val;
-	int found = 0;
 
 	for (i = 0; i < ppmu->n_counter; ++i) {
 		event = cpuhw->event[i];
@@ -654,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if ((int)val < 0) {
 			if (event) {
 				/* event has overflowed */
-				found = 1;
 				record_and_restart(event, val, regs);
 			} else {
 				/*
@@ -672,11 +670,13 @@ static void perf_event_interrupt(struct pt_regs *regs)
 	isync();
 }
 
-void hw_perf_event_setup(int cpu)
+static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
 
 	memset(cpuhw, 0, sizeof(*cpuhw));
+
+	return 0;
 }
 
 int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
@@ -689,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
 		pmu->name);
 
 	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
+	cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+			  fsl_emb_pmu_prepare_cpu, NULL);
 
 	return 0;
 }
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 7ff8ff3..39dbe6b 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -102,6 +102,511 @@ static ssize_t cpumask_show(struct device *dev,
 	return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
 }
 
+/* Interface attribute array index to store system information */
+#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
+#define INTERFACE_NULL_ATTR			11
+
+/* Counter request value to retrieve system information */
+enum {
+	PROCESSOR_BUS_TOPOLOGY,
+	PROCESSOR_CONFIG,
+	AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+	AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+	AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
+};
+
+static int sysinfo_counter_request[] = {
+	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+	[PROCESSOR_CONFIG] = 0x90,
+	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
+};
+
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
+			u16 secondary_index, char *buf,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	unsigned long ret;
+	size_t i, j;
+
+	arg->params.counter_request = cpu_to_be32(req);
+	arg->params.starting_index = cpu_to_be32(starting_index);
+	arg->params.secondary_index = cpu_to_be16(secondary_index);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+	 * which means that the current buffer size cannot accommodate
+	 * all the information and a partial buffer returned.
+	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+	 * performance information, and required to set
+	 * "Enable Performance Information Collection" option.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+	 * because of invalid buffer-length/address or due to some hardware
+	 * error.
+	 */
+	if (ret && (ret != H_PARAMETER))
+		return -EIO;
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+	 * to show the total number of counter_value array elements
+	 * returned via hcall.
+	 * hcall also populates 'cv_element_size' corresponds to individual
+	 * counter_value array element size. Below loop go through all
+	 * counter_value array elements as per their size and add it to
+	 * the output buffer.
+	 */
+	for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
+		j = i * be16_to_cpu(arg->params.cv_element_size);
+
+		for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++)
+			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]);
+		*n += sprintf(buf + *n,  "\n");
+	}
+
+	if (*n >= PAGE_SIZE) {
+		pr_info("System information exceeds PAGE_SIZE\n");
+		return -EFBIG;
+	}
+
+	return ret;
+}
+
+static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xD0 corresponds to request
+	 * type 'Processor_bus_topology', to retrieve
+	 * the system topology information.
+	 * starting_index value implies the starting hardware
+	 * chip id.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+			0, 0, buf, &n, arg);
+
+	if (!ret)
+		return n;
+
+	if (ret != H_PARAMETER)
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer util we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+				starting_index, 0, buf, &n, arg);
+
+		if (!ret)
+			return n;
+
+		if (ret != H_PARAMETER)
+			goto out;
+	}
+
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+	return ret;
+}
+
+static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
+					char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0x90 corresponds to request
+	 * type 'Processor_config', to retrieve
+	 * the system processor information.
+	 * starting_index value implies the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+			0, 0, buf, &n, arg);
+
+	if (!ret)
+		return n;
+
+	if (ret != H_PARAMETER)
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer util we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+				starting_index, 0, buf, &n, arg);
+
+		if (!ret)
+			return n;
+
+		if (ret != H_PARAMETER)
+			goto out;
+	}
+
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+	return ret;
+}
+
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xA0 corresponds to request
+	 * type 'Affinity_domain_information_by_virutal_processor',
+	 * to retrieve the system affinity domain information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+			0, 0, buf, &n, arg);
+
+	if (!ret)
+		return n;
+
+	if (ret != H_PARAMETER)
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next secondary index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next secondary index and add it
+	 * to buffer util we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index and secondary index type is part of the
+		 * counter_value buffer elements, use the starting index value in the
+		 * last array element as subsequent starting index, and use secondary index
+		 * value in the last array element plus 1 as subsequent secondary index.
+		 * For counter request '0xA0', starting index points to partition id
+		 * and secondary index points to corresponding virtual processor index.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
+		u16 secondary_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+				starting_index, secondary_index, buf, &n, arg);
+
+		if (!ret)
+			return n;
+
+		if (ret != H_PARAMETER)
+			goto out;
+	}
+
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+	return ret;
+}
+
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
+						char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xB0 corresponds to request
+	 * type 'Affinity_domain_information_by_domain',
+	 * to retrieve the system affinity domain information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+			0, 0, buf, &n, arg);
+
+	if (!ret)
+		return n;
+
+	if (ret != H_PARAMETER)
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer util we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] +
+			(arg->bytes[last_element] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+					starting_index, 0, buf, &n, arg);
+
+		if (!ret)
+			return n;
+
+		if (ret != H_PARAMETER)
+			goto out;
+	}
+
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+	return ret;
+}
+
+static void affinity_domain_via_partition_result_parse(int returned_values,
+			int element_size, char *buf, size_t *last_element,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	size_t i = 0, j = 0;
+	size_t k, l, m;
+	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+	 * to show the total number of counter_value array elements
+	 * returned via hcall.
+	 * Unlike other request types, the data structure returned by this
+	 * request is variable-size. For this counter request type,
+	 * hcall populates 'cv_element_size' corresponds to minimum size of
+	 * the structure returned i.e; the size of the structure with no domain
+	 * information. Below loop go through all counter_value array
+	 * to determine the number and size of each domain array element and
+	 * add it to the output buffer.
+	 */
+	while (i < returned_values) {
+		k = j;
+		for (; k < j + element_size; k++)
+			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
+		*n += sprintf(buf + *n,  "\n");
+
+		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
+		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
+
+		for (l = 0; l < total_affinity_domain_ele; l++) {
+			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+				*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
+				k++;
+			}
+			*n += sprintf(buf + *n,  "\n");
+		}
+
+		*n += sprintf(buf + *n,  "\n");
+		i++;
+		j = k;
+	}
+
+	*last_element = k;
+}
+
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
+							char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+	size_t last_element = 0;
+	u32 starting_index;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xB1 corresponds to counter request
+	 * type 'Affinity_domain_information_by_partition',
+	 * to retrieve the system affinity domain by partition information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+	arg->params.starting_index = cpu_to_be32(0);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	if (!ret)
+		goto parse_result;
+
+	/*
+	 * ret value as 'H_PARAMETER' implies that the current buffer size
+	 * can't accommodate all the information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer util we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		affinity_domain_via_partition_result_parse(
+			be16_to_cpu(arg->params.returned_values) - 1,
+			be16_to_cpu(arg->params.cv_element_size), buf,
+			&last_element, &n, arg);
+
+		if (n >= PAGE_SIZE) {
+			put_cpu_var(hv_gpci_reqb);
+			pr_debug("System information exceeds PAGE_SIZE\n");
+			return -EFBIG;
+		}
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting_index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		starting_index = (u8)arg->bytes[last_element] << 8 |
+				(u8)arg->bytes[last_element + 1];
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+		arg->params.counter_request = cpu_to_be32(
+				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+		arg->params.starting_index = cpu_to_be32(starting_index);
+
+		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+		if (ret && (ret != H_PARAMETER))
+			goto out;
+	}
+
+parse_result:
+	affinity_domain_via_partition_result_parse(
+		be16_to_cpu(arg->params.returned_values),
+		be16_to_cpu(arg->params.cv_element_size),
+		buf, &last_element, &n, arg);
+
+	put_cpu_var(hv_gpci_reqb);
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+	 * which means that the current buffer size cannot accommodate
+	 * all the information and a partial buffer returned.
+	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+	 * performance information, and required to set
+	 * "Enable Performance Information Collection" option.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+	 * because of invalid buffer-length/address or due to some hardware
+	 * error.
+	 */
+	return -EIO;
+}
+
 static DEVICE_ATTR_RO(kernel_version);
 static DEVICE_ATTR_RO(cpumask);
 
@@ -118,6 +623,31 @@ static struct attribute *interface_attrs[] = {
 	&hv_caps_attr_expanded.attr,
 	&hv_caps_attr_lab.attr,
 	&hv_caps_attr_collect_privileged.attr,
+	/*
+	 * This NULL is a placeholder for the processor_bus_topology
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the processor_config
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_domain
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_partition
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
 	NULL,
 };
 
@@ -143,8 +673,6 @@ static const struct attribute_group *attr_groups[] = {
 	NULL,
 };
 
-static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
-
 static unsigned long single_gpci_request(u32 req, u32 starting_index,
 		u16 secondary_index, u8 version_in, u32 offset, u8 length,
 		u64 *value)
@@ -325,6 +853,107 @@ static int hv_gpci_cpu_hotplug_init(void)
 			  ppc_hv_gpci_cpu_offline);
 }
 
+static struct device_attribute *sysinfo_device_attr_create(int
+		sysinfo_interface_group_index, u32 req)
+{
+	struct device_attribute *attr = NULL;
+	unsigned long ret;
+	struct hv_gpci_request_buffer *arg;
+
+	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
+			sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
+		pr_info("Wrong interface group index for system information\n");
+		return NULL;
+	}
+
+	/* Check for given counter request value support */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	arg->params.counter_request = cpu_to_be32(req);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	put_cpu_var(hv_gpci_reqb);
+
+	/*
+	 * Add given counter request value attribute in the interface_attrs
+	 * attribute array, only for valid return types.
+	 */
+	if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) {
+		attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+		if (!attr)
+			return NULL;
+
+		sysfs_attr_init(&attr->attr);
+		attr->attr.mode = 0444;
+
+		switch (sysinfo_interface_group_index) {
+		case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
+			attr->attr.name = "processor_bus_topology";
+			attr->show = processor_bus_topology_show;
+		break;
+		case INTERFACE_PROCESSOR_CONFIG_ATTR:
+			attr->attr.name = "processor_config";
+			attr->show = processor_config_show;
+		break;
+		case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
+			attr->attr.name = "affinity_domain_via_virtual_processor";
+			attr->show = affinity_domain_via_virtual_processor_show;
+		break;
+		case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
+			attr->attr.name = "affinity_domain_via_domain";
+			attr->show = affinity_domain_via_domain_show;
+		break;
+		case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
+			attr->attr.name = "affinity_domain_via_partition";
+			attr->show = affinity_domain_via_partition_show;
+		break;
+		}
+	} else
+		pr_devel("hcall failed, with error: 0x%lx\n", ret);
+
+	return attr;
+}
+
+static void add_sysinfo_interface_files(void)
+{
+	int sysfs_count;
+	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
+	int i;
+
+	sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+
+	/* Get device attribute for a given counter request value */
+	for (i = 0; i < sysfs_count; i++) {
+		attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
+				sysinfo_counter_request[i]);
+
+		if (!attr[i])
+			goto out;
+	}
+
+	/* Add sysinfo interface attributes in the interface_attrs attribute array */
+	for (i = 0; i < sysfs_count; i++)
+		interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
+
+	return;
+
+out:
+	/*
+	 * The sysinfo interface attributes will be added, only if hcall passed for
+	 * all the counter request values. Free the device attribute array incase
+	 * of any hcall failure.
+	 */
+	if (i > 0) {
+		while (i >= 0) {
+			kfree(attr[i]);
+			i--;
+		}
+	}
+}
+
 static int hv_gpci_init(void)
 {
 	int r;
@@ -388,6 +1017,10 @@ static int hv_gpci_init(void)
 	if (r)
 		return r;
 
+	/* sysinfo interface files are only available for power10 and above platforms */
+	if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
+		add_sysinfo_interface_files();
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index bfeb9bd..bf0188d 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -83,45 +83,8 @@ static int __init warp_post_info(void)
 
 #ifdef CONFIG_SENSORS_AD7414
 
-static LIST_HEAD(dtm_shutdown_list);
 static void __iomem *dtm_fpga;
 
-struct dtm_shutdown {
-	struct list_head list;
-	void (*func)(void *arg);
-	void *arg;
-};
-
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
-	struct dtm_shutdown *shutdown;
-
-	shutdown = kmalloc(sizeof(struct dtm_shutdown), GFP_KERNEL);
-	if (shutdown == NULL)
-		return -ENOMEM;
-
-	shutdown->func = func;
-	shutdown->arg = arg;
-
-	list_add(&shutdown->list, &dtm_shutdown_list);
-
-	return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
-	struct dtm_shutdown *shutdown;
-
-	list_for_each_entry(shutdown, &dtm_shutdown_list, list)
-		if (shutdown->func == func && shutdown->arg == arg) {
-			list_del(&shutdown->list);
-			kfree(shutdown);
-			return 0;
-		}
-
-	return -EINVAL;
-}
-
 #define WARP_GREEN_LED	0
 #define WARP_RED_LED	1
 
@@ -153,17 +116,12 @@ static struct platform_device warp_gpio_leds = {
 
 static irqreturn_t temp_isr(int irq, void *context)
 {
-	struct dtm_shutdown *shutdown;
 	int value = 1;
 
 	local_irq_disable();
 
 	gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
 
-	/* Run through the shutdown list. */
-	list_for_each_entry(shutdown, &dtm_shutdown_list, list)
-		shutdown->func(shutdown->arg);
-
 	printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n");
 
 	while (1) {
@@ -366,19 +324,6 @@ machine_late_initcall(warp, pika_dtm_start);
 
 #else /* !CONFIG_SENSORS_AD7414 */
 
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
-	return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
-	return 0;
-}
-
 machine_late_initcall(warp, warp_post_info);
 
 #endif
-
-EXPORT_SYMBOL(pika_dtm_register_shutdown);
-EXPORT_SYMBOL(pika_dtm_unregister_shutdown);
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
index 182e128..670f8ad 100644
--- a/arch/powerpc/platforms/4xx/cpm.c
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -18,7 +18,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/sysfs.h>
 #include <linux/cpu.h>
 #include <linux/suspend.h>
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index e11b57a..c6bd846 100644
--- a/arch/powerpc/platforms/4xx/hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -11,7 +11,7 @@
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <asm/msi_bitmap.h>
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
index ac1cd8b..b2d9404 100644
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -15,12 +15,13 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include <asm/reg.h>
+#include <asm/ppc4xx.h>
 
 static u32 dcrbase_l2c;
 
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
index d667ad0..e3e148b 100644
--- a/arch/powerpc/platforms/4xx/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -24,6 +24,7 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/dcr.h>
+#include <asm/uic.h>
 
 #define NR_UIC_INTS	32
 
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 80b25ce..a18f85b 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -10,7 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/machdep.h>
 #include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 2f3c60e..d2cb06e 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -13,7 +13,6 @@ extern void __init mpc512x_init(void);
 extern void __init mpc512x_setup_arch(void);
 extern int __init mpc5121_clk_init(void);
 const char *__init mpc512x_select_psc_compat(void);
-const char *__init mpc512x_select_reset_compat(void);
 extern void __noreturn mpc512x_restart(char *cmd);
 
 #endif				/* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 97dfaac..0d58ab2 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/machdep.h>
 #include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index c1e9816..4a25b6b 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -10,9 +10,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <asm/mpc5121.h>
 #include <asm/io.h>
 #include <linux/spinlock.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 5ac0ead..8f75e95 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -29,20 +29,6 @@
 
 static struct mpc512x_reset_module __iomem *reset_module_base;
 
-static void __init mpc512x_restart_init(void)
-{
-	struct device_node *np;
-	const char *reset_compat;
-
-	reset_compat = mpc512x_select_reset_compat();
-	np = of_find_compatible_node(NULL, NULL, reset_compat);
-	if (!np)
-		return;
-
-	reset_module_base = of_iomap(np, 0);
-	of_node_put(np);
-}
-
 void __noreturn mpc512x_restart(char *cmd)
 {
 	if (reset_module_base) {
@@ -363,7 +349,7 @@ const char *__init mpc512x_select_psc_compat(void)
 	return NULL;
 }
 
-const char *__init mpc512x_select_reset_compat(void)
+static const char *__init mpc512x_select_reset_compat(void)
 {
 	if (of_machine_is_compatible("fsl,mpc5121"))
 		return "fsl,mpc5121-reset";
@@ -455,6 +441,20 @@ static void __init mpc512x_psc_fifo_init(void)
 	}
 }
 
+static void __init mpc512x_restart_init(void)
+{
+	struct device_node *np;
+	const char *reset_compat;
+
+	reset_compat = mpc512x_select_reset_compat();
+	np = of_find_compatible_node(NULL, NULL, reset_compat);
+	if (!np)
+		return;
+
+	reset_module_base = of_iomap(np, 0);
+	of_node_put(np);
+}
+
 void __init mpc512x_init_early(void)
 {
 	mpc512x_restart_init();
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 4bdec1c..ce51cfe 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -7,11 +7,12 @@
  * PDM360NG board setup
  */
 
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 
 #include <asm/machdep.h>
 #include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 3fce4e1..5810595 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -48,7 +48,6 @@
  * the output mode.  This driver does not change the output mode setting.
  */
 
-#include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -57,8 +56,8 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/property.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
index 4eb372b..d9f1a2a 100644
--- a/arch/powerpc/platforms/82xx/Kconfig
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -7,8 +7,8 @@
 
 config EP8248E
 	bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
-	select 8272
-	select 8260
+	select CPM2
+	select PPC_INDIRECT_PCI if PCI
 	select FSL_SOC
 	select PHYLIB if NETDEVICES
 	select MDIO_BITBANG if PHYLIB
@@ -20,26 +20,10 @@
 
 config MGCOGE
 	bool "Keymile MGCOGE"
-	select 8272
-	select 8260
+	select CPM2
+	select PPC_INDIRECT_PCI if PCI
 	select FSL_SOC
 	help
 	  This enables support for the Keymile MGCOGE board.
 
 endif
-
-config 8260
-	bool
-	depends on PPC_BOOK3S_32
-	select CPM2
-	help
-	  The MPC8260 is a typical embedded CPU made by Freescale.  Selecting
-	  this option means that you wish to build a kernel for a machine with
-	  an 8260 class CPU.
-
-config 8272
-	bool
-	select 8260
-	help
-	  The MPC8272 CPM has a different internal dpram setup than other CPM2
-	  devices
diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
index 8f1856b..3dc65ce 100644
--- a/arch/powerpc/platforms/82xx/ep8248e.c
+++ b/arch/powerpc/platforms/82xx/ep8248e.c
@@ -13,13 +13,13 @@
 #include <linux/of_mdio.h>
 #include <linux/slab.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/cpm2.h>
 #include <asm/udbg.h>
 #include <asm/machdep.h>
 #include <asm/time.h>
-#include <asm/mpc8260.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/cpm2_pic.h>
@@ -140,12 +140,6 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev)
 	return ret;
 }
 
-static int ep8248e_mdio_remove(struct platform_device *ofdev)
-{
-	BUG();
-	return 0;
-}
-
 static const struct of_device_id ep8248e_mdio_match[] = {
 	{
 		.compatible = "fsl,ep8248e-mdio-bitbang",
@@ -157,9 +151,9 @@ static struct platform_driver ep8248e_mdio_driver = {
 	.driver = {
 		.name = "ep8248e-mdio-bitbang",
 		.of_match_table = ep8248e_mdio_match,
+		.suppress_bind_attrs = true,
 	},
 	.probe = ep8248e_mdio_probe,
-	.remove = ep8248e_mdio_remove,
 };
 
 struct cpm_pin {
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
index 51c9bfd..c86da3f 100644
--- a/arch/powerpc/platforms/82xx/km82xx.c
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -19,7 +19,6 @@
 #include <asm/udbg.h>
 #include <asm/machdep.h>
 #include <linux/time.h>
-#include <asm/mpc8260.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/cpm2_pic.h>
diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h
deleted file mode 100644
index d07c4d7..0000000
--- a/arch/powerpc/platforms/82xx/m82xx_pci.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _PPC_KERNEL_M82XX_PCI_H
-#define _PPC_KERNEL_M82XX_PCI_H
-
-/*
- */
-
-#define SIU_INT_IRQ1   ((uint)0x13 + CPM_IRQ_OFFSET)
-
-#ifndef _IO_BASE
-#define _IO_BASE isa_io_base
-#endif
-
-#endif				/* _PPC_KERNEL_M8260_PCI_H */
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index 3b5cb39..391d72a2 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -32,49 +32,3 @@ void __noreturn pq2_restart(char *cmd)
 	panic("Restart failed\n");
 }
 NOKPROBE_SYMBOL(pq2_restart)
-
-#ifdef CONFIG_PCI
-static int pq2_pci_exclude_device(struct pci_controller *hose,
-                                  u_char bus, u8 devfn)
-{
-	if (bus == 0 && PCI_SLOT(devfn) == 0)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	else
-		return PCIBIOS_SUCCESSFUL;
-}
-
-static void __init pq2_pci_add_bridge(struct device_node *np)
-{
-	struct pci_controller *hose;
-	struct resource r;
-
-	if (of_address_to_resource(np, 0, &r) || r.end - r.start < 0x10b)
-		goto err;
-
-	pci_add_flags(PCI_REASSIGN_ALL_BUS);
-
-	hose = pcibios_alloc_controller(np);
-	if (!hose)
-		return;
-
-	hose->dn = np;
-
-	setup_indirect_pci(hose, r.start + 0x100, r.start + 0x104, 0);
-	pci_process_bridge_OF_ranges(hose, np, 1);
-
-	return;
-
-err:
-	printk(KERN_ERR "No valid PCI reg property in device tree\n");
-}
-
-void __init pq2_init_pci(void)
-{
-	struct device_node *np;
-
-	ppc_md.pci_exclude_device = pq2_pci_exclude_device;
-
-	for_each_compatible_node(np, NULL, "fsl,pq2-pci")
-		pq2_pci_add_bridge(np);
-}
-#endif
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index 6b4013e..6fc3dba 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for the PowerPC 83xx linux kernel.
 #
-obj-y				:= misc.o usb.o
+obj-y				:= misc.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o suspend-asm.o
 obj-$(CONFIG_MCU_MPC8349EMITX)	+= mcu_mpc8349emitx.o
 obj-$(CONFIG_MPC830x_RDB)	+= mpc830x_rdb.o
@@ -13,3 +13,6 @@
 obj-$(CONFIG_MPC837x_RDB)	+= mpc837x_rdb.o
 obj-$(CONFIG_ASP834x)		+= asp834x.o
 obj-$(CONFIG_KMETER1)		+= km83xx.o
+obj-$(CONFIG_PPC_MPC831x)	+= usb_831x.o
+obj-$(CONFIG_PPC_MPC834x)	+= usb_834x.o
+obj-$(CONFIG_PPC_MPC837x)	+= usb_837x.o
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index 26ddc71..2b5d187 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -20,8 +20,8 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/initrd.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 
 #include <linux/atomic.h>
 #include <linux/time.h>
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 3b4e417..d523ce0 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -15,8 +15,10 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/mmc_spi.h>
 #include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/fsl_devices.h>
 
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index aea803b..0b8738a 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -3,8 +3,6 @@
 #define __MPC83XX_H__
 
 #include <linux/init.h>
-#include <linux/device.h>
-#include <asm/pci-bridge.h>
 
 /* System Clock Control Register */
 #define MPC83XX_SCCR_OFFS          0xA08
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 3fa8979..9833c36 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -19,7 +19,7 @@
 #include <linux/fsl_devices.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/export.h>
 
 #include <asm/reg.h>
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
deleted file mode 100644
index e2a13a0..0000000
--- a/arch/powerpc/platforms/83xx/usb.c
+++ /dev/null
@@ -1,251 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Freescale 83xx USB SOC setup code
- *
- * Copyright (C) 2007 Freescale Semiconductor, Inc.
- * Author: Li Yang
- */
-
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include <asm/io.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc83xx.h"
-
-
-#ifdef CONFIG_PPC_MPC834x
-int __init mpc834x_usb_cfg(void)
-{
-	unsigned long sccr, sicrl, sicrh;
-	void __iomem *immap;
-	struct device_node *np = NULL;
-	int port0_is_dr = 0, port1_is_dr = 0;
-	const void *prop, *dr_mode;
-
-	immap = ioremap(get_immrbase(), 0x1000);
-	if (!immap)
-		return -ENOMEM;
-
-	/* Read registers */
-	/* Note: DR and MPH must use the same clock setting in SCCR */
-	sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
-	sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
-	sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
-	if (np) {
-		sccr |= MPC83XX_SCCR_USB_DRCM_11;  /* 1:3 */
-
-		prop = of_get_property(np, "phy_type", NULL);
-		port1_is_dr = 1;
-		if (prop && (!strcmp(prop, "utmi") ||
-					!strcmp(prop, "utmi_wide"))) {
-			sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
-			sicrh |= MPC834X_SICRH_USB_UTMI;
-			port0_is_dr = 1;
-		} else if (prop && !strcmp(prop, "serial")) {
-			dr_mode = of_get_property(np, "dr_mode", NULL);
-			if (dr_mode && !strcmp(dr_mode, "otg")) {
-				sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
-				port0_is_dr = 1;
-			} else {
-				sicrl |= MPC834X_SICRL_USB1;
-			}
-		} else if (prop && !strcmp(prop, "ulpi")) {
-			sicrl |= MPC834X_SICRL_USB1;
-		} else {
-			printk(KERN_WARNING "834x USB PHY type not supported\n");
-		}
-		of_node_put(np);
-	}
-	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
-	if (np) {
-		sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
-
-		prop = of_get_property(np, "port0", NULL);
-		if (prop) {
-			if (port0_is_dr)
-				printk(KERN_WARNING
-					"834x USB port0 can't be used by both DR and MPH!\n");
-			sicrl &= ~MPC834X_SICRL_USB0;
-		}
-		prop = of_get_property(np, "port1", NULL);
-		if (prop) {
-			if (port1_is_dr)
-				printk(KERN_WARNING
-					"834x USB port1 can't be used by both DR and MPH!\n");
-			sicrl &= ~MPC834X_SICRL_USB1;
-		}
-		of_node_put(np);
-	}
-
-	/* Write back */
-	out_be32(immap + MPC83XX_SCCR_OFFS, sccr);
-	out_be32(immap + MPC83XX_SICRL_OFFS, sicrl);
-	out_be32(immap + MPC83XX_SICRH_OFFS, sicrh);
-
-	iounmap(immap);
-	return 0;
-}
-#endif /* CONFIG_PPC_MPC834x */
-
-#ifdef CONFIG_PPC_MPC831x
-int __init mpc831x_usb_cfg(void)
-{
-	u32 temp;
-	void __iomem *immap, *usb_regs;
-	struct device_node *np = NULL;
-	struct device_node *immr_node = NULL;
-	const void *prop;
-	struct resource res;
-	int ret = 0;
-#ifdef CONFIG_USB_OTG
-	const void *dr_mode;
-#endif
-
-	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
-	if (!np)
-		return -ENODEV;
-	prop = of_get_property(np, "phy_type", NULL);
-
-	/* Map IMMR space for pin and clock settings */
-	immap = ioremap(get_immrbase(), 0x1000);
-	if (!immap) {
-		of_node_put(np);
-		return -ENOMEM;
-	}
-
-	/* Configure clock */
-	immr_node = of_get_parent(np);
-	if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
-			of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
-		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
-		                MPC8315_SCCR_USB_MASK,
-		                MPC8315_SCCR_USB_DRCM_01);
-	else
-		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
-		                MPC83XX_SCCR_USB_MASK,
-		                MPC83XX_SCCR_USB_DRCM_11);
-
-	/* Configure pin mux for ULPI.  There is no pin mux for UTMI */
-	if (prop && !strcmp(prop, "ulpi")) {
-		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
-			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
-					MPC8308_SICRH_USB_MASK,
-					MPC8308_SICRH_USB_ULPI);
-		} else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
-			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
-					MPC8315_SICRL_USB_MASK,
-					MPC8315_SICRL_USB_ULPI);
-			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
-					MPC8315_SICRH_USB_MASK,
-					MPC8315_SICRH_USB_ULPI);
-		} else {
-			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
-					MPC831X_SICRL_USB_MASK,
-					MPC831X_SICRL_USB_ULPI);
-			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
-					MPC831X_SICRH_USB_MASK,
-					MPC831X_SICRH_USB_ULPI);
-		}
-	}
-
-	iounmap(immap);
-
-	of_node_put(immr_node);
-
-	/* Map USB SOC space */
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret) {
-		of_node_put(np);
-		return ret;
-	}
-	usb_regs = ioremap(res.start, resource_size(&res));
-
-	/* Using on-chip PHY */
-	if (prop && (!strcmp(prop, "utmi_wide") ||
-		     !strcmp(prop, "utmi"))) {
-		u32 refsel;
-
-		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
-			goto out;
-
-		if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
-			refsel = CONTROL_REFSEL_24MHZ;
-		else
-			refsel = CONTROL_REFSEL_48MHZ;
-		/* Set UTMI_PHY_EN and REFSEL */
-		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
-				CONTROL_UTMI_PHY_EN | refsel);
-	/* Using external UPLI PHY */
-	} else if (prop && !strcmp(prop, "ulpi")) {
-		/* Set PHY_CLK_SEL to ULPI */
-		temp = CONTROL_PHY_CLK_SEL_ULPI;
-#ifdef CONFIG_USB_OTG
-		/* Set OTG_PORT */
-		if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
-			dr_mode = of_get_property(np, "dr_mode", NULL);
-			if (dr_mode && !strcmp(dr_mode, "otg"))
-				temp |= CONTROL_OTG_PORT;
-		}
-#endif /* CONFIG_USB_OTG */
-		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
-	} else {
-		printk(KERN_WARNING "831x USB PHY type not supported\n");
-		ret = -EINVAL;
-	}
-
-out:
-	iounmap(usb_regs);
-	of_node_put(np);
-	return ret;
-}
-#endif /* CONFIG_PPC_MPC831x */
-
-#ifdef CONFIG_PPC_MPC837x
-int __init mpc837x_usb_cfg(void)
-{
-	void __iomem *immap;
-	struct device_node *np = NULL;
-	const void *prop;
-	int ret = 0;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
-	if (!np || !of_device_is_available(np)) {
-		of_node_put(np);
-		return -ENODEV;
-	}
-	prop = of_get_property(np, "phy_type", NULL);
-
-	if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
-		printk(KERN_WARNING "837x USB PHY type not supported\n");
-		of_node_put(np);
-		return -EINVAL;
-	}
-
-	/* Map IMMR space for pin and clock settings */
-	immap = ioremap(get_immrbase(), 0x1000);
-	if (!immap) {
-		of_node_put(np);
-		return -ENOMEM;
-	}
-
-	/* Configure clock */
-	clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
-			MPC837X_SCCR_USB_DRCM_11);
-
-	/* Configure pin mux for ULPI/serial */
-	clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
-			MPC837X_SICRL_USB_ULPI);
-
-	iounmap(immap);
-	of_node_put(np);
-	return ret;
-}
-#endif /* CONFIG_PPC_MPC837x */
diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c
new file mode 100644
index 0000000..28c24e9
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_831x.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc831x_usb_cfg(void)
+{
+	u32 temp;
+	void __iomem *immap, *usb_regs;
+	struct device_node *np = NULL;
+	struct device_node *immr_node = NULL;
+	const void *prop;
+	struct resource res;
+	int ret = 0;
+#ifdef CONFIG_USB_OTG
+	const void *dr_mode;
+#endif
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!np)
+		return -ENODEV;
+	prop = of_get_property(np, "phy_type", NULL);
+
+	/* Map IMMR space for pin and clock settings */
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap) {
+		of_node_put(np);
+		return -ENOMEM;
+	}
+
+	/* Configure clock */
+	immr_node = of_get_parent(np);
+	if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
+			  of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC8315_SCCR_USB_MASK,
+				MPC8315_SCCR_USB_DRCM_01);
+	else
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC83XX_SCCR_USB_MASK,
+				MPC83XX_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI.  There is no pin mux for UTMI */
+	if (prop && !strcmp(prop, "ulpi")) {
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8308_SICRH_USB_MASK,
+					MPC8308_SICRH_USB_ULPI);
+		} else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC8315_SICRL_USB_MASK,
+					MPC8315_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8315_SICRH_USB_MASK,
+					MPC8315_SICRH_USB_ULPI);
+		} else {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC831X_SICRL_USB_MASK,
+					MPC831X_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC831X_SICRH_USB_MASK,
+					MPC831X_SICRH_USB_ULPI);
+		}
+	}
+
+	iounmap(immap);
+
+	of_node_put(immr_node);
+
+	/* Map USB SOC space */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret) {
+		of_node_put(np);
+		return ret;
+	}
+	usb_regs = ioremap(res.start, resource_size(&res));
+
+	/* Using on-chip PHY */
+	if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) {
+		u32 refsel;
+
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
+			goto out;
+
+		if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
+			refsel = CONTROL_REFSEL_24MHZ;
+		else
+			refsel = CONTROL_REFSEL_48MHZ;
+		/* Set UTMI_PHY_EN and REFSEL */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
+			 CONTROL_UTMI_PHY_EN | refsel);
+	/* Using external UPLI PHY */
+	} else if (prop && !strcmp(prop, "ulpi")) {
+		/* Set PHY_CLK_SEL to ULPI */
+		temp = CONTROL_PHY_CLK_SEL_ULPI;
+#ifdef CONFIG_USB_OTG
+		/* Set OTG_PORT */
+		if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			dr_mode = of_get_property(np, "dr_mode", NULL);
+			if (dr_mode && !strcmp(dr_mode, "otg"))
+				temp |= CONTROL_OTG_PORT;
+		}
+#endif /* CONFIG_USB_OTG */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
+	} else {
+		pr_warn("831x USB PHY type not supported\n");
+		ret = -EINVAL;
+	}
+
+out:
+	iounmap(usb_regs);
+	of_node_put(np);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c
new file mode 100644
index 0000000..3a8d6c6
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_834x.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc834x_usb_cfg(void)
+{
+	unsigned long sccr, sicrl, sicrh;
+	void __iomem *immap;
+	struct device_node *np = NULL;
+	int port0_is_dr = 0, port1_is_dr = 0;
+	const void *prop, *dr_mode;
+
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap)
+		return -ENOMEM;
+
+	/* Read registers */
+	/* Note: DR and MPH must use the same clock setting in SCCR */
+	sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
+	sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
+	sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (np) {
+		sccr |= MPC83XX_SCCR_USB_DRCM_11;  /* 1:3 */
+
+		prop = of_get_property(np, "phy_type", NULL);
+		port1_is_dr = 1;
+		if (prop &&
+		    (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) {
+			sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+			sicrh |= MPC834X_SICRH_USB_UTMI;
+			port0_is_dr = 1;
+		} else if (prop && !strcmp(prop, "serial")) {
+			dr_mode = of_get_property(np, "dr_mode", NULL);
+			if (dr_mode && !strcmp(dr_mode, "otg")) {
+				sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+				port0_is_dr = 1;
+			} else {
+				sicrl |= MPC834X_SICRL_USB1;
+			}
+		} else if (prop && !strcmp(prop, "ulpi")) {
+			sicrl |= MPC834X_SICRL_USB1;
+		} else {
+			pr_warn("834x USB PHY type not supported\n");
+		}
+		of_node_put(np);
+	}
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
+	if (np) {
+		sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
+
+		prop = of_get_property(np, "port0", NULL);
+		if (prop) {
+			if (port0_is_dr)
+				pr_warn("834x USB port0 can't be used by both DR and MPH!\n");
+			sicrl &= ~MPC834X_SICRL_USB0;
+		}
+		prop = of_get_property(np, "port1", NULL);
+		if (prop) {
+			if (port1_is_dr)
+				pr_warn("834x USB port1 can't be used by both DR and MPH!\n");
+			sicrl &= ~MPC834X_SICRL_USB1;
+		}
+		of_node_put(np);
+	}
+
+	/* Write back */
+	out_be32(immap + MPC83XX_SCCR_OFFS, sccr);
+	out_be32(immap + MPC83XX_SICRL_OFFS, sicrl);
+	out_be32(immap + MPC83XX_SICRH_OFFS, sicrh);
+
+	iounmap(immap);
+	return 0;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c
new file mode 100644
index 0000000..726935b
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_837x.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc837x_usb_cfg(void)
+{
+	void __iomem *immap;
+	struct device_node *np = NULL;
+	const void *prop;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!np || !of_device_is_available(np)) {
+		of_node_put(np);
+		return -ENODEV;
+	}
+	prop = of_get_property(np, "phy_type", NULL);
+
+	if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
+		pr_warn("837x USB PHY type not supported\n");
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	/* Map IMMR space for pin and clock settings */
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap) {
+		of_node_put(np);
+		return -ENOMEM;
+	}
+
+	/* Configure clock */
+	clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
+			MPC837X_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI/serial */
+	clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
+			MPC837X_SICRL_USB_ULPI);
+
+	iounmap(immap);
+	of_node_put(np);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
index a029aa0..2eb62bff 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_qds.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -9,7 +9,7 @@
  * Copyright 2014 Freescale Semiconductor Inc.
  */
 
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/pci.h>
 #include <asm/mpic.h>
 #include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
index 361b437..161f006 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_rdb.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
@@ -7,7 +7,7 @@
  * Copyright 2011-2012 Freescale Semiconductor Inc.
  */
 
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/pci.h>
 #include <asm/mpic.h>
 #include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
index 3497570..7a63a3a 100644
--- a/arch/powerpc/platforms/85xx/c293pcie.c
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -7,8 +7,7 @@
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index a554b6d..7578111 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -3,6 +3,7 @@
  * Routines common to most mpc85xx-based boards.
  */
 
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index bfde391..645fcca 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -30,7 +30,7 @@
 #include "smp.h"
 #include "mpc85xx.h"
 
-void __init corenet_gen_pic_init(void)
+static void __init corenet_gen_pic_init(void)
 {
 	struct mpic *mpic;
 	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
@@ -48,7 +48,7 @@ void __init corenet_gen_pic_init(void)
 /*
  * Setup the architecture
  */
-void __init corenet_gen_setup_arch(void)
+static void __init corenet_gen_setup_arch(void)
 {
 	mpc85xx_smp_init();
 
@@ -101,7 +101,7 @@ static const struct of_device_id of_device_ids[] = {
 	{}
 };
 
-int __init corenet_gen_publish_devices(void)
+static int __init corenet_gen_publish_devices(void)
 {
 	return of_platform_bus_probe(NULL, of_device_ids, NULL);
 }
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 3678a1f..9c3b44a 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -17,8 +17,8 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index af38c3a..1b6326a 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -18,7 +18,8 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 58ab383..e966b2a 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -12,7 +12,7 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 4347d62..2856148 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -15,8 +15,8 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 0546f19..c19490cf 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -26,8 +26,8 @@
 #include <linux/seq_file.h>
 #include <linux/initrd.h>
 #include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/phy.h>
 #include <linux/memblock.h>
 #include <linux/fsl/guts.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index c42a68d..ec9f60f 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -12,7 +12,8 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/fsl/guts.h>
 
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index 14ec79a..10d6f1f 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -10,7 +10,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 23d0926..0dd786a 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -18,8 +18,8 @@
 
 #include <linux/fsl/guts.h>
 #include <linux/pci.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index d1159150..25ab6e9 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -14,8 +14,8 @@
 
 #include <linux/fsl/guts.h>
 #include <linux/pci.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
index 9df0439..e4fa873 100644
--- a/arch/powerpc/platforms/85xx/p1023_rdb.c
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -15,9 +15,8 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/fsl_devices.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 6e4b1dd..3cd2f3b 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -25,7 +25,7 @@
 #include "smp.h"
 #include "mpc85xx.h"
 
-void __init qemu_e500_pic_init(void)
+static void __init qemu_e500_pic_init(void)
 {
 	struct mpic *mpic;
 	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index 9fa1338..403367b 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -23,7 +23,7 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 3768c86..baa12ef 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -6,7 +6,6 @@
 #include <linux/irq.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/io.h>
 
 /*
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 5e2646b..c10efc4 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -22,7 +22,7 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 80effb0..6be1b98 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -20,7 +20,7 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index b88e23a..c0a0456 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -13,7 +13,8 @@
 #include <linux/errno.h>
 #include <linux/fsl/guts.h>
 #include <linux/pci.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 
 #include <asm/pci-bridge.h>
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 184013e..45f257f 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -16,8 +16,8 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c
index 0069d38..a4a55052 100644
--- a/arch/powerpc/platforms/86xx/common.c
+++ b/arch/powerpc/platforms/86xx/common.c
@@ -3,7 +3,10 @@
  * Routines common to most mpc86xx-based boards.
  */
 
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_platform.h>
+#include <asm/reg.h>
 #include <asm/synch.h>
 
 #include "mpc86xx.h"
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index f0512e5..f7f98cc 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -18,8 +18,8 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 1430b52..689835f 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -18,8 +18,8 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index c92af0d..365f511 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -18,8 +18,8 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c
index c0ac405..cee49ec 100644
--- a/arch/powerpc/platforms/86xx/mvme7100.c
+++ b/arch/powerpc/platforms/86xx/mvme7100.c
@@ -20,7 +20,6 @@
 #include <linux/pci.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <asm/udbg.h>
 #include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 2c32c34..9ca36de 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -6,12 +6,14 @@
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 
 #include <asm/mpic.h>
 #include <asm/i8259.h>
 
+#include "mpc86xx.h"
+
 #ifdef CONFIG_PPC_I8259
 static void mpc86xx_8259_cascade(struct irq_desc *desc)
 {
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
index f6bd232..d02f8dd 100644
--- a/arch/powerpc/platforms/8xx/adder875.c
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -12,7 +12,7 @@
 #include <asm/time.h>
 #include <asm/machdep.h>
 #include <asm/cpm1.h>
-#include <asm/fs_pd.h>
+#include <asm/8xx_immap.h>
 #include <asm/udbg.h>
 
 #include "mpc8xx.h"
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 34ab299..ebb5f6a 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -41,7 +41,7 @@
 #include <asm/rheap.h>
 #include <asm/cpm.h>
 
-#include <asm/fs_pd.h>
+#include <sysdev/fsl_soc.h>
 
 #ifdef CONFIG_8xx_GPIO
 #include <linux/gpio/legacy-of-mm-gpiochip.h>
@@ -54,8 +54,6 @@ immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
 
 void __init cpm_reset(void)
 {
-	sysconf8xx_t __iomem *siu_conf;
-
 	cpmp = &mpc8xx_immr->im_cpm;
 
 #ifndef CONFIG_PPC_EARLY_DEBUG_CPM
@@ -77,12 +75,10 @@ void __init cpm_reset(void)
 	 * manual recommends it.
 	 * Bit 25, FAM can also be set to use FEC aggressive mode (860T).
 	 */
-	siu_conf = immr_map(im_siu_conf);
 	if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */
-		out_be32(&siu_conf->sc_sdcr, 0x40);
+		out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40);
 	else
-		out_be32(&siu_conf->sc_sdcr, 1);
-	immr_unmap(siu_conf);
+		out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1);
 }
 
 static DEFINE_SPINLOCK(cmd_lock);
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 24f358f..2336b68 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -22,7 +22,6 @@
 
 #include <asm/io.h>
 #include <asm/8xx_immap.h>
-#include <asm/fs_pd.h>
 #include <mm/mmu_decl.h>
 
 #include "pic.h"
@@ -37,20 +36,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
-/* per-board overridable init_internal_rtc() function. */
-void __init __attribute__ ((weak))
-init_internal_rtc(void)
-{
-	sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
-
-	/* Disable the RTC one second and alarm interrupts. */
-	clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
-
-	/* Enable the RTC */
-	setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
-	immr_unmap(sys_tmr);
-}
-
 static int __init get_freq(char *name, unsigned long *val)
 {
 	struct device_node *cpu;
@@ -80,23 +65,14 @@ static int __init get_freq(char *name, unsigned long *val)
 void __init mpc8xx_calibrate_decr(void)
 {
 	struct device_node *cpu;
-	cark8xx_t __iomem *clk_r1;
-	car8xx_t __iomem *clk_r2;
-	sitk8xx_t __iomem *sys_tmr1;
-	sit8xx_t __iomem *sys_tmr2;
 	int irq, virq;
 
-	clk_r1 = immr_map(im_clkrstk);
-
 	/* Unlock the SCCR. */
-	out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY);
-	out_be32(&clk_r1->cark_sccrk, KAPWR_KEY);
-	immr_unmap(clk_r1);
+	out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY);
 
 	/* Force all 8xx processors to use divide by 16 processor clock. */
-	clk_r2 = immr_map(im_clkrst);
-	setbits32(&clk_r2->car_sccr, 0x02000000);
-	immr_unmap(clk_r2);
+	setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000);
 
 	/* Processor frequency is MHz.
 	 */
@@ -123,16 +99,18 @@ void __init mpc8xx_calibrate_decr(void)
 	 * we guarantee the registers are locked, then we unlock them
 	 * for our use.
 	 */
-	sys_tmr1 = immr_map(im_sitk);
-	out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY);
-	out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY);
-	out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY);
-	out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY);
-	out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY);
-	out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY);
-	immr_unmap(sys_tmr1);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY);
 
-	init_internal_rtc();
+	/* Disable the RTC one second and alarm interrupts. */
+	clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
+
+	/* Enable the RTC */
+	setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
 
 	/* Enabling the decrementer also enables the timebase interrupts
 	 * (or from the other point of view, to get decrementer interrupts
@@ -144,10 +122,8 @@ void __init mpc8xx_calibrate_decr(void)
 	of_node_put(cpu);
 	irq = virq_to_hw(virq);
 
-	sys_tmr2 = immr_map(im_sit);
-	out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
-					(TBSCR_TBF | TBSCR_TBE));
-	immr_unmap(sys_tmr2);
+	out_be16(&mpc8xx_immr->im_sit.sit_tbscr,
+		 ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE));
 
 	if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint",
 			NULL))
@@ -161,47 +137,36 @@ void __init mpc8xx_calibrate_decr(void)
 
 int mpc8xx_set_rtc_time(struct rtc_time *tm)
 {
-	sitk8xx_t __iomem *sys_tmr1;
-	sit8xx_t __iomem *sys_tmr2;
 	time64_t time;
 
-	sys_tmr1 = immr_map(im_sitk);
-	sys_tmr2 = immr_map(im_sit);
 	time = rtc_tm_to_time64(tm);
 
-	out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY);
-	out_be32(&sys_tmr2->sit_rtc, (u32)time);
-	out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY);
 
-	immr_unmap(sys_tmr2);
-	immr_unmap(sys_tmr1);
 	return 0;
 }
 
 void mpc8xx_get_rtc_time(struct rtc_time *tm)
 {
 	unsigned long data;
-	sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
 
 	/* Get time from the RTC. */
-	data = in_be32(&sys_tmr->sit_rtc);
+	data = in_be32(&mpc8xx_immr->im_sit.sit_rtc);
 	rtc_time64_to_tm(data, tm);
-	immr_unmap(sys_tmr);
 	return;
 }
 
 void __noreturn mpc8xx_restart(char *cmd)
 {
-	car8xx_t __iomem *clk_r = immr_map(im_clkrst);
-
-
 	local_irq_disable();
 
-	setbits32(&clk_r->car_plprcr, 0x00000080);
+	setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080);
 	/* Clear the ME bit in MSR to cause checkstop on machine check
 	*/
 	mtmsr(mfmsr() & ~0x1000);
 
-	in_8(&clk_r->res[0]);
+	in_8(&mpc8xx_immr->im_clkrst.res[0]);
 	panic("Restart failed\n");
 }
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
index 11b3d11..e4192c0 100644
--- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -24,7 +24,6 @@
 #include <asm/time.h>
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
-#include <asm/fs_pd.h>
 #include <asm/udbg.h>
 
 #include "mpc86xads.h"
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index c7c4f08..76c7cd7 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -36,7 +36,6 @@
 #include <asm/time.h>
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
-#include <asm/fs_pd.h>
 #include <asm/udbg.h>
 
 #include "mpc885ads.h"
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 6e56be8..1670dfd 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -38,7 +38,6 @@
 #include <asm/time.h>
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
-#include <asm/fs_pd.h>
 #include <asm/udbg.h>
 
 #include "mpc8xx.h"
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 3e2e252..1fd253f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -251,7 +251,7 @@
 
 config CPM2
 	bool "Enable support for the CPM2 (Communications Processor Module)"
-	depends on (FSL_SOC_BOOKE && PPC32) || 8260
+	depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx
 	select CPM
 	select HAVE_PCI
 	select GPIOLIB
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 340b86e..b2d8c0d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -276,6 +276,13 @@
 	default "e500mc" if E500MC_CPU
 	default "powerpc" if POWERPC_CPU
 
+config TUNE_CPU
+	string
+	depends on POWERPC64_CPU
+	default "-mtune=power10" if $(cc-option,-mtune=power10)
+	default "-mtune=power9"  if $(cc-option,-mtune=power9)
+	default "-mtune=power8"  if $(cc-option,-mtune=power8)
+
 config PPC_BOOK3S
 	def_bool y
 	depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 1060004..28dc867 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -10,10 +10,11 @@
 #include <linux/pci.h>
 #include <linux/msi.h>
 #include <linux/export.h>
-#include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <asm/dcr.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index fb4023f..99b3558 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -10,9 +10,8 @@
 #include <linux/percpu.h>
 #include <linux/types.h>
 #include <linux/export.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
 #include <linux/pgtable.h>
 
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 8c71330..1202a69 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -16,7 +16,7 @@
 #include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/memblock.h>
 
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 98db63b..f6b8792 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -22,7 +22,7 @@
 #include <asm/cell-regs.h>
 
 #include "ras.h"
-
+#include "pervasive.h"
 
 static void dump_fir(int cpu)
 {
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 9e07d10..f64a1ef 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <linux/memory_hotplug.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index e36ebd8..6843944 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -9,7 +9,6 @@
 
 #include <linux/kernel.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <linux/io.h>
 
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 74567b3..f464a1f 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -25,6 +25,7 @@
 
 #include "spufs/spufs.h"
 #include "interrupt.h"
+#include "spu_priv1_mmio.h"
 
 struct device_node *spu_devnode(struct spu *spu)
 {
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 02ff260..ce9e58e 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -22,9 +22,9 @@
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/serial_core.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/extable.h>
 
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index a4a79d7..f329a03 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -36,8 +36,9 @@
 #include <linux/serial.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/memblock.h>
 
 #include <asm/processor.h>
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index 913b77b..fd130fe 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -20,7 +20,7 @@
 #include <linux/phy.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #define DELAY 1
 
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index 3f277a2..018c306 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -4,6 +4,7 @@
 
 extern time64_t pas_get_boot_time(void);
 extern void pas_pci_init(void);
+struct pci_dev;
 extern void pas_pci_irq_fixup(struct pci_dev *dev);
 extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
 
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 5c5b4a0..ef985ba 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -16,7 +16,9 @@
 #include <linux/console.h>
 #include <linux/export.h>
 #include <linux/pci.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/gfp.h>
 #include <linux/irqdomain.h>
 
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
index ad721882..70ac6db 100644
--- a/arch/powerpc/platforms/pasemi/time.c
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -9,6 +9,8 @@
 
 #include <asm/time.h>
 
+#include "pasemi.h"
+
 time64_t __init pas_get_boot_time(void)
 {
 	/* Let's just return a fake date right now */
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index ed58928..ae62d43 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -37,6 +37,8 @@
 #include <asm/pci-bridge.h>
 #include <asm/pmac_low_i2c.h>
 
+#include "pmac.h"
+
 #undef DEBUG_FEATURE
 
 #ifdef DEBUG_FEATURE
@@ -132,8 +134,10 @@ static struct pmac_mb_def pmac_mb;
  * Here are the chip specific feature functions
  */
 
-static inline int simple_feature_tweak(struct device_node *node, int type,
-				       int reg, u32 mask, int value)
+#ifndef CONFIG_PPC64
+
+static int simple_feature_tweak(struct device_node *node, int type, int reg,
+				u32 mask, int value)
 {
 	struct macio_chip*	macio;
 	unsigned long		flags;
@@ -152,8 +156,6 @@ static inline int simple_feature_tweak(struct device_node *node, int type,
 	return 0;
 }
 
-#ifndef CONFIG_PPC64
-
 static long ohare_htw_scc_enable(struct device_node *node, long param,
 				 long value)
 {
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 0c41f4b..6de1cd5 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -45,7 +45,7 @@
 #include <linux/root_dev.h>
 #include <linux/bitops.h>
 #include <linux/suspend.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 
 #include <asm/reg.h>
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a83cb67..af3a5d3 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -855,8 +855,7 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	struct pnv_phb *phb = hose->private_data;
 	struct device_node *dn = pci_device_to_OF_node(pdev);
-	uint64_t id = PCI_SLOT_ID(phb->opal_id,
-				  (pdev->bus->number << 8) | pdev->devfn);
+	uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev));
 	uint8_t scope;
 	int64_t rc;
 
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 6290677..64a9c71 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -449,7 +449,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
 	if (!data)
 		return -ENOMEM;
 
-	bdfn = (dev->bus->number << 8) | dev->devfn;
+	bdfn = pci_dev_id(dev);
 	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
 				PE_mask);
 	if (rc) {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 348a8cd..828fc4d 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -11,7 +11,6 @@
 #include <linux/platform_device.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/crash_dump.h>
 #include <linux/debugfs.h>
 #include <asm/opal.h>
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 113bdb1..327e2f7 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -24,13 +24,20 @@
 #include <linux/uaccess.h>
 
 
+struct opal_prd_msg {
+	union {
+		struct opal_prd_msg_header header;
+		DECLARE_FLEX_ARRAY(u8, data);
+	};
+};
+
 /*
  * The msg member must be at the end of the struct, as it's followed by the
  * message data.
  */
 struct opal_prd_msg_queue_item {
-	struct list_head		list;
-	struct opal_prd_msg_header	msg;
+	struct list_head	list;
+	struct opal_prd_msg	msg;
 };
 
 static struct device_node *prd_node;
@@ -156,7 +163,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf,
 	int rc;
 
 	/* we need at least a header's worth of data */
-	if (count < sizeof(item->msg))
+	if (count < sizeof(item->msg.header))
 		return -EINVAL;
 
 	if (*ppos)
@@ -186,7 +193,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf,
 			return -EINTR;
 	}
 
-	size = be16_to_cpu(item->msg.size);
+	size = be16_to_cpu(item->msg.header.size);
 	if (size > count) {
 		err = -EINVAL;
 		goto err_requeue;
@@ -214,8 +221,8 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
 		size_t count, loff_t *ppos)
 {
 	struct opal_prd_msg_header hdr;
+	struct opal_prd_msg *msg;
 	ssize_t size;
-	void *msg;
 	int rc;
 
 	size = sizeof(hdr);
@@ -247,12 +254,12 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
 
 static int opal_prd_release(struct inode *inode, struct file *file)
 {
-	struct opal_prd_msg_header msg;
+	struct opal_prd_msg msg;
 
-	msg.size = cpu_to_be16(sizeof(msg));
-	msg.type = OPAL_PRD_MSG_TYPE_FINI;
+	msg.header.size = cpu_to_be16(sizeof(msg));
+	msg.header.type = OPAL_PRD_MSG_TYPE_FINI;
 
-	opal_prd_msg((struct opal_prd_msg *)&msg);
+	opal_prd_msg(&msg);
 
 	atomic_xchg(&prd_usage, 0);
 
@@ -352,7 +359,7 @@ static int opal_prd_msg_notifier(struct notifier_block *nb,
 	if (!item)
 		return -ENOMEM;
 
-	memcpy(&item->msg, msg->params, msg_size);
+	memcpy(&item->msg.data, msg->params, msg_size);
 
 	spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
 	list_add_tail(&item->list, &opal_prd_msg_queue);
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index a9bcf92..79011a2 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -11,8 +11,9 @@
 #include <linux/bcd.h>
 #include <linux/rtc.h>
 #include <linux/delay.h>
-#include <linux/platform_device.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include <asm/opal.h>
 #include <asm/firmware.h>
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
index a8436bf..6ac410f 100644
--- a/arch/powerpc/platforms/powernv/opal-secvar.c
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -12,8 +12,8 @@
 #define pr_fmt(fmt) "secvar: "fmt
 
 #include <linux/types.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/of_platform.h>
 #include <asm/opal.h>
 #include <asm/secvar.h>
 #include <asm/secure_boot.h>
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 3192c61..8880a1c 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -6,7 +6,9 @@
  */
 
 #include <linux/delay.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <asm/opal.h>
 #include <asm/machdep.h>
 
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 6b4eed2..262cd6f 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -168,7 +168,7 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
 	ent->path.size = strlen((char *)ent->path.data);
 
 	dir = debugfs_create_dir(ent->name, root);
-	if (!dir) {
+	if (IS_ERR(dir)) {
 		kfree(ent->path.data);
 		kfree(ent);
 		return -1;
@@ -190,7 +190,7 @@ static int scom_debug_init(void)
 		return 0;
 
 	root = debugfs_create_dir("scom", arch_debugfs_dir);
-	if (!root)
+	if (IS_ERR(root))
 		return -1;
 
 	rc = 0;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cb63782..28fac47 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -997,14 +997,14 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 	struct pnv_ioda_pe *pe;
 
 	/* Check if the BDFN for this device is associated with a PE yet */
-	pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+	pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
 	if (!pe) {
 		/* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
 		if (WARN_ON(pdev->is_virtfn))
 			return;
 
 		pnv_pci_configure_bus(pdev->bus);
-		pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+		pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
 		pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
 
 
@@ -2526,7 +2526,7 @@ static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
 	if (WARN_ON(!phb))
 		return ERR_PTR(-ENODEV);
 
-	pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+	pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
 	if (!pe)
 		return ERR_PTR(-ENODEV);
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 5e9c6b5..4dbb47d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -482,15 +482,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 #ifdef CONFIG_MEMORY_HOTPLUG
 static unsigned long pnv_memory_block_size(void)
 {
-	/*
-	 * We map the kernel linear region with 1GB large pages on radix. For
-	 * memory hot unplug to work our memory block size must be at least
-	 * this size.
-	 */
-	if (radix_enabled())
-		return radix_mem_block_size;
-	else
-		return 256UL * 1024 * 1024;
+	return memory_block_size;
 }
 #endif
 
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 2057630..1abe33f 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -73,9 +73,9 @@ static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
 
 static u64 make_first_field(const char *text, u64 index)
 {
-	u64 n;
+	u64 n = 0;
 
-	strncpy((char *)&n, text, 8);
+	memcpy((char *)&n, text, strnlen(text, sizeof(n)));
 	return PS3_VENDOR_ID_NONE + (n >> 32) + index;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 1a3cb31..e62835a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -398,6 +398,14 @@ static int dlpar_online_cpu(struct device_node *dn)
 		for_each_present_cpu(cpu) {
 			if (get_hard_smp_processor_id(cpu) != thread)
 				continue;
+
+			if (!topology_is_primary_thread(cpu)) {
+				if (cpu_smt_control != CPU_SMT_ENABLED)
+					break;
+				if (!topology_smt_thread_allowed(cpu))
+					break;
+			}
+
 			cpu_maps_update_done();
 			find_and_update_cpu_nid(cpu);
 			rc = device_online(get_cpu_device(cpu));
@@ -845,15 +853,9 @@ static struct notifier_block pseries_smp_nb = {
 	.notifier_call = pseries_smp_notifier,
 };
 
-static int __init pseries_cpu_hotplug_init(void)
+void __init pseries_cpu_hotplug_init(void)
 {
 	int qcss_tok;
-	unsigned int node;
-
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-	ppc_md.cpu_probe = dlpar_cpu_probe;
-	ppc_md.cpu_release = dlpar_cpu_release;
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 
 	rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
 	qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
@@ -862,12 +864,22 @@ static int __init pseries_cpu_hotplug_init(void)
 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
 				"- disabling.\n");
-		return 0;
+		return;
 	}
 
 	smp_ops->cpu_offline_self = pseries_cpu_offline_self;
 	smp_ops->cpu_disable = pseries_cpu_disable;
 	smp_ops->cpu_die = pseries_cpu_die;
+}
+
+static int __init pseries_dlpar_init(void)
+{
+	unsigned int node;
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 
 	/* Processors can be added/removed only on LPAR */
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
@@ -886,4 +898,4 @@ static int __init pseries_cpu_hotplug_init(void)
 
 	return 0;
 }
-machine_arch_initcall(pseries, pseries_cpu_hotplug_init);
+machine_arch_initcall(pseries, pseries_dlpar_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 4f3d6a2..aa4042d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -21,54 +21,6 @@
 #include <asm/drmem.h>
 #include "pseries.h"
 
-unsigned long pseries_memory_block_size(void)
-{
-	struct device_node *np;
-	u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
-	struct resource r;
-
-	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-	if (np) {
-		int len;
-		int size_cells;
-		const __be32 *prop;
-
-		size_cells = of_n_size_cells(np);
-
-		prop = of_get_property(np, "ibm,lmb-size", &len);
-		if (prop && len >= size_cells * sizeof(__be32))
-			memblock_size = of_read_number(prop, size_cells);
-		of_node_put(np);
-
-	} else  if (machine_is(pseries)) {
-		/* This fallback really only applies to pseries */
-		unsigned int memzero_size = 0;
-
-		np = of_find_node_by_path("/memory@0");
-		if (np) {
-			if (!of_address_to_resource(np, 0, &r))
-				memzero_size = resource_size(&r);
-			of_node_put(np);
-		}
-
-		if (memzero_size) {
-			/* We now know the size of memory@0, use this to find
-			 * the first memoryblock and get its size.
-			 */
-			char buf[64];
-
-			sprintf(buf, "/memory@%x", memzero_size);
-			np = of_find_node_by_path(buf);
-			if (np) {
-				if (!of_address_to_resource(np, 0, &r))
-					memblock_size = resource_size(&r);
-				of_node_put(np);
-			}
-		}
-	}
-	return memblock_size;
-}
-
 static void dlpar_free_property(struct property *prop)
 {
 	kfree(prop->name);
@@ -283,7 +235,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
 
 static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
 {
-	unsigned long block_sz, start_pfn;
+	unsigned long start_pfn;
 	int sections_per_block;
 	int i;
 
@@ -294,8 +246,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si
 	if (!pfn_valid(start_pfn))
 		goto out;
 
-	block_sz = pseries_memory_block_size();
-	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
 
 	for (i = 0; i < sections_per_block; i++) {
 		__remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
@@ -354,7 +305,6 @@ static int dlpar_add_lmb(struct drmem_lmb *);
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
 	struct memory_block *mem_block;
-	unsigned long block_sz;
 	int rc;
 
 	if (!lmb_is_removable(lmb))
@@ -370,13 +320,11 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 		return rc;
 	}
 
-	block_sz = pseries_memory_block_size();
-
-	__remove_memory(lmb->base_addr, block_sz);
+	__remove_memory(lmb->base_addr, memory_block_size);
 	put_device(&mem_block->dev);
 
 	/* Update memory regions for memory remove */
-	memblock_remove(lmb->base_addr, block_sz);
+	memblock_remove(lmb->base_addr, memory_block_size);
 
 	invalidate_lmb_associativity_index(lmb);
 	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 35254ac..bae45b3 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -91,7 +91,7 @@
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
 	LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ;		\
-	std	r12,32(r1);					\
+	ld	r12,0(r12);					\
 	cmpdi	r12,0;						\
 	bne-	LABEL;						\
 1:
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 44703f1..998e3af 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -47,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <asm/ibmebus.h>
 #include <asm/machdep.h>
 
@@ -460,6 +461,7 @@ static int __init ibmebus_bus_init(void)
 	if (err) {
 		printk(KERN_WARNING "%s: device_register returned %i\n",
 		       __func__, err);
+		put_device(&ibmebus_bus_device);
 		bus_unregister(&ibmebus_bus_type);
 
 		return err;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d593a72..16d93b5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -395,8 +395,6 @@ static LIST_HEAD(dma_win_list);
 static DEFINE_SPINLOCK(dma_win_list_lock);
 /* protects initializing window twice for same device */
 static DEFINE_MUTEX(dma_win_init_mutex);
-#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
-#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
 
 static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
 					unsigned long num_pfn, const void *arg)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2eab323..f2cb621 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -41,6 +41,7 @@
 #include <asm/kexec.h>
 #include <asm/fadump.h>
 #include <asm/dtl.h>
+#include <asm/vphn.h>
 
 #include "pseries.h"
 
@@ -639,16 +640,8 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
 
 static int __init vcpudispatch_stats_procfs_init(void)
 {
-	/*
-	 * Avoid smp_processor_id while preemptible. All CPUs should have
-	 * the same value for lppaca_shared_proc.
-	 */
-	preempt_disable();
-	if (!lppaca_shared_proc(get_lppaca())) {
-		preempt_enable();
+	if (!lppaca_shared_proc())
 		return 0;
-	}
-	preempt_enable();
 
 	if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
 					&vcpudispatch_stats_proc_ops))
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 8acc705..1c151d7 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -206,7 +206,7 @@ static void parse_ppp_data(struct seq_file *m)
 	           ppp_data.active_system_procs);
 
 	/* pool related entries are appropriate for shared configs */
-	if (lppaca_shared_proc(get_lppaca())) {
+	if (lppaca_shared_proc()) {
 		unsigned long pool_idle_time, pool_procs;
 
 		seq_printf(m, "pool=%d\n", ppp_data.pool_num);
@@ -560,7 +560,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		   partition_potential_processors);
 
 	seq_printf(m, "shared_processor_mode=%d\n",
-		   lppaca_shared_proc(get_lppaca()));
+		   lppaca_shared_proc());
 
 #ifdef CONFIG_PPC_64S_HASH_MMU
 	if (!radix_enabled())
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index b0658ea..2d40304 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -194,7 +194,7 @@ static struct plpks_auth *construct_auth(u8 consumer)
 	return auth;
 }
 
-/**
+/*
  * Label is combination of label attributes + name.
  * Label attributes are used internally by kernel and not exposed to the user.
  */
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index f8bce40..8376f03 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -75,11 +75,13 @@ static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
 
 #ifdef CONFIG_HOTPLUG_CPU
 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
+void pseries_cpu_hotplug_init(void);
 #else
 static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 {
 	return -EOPNOTSUPP;
 }
+static inline void pseries_cpu_hotplug_init(void) { }
 #endif
 
 /* PCI root bridge prepare function override for pseries */
@@ -90,8 +92,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops;
 int pseries_msi_allocate_domains(struct pci_controller *phb);
 void pseries_msi_free_domains(struct pci_controller *phb);
 
-unsigned long pseries_memory_block_size(void);
-
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
 extern unsigned long CMO_PageSize;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e2a57cf..ecea85c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -816,6 +816,8 @@ static void __init pSeries_setup_arch(void)
 	/* Discover PIC type and setup ppc_md accordingly */
 	smp_init_pseries();
 
+	// Setup CPU hotplug callbacks
+	pseries_cpu_hotplug_init();
 
 	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
 		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
@@ -847,7 +849,7 @@ static void __init pSeries_setup_arch(void)
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		vpa_init(boot_cpuid);
 
-		if (lppaca_shared_proc(get_lppaca())) {
+		if (lppaca_shared_proc()) {
 			static_branch_enable(&shared_processor);
 			pv_spinlocks_init();
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -1116,6 +1118,13 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
 	return PCI_PROBE_NORMAL;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long pseries_memory_block_size(void)
+{
+	return memory_block_size;
+}
+#endif
+
 struct pci_controller_ops pseries_pci_controller_ops = {
 	.probe_mode		= pSeries_pci_probe_mode,
 #ifdef CONFIG_SPAPR_TCE_IOMMU
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 3fbc2a6..e25ac52 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -17,6 +17,7 @@
 #include <asm/hvcall.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/firmware.h>
+#include <asm/vphn.h>
 #include <asm/vas.h>
 #include "vas.h"
 
diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
index cca474a..3f85ece 100644
--- a/arch/powerpc/platforms/pseries/vphn.c
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <asm/byteorder.h>
-#include <asm/lppaca.h>
+#include <asm/vphn.h>
 
 /*
  * The associativity domain numbers are returned from the hypervisor as a
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
index 915f4d3..14cc5ea 100644
--- a/arch/powerpc/sysdev/cpm2.c
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -37,11 +37,9 @@
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/mpc8260.h>
 #include <asm/page.h>
 #include <asm/cpm2.h>
 #include <asm/rheap.h>
-#include <asm/fs_pd.h>
 
 #include <sysdev/fsl_soc.h>
 
@@ -119,9 +117,9 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
 	/* This is good enough to get SMCs running.....
 	*/
 	if (brg < 4) {
-		bp = cpm2_map_size(im_brgc1, 16);
+		bp = &cpm2_immr->im_brgc1;
 	} else {
-		bp = cpm2_map_size(im_brgc5, 16);
+		bp = &cpm2_immr->im_brgc5;
 		brg -= 4;
 	}
 	bp += brg;
@@ -131,7 +129,6 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
 		val |= CPM_BRG_DIV16;
 
 	out_be32(bp, val);
-	cpm2_unmap(bp);
 }
 EXPORT_SYMBOL(__cpm2_setbrg);
 
@@ -140,7 +137,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
 	int ret = 0;
 	int shift;
 	int i, bits = 0;
-	cpmux_t __iomem *im_cpmux;
 	u32 __iomem *reg;
 	u32 mask = 7;
 
@@ -203,35 +199,33 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
 		{CPM_CLK_SCC4, CPM_CLK8, 7},
 	};
 
-	im_cpmux = cpm2_map(im_cpmux);
-
 	switch (target) {
 	case CPM_CLK_SCC1:
-		reg = &im_cpmux->cmx_scr;
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
 		shift = 24;
 		break;
 	case CPM_CLK_SCC2:
-		reg = &im_cpmux->cmx_scr;
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
 		shift = 16;
 		break;
 	case CPM_CLK_SCC3:
-		reg = &im_cpmux->cmx_scr;
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
 		shift = 8;
 		break;
 	case CPM_CLK_SCC4:
-		reg = &im_cpmux->cmx_scr;
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
 		shift = 0;
 		break;
 	case CPM_CLK_FCC1:
-		reg = &im_cpmux->cmx_fcr;
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
 		shift = 24;
 		break;
 	case CPM_CLK_FCC2:
-		reg = &im_cpmux->cmx_fcr;
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
 		shift = 16;
 		break;
 	case CPM_CLK_FCC3:
-		reg = &im_cpmux->cmx_fcr;
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
 		shift = 8;
 		break;
 	default:
@@ -261,7 +255,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
 
 	out_be32(reg, (in_be32(reg) & ~mask) | bits);
 
-	cpm2_unmap(im_cpmux);
 	return ret;
 }
 
@@ -270,7 +263,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
 	int ret = 0;
 	int shift;
 	int i, bits = 0;
-	cpmux_t __iomem *im_cpmux;
 	u8 __iomem *reg;
 	u8 mask = 3;
 
@@ -285,16 +277,14 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
 		{CPM_CLK_SMC2, CPM_CLK15, 3},
 	};
 
-	im_cpmux = cpm2_map(im_cpmux);
-
 	switch (target) {
 	case CPM_CLK_SMC1:
-		reg = &im_cpmux->cmx_smr;
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
 		mask = 3;
 		shift = 4;
 		break;
 	case CPM_CLK_SMC2:
-		reg = &im_cpmux->cmx_smr;
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
 		mask = 3;
 		shift = 0;
 		break;
@@ -317,7 +307,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
 
 	out_8(reg, (in_8(reg) & ~mask) | bits);
 
-	cpm2_unmap(im_cpmux);
 	return ret;
 }
 
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index cb9ba4e..e144936 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -33,9 +33,7 @@
 #include <linux/irqdomain.h>
 
 #include <asm/immap_cpm2.h>
-#include <asm/mpc8260.h>
 #include <asm/io.h>
-#include <asm/fs_pd.h>
 
 #include "cpm2_pic.h"
 
@@ -231,7 +229,7 @@ void cpm2_pic_init(struct device_node *node)
 {
 	int i;
 
-	cpm2_intctl = cpm2_map(im_intctl);
+	cpm2_intctl = &cpm2_immr->im_intctl;
 
 	/* Clear the CPM IRQ controller, in case it has any bits set
 	 * from the bootloader
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 8234013..47db732 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -15,11 +15,9 @@
  */
 
 #include <linux/init.h>
-#include <linux/of_device.h>
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
 #include <linux/slab.h>
 
 #include <asm/udbg.h>
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
index 0695d26..40f5711 100644
--- a/arch/powerpc/sysdev/cpm_gpio.c
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -9,7 +9,8 @@
  */
 
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/cpm.h>
 #ifdef CONFIG_8xx_GPIO
diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S
index 329b9c4..e8401b2 100644
--- a/arch/powerpc/sysdev/dcr-low.S
+++ b/arch/powerpc/sysdev/dcr-low.S
@@ -5,10 +5,10 @@
  * Copyright (c) 2004 Eugene Surovegin <ebs@ebshome.net>
  */
 
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
 #include <asm/processor.h>
 #include <asm/bug.h>
-#include <asm/export.h>
 
 #define DCR_ACCESS_PROLOG(table) \
 	cmplwi	cr0,r3,1024;	 \
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index c7327b8..0408276 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -42,33 +42,33 @@ static u32 __iomem *mpic_percpu_base_vaddr;
  * Linux descriptor level callbacks
  */
 
-void ehv_pic_unmask_irq(struct irq_data *d)
+static void ehv_pic_unmask_irq(struct irq_data *d)
 {
 	unsigned int src = virq_to_hw(d->irq);
 
 	ev_int_set_mask(src, 0);
 }
 
-void ehv_pic_mask_irq(struct irq_data *d)
+static void ehv_pic_mask_irq(struct irq_data *d)
 {
 	unsigned int src = virq_to_hw(d->irq);
 
 	ev_int_set_mask(src, 1);
 }
 
-void ehv_pic_end_irq(struct irq_data *d)
+static void ehv_pic_end_irq(struct irq_data *d)
 {
 	unsigned int src = virq_to_hw(d->irq);
 
 	ev_int_eoi(src);
 }
 
-void ehv_pic_direct_end_irq(struct irq_data *d)
+static void ehv_pic_direct_end_irq(struct irq_data *d)
 {
 	out_be32(mpic_percpu_base_vaddr + MPIC_EOI / 4, 0);
 }
 
-int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
+static int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
 			 bool force)
 {
 	unsigned int src = virq_to_hw(d->irq);
@@ -109,7 +109,7 @@ static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
 	}
 }
 
-int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+static int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	unsigned int src = virq_to_hw(d->irq);
 	unsigned int vecpri, vold, vnew, prio, cpu_dest;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 6daf620..3868483 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -519,7 +519,7 @@ void fsl_pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+static int fsl_add_bridge(struct platform_device *pdev, int is_primary)
 {
 	int len;
 	struct pci_controller *hose;
@@ -767,7 +767,7 @@ static int __init mpc83xx_pcie_setup(struct pci_controller *hose,
 	u32 cfg_bar;
 	int ret = -ENOMEM;
 
-	pcie = zalloc_maybe_bootmem(sizeof(*pcie), GFP_KERNEL);
+	pcie = kzalloc(sizeof(*pcie), GFP_KERNEL);
 	if (!pcie)
 		return ret;
 
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index 093a875..3bc4ab9 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -112,7 +112,6 @@ struct ccsr_pci {
 
 };
 
-extern int fsl_add_bridge(struct platform_device *pdev, int is_primary);
 extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
 extern void fsl_pcibios_fixup_phb(struct pci_controller *phb);
 extern int mpc83xx_add_bridge(struct device_node *dev);
diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
index 76896de..9f6dd11 100644
--- a/arch/powerpc/sysdev/fsl_pmc.c
+++ b/arch/powerpc/sysdev/fsl_pmc.c
@@ -13,9 +13,9 @@
 #include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 struct pmc_regs {
 	__be32 devdisr;
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 0331962..f9b214b 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -23,16 +23,17 @@
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <asm/machdep.h>
+#include <asm/rio.h>
 
 #include "fsl_rio.h"
 
@@ -303,8 +304,8 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv)
 		out_be32(&priv->inb_atmu_regs[i].riwar, 0);
 }
 
-int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
-	u64 rstart, u64 size, u32 flags)
+static int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
+			   u64 rstart, u64 size, u32 flags)
 {
 	struct rio_priv *priv = mport->priv;
 	u32 base_size;
@@ -354,7 +355,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
 	return 0;
 }
 
-void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
+static void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
 {
 	u32 win_start_shift, base_start_shift;
 	struct rio_priv *priv = mport->priv;
@@ -442,7 +443,7 @@ static inline void fsl_rio_info(struct device *dev, u32 ccsr)
  * master port with system-specific info, and registers the
  * master port with the RapidIO subsystem.
  */
-int fsl_rio_setup(struct platform_device *dev)
+static int fsl_rio_setup(struct platform_device *dev)
 {
 	struct rio_ops *ops;
 	struct rio_mport *port;
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index c1f72497..f956591 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -25,7 +25,6 @@
 #include <linux/interrupt.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/slab.h>
 
 #include "fsl_rio.h"
@@ -360,7 +359,7 @@ fsl_rio_dbell_handler(int irq, void *dev_instance)
 	return IRQ_HANDLED;
 }
 
-void msg_unit_error_handler(void)
+static void msg_unit_error_handler(void)
 {
 
 	/*XXX: Error recovery is not implemented, we just clear errors */
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index c117715..528506f 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -19,7 +19,6 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/phy.h>
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c
index c5bf7e1..58cee28 100644
--- a/arch/powerpc/sysdev/mpc5xxx_clocks.c
+++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
@@ -25,8 +25,10 @@ unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode)
 
 	fwnode_for_each_parent_node(fwnode, parent) {
 		ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq);
-		if (!ret)
+		if (!ret) {
+			fwnode_handle_put(parent);
 			return bus_freq;
+		}
 	}
 
 	return 0;
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 1a3ac0b..7b449cc 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -7,9 +7,10 @@
  */
 
 #include <linux/list.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/export.h>
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index b2f0a73..7166e2e 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -16,7 +16,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/syscore_ops.h>
 #include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
index 420f949..2211937 100644
--- a/arch/powerpc/sysdev/of_rtc.c
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -5,10 +5,10 @@
  * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
  */
 #include <linux/kernel.h>
-#include <linux/of.h>
 #include <linux/init.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include <asm/prom.h>
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 9dabb50..fcf8d15 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -16,11 +16,11 @@
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/workqueue.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/pmi.h>
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 6cfbb4f..5fe73da 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -111,7 +111,6 @@ static int ics_opal_set_affinity(struct irq_data *d,
 		       __func__, d->irq, hw_irq, rc);
 		return -1;
 	}
-	server = be16_to_cpu(oserver);
 
 	wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
 	if (wanted_server < 0) {
diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
new file mode 100755
index 0000000..0670690
--- /dev/null
+++ b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -o pipefail
+
+# To debug, uncomment the following line
+# set -x
+
+# Output from -fpatchable-function-entry can only vary on ppc64 elfv2, so this
+# should not be invoked for other targets. Therefore we can pass in -m64 and
+# -mabi explicitly, to take care of toolchains defaulting to other targets.
+
+# Test whether the compile option -fpatchable-function-entry exists and
+# generates appropriate code
+echo "int func() { return 0; }" | \
+    $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+    grep -q "__patchable_function_entries"
+
+# Test whether nops are generated after the local entry point
+echo "int x; int func() { return x; }" | \
+    $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+    awk 'BEGIN { RS = ";" } /\.localentry.*nop.*\n[[:space:]]*nop/ { print $0 }' | \
+    grep -q "func:"
+
+exit 0
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index d334de3..682c7c0 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -10,14 +10,12 @@
 # Disable ftrace for the entire directory
 ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
 
-ifdef CONFIG_CC_IS_CLANG
-# clang stores addresses on the stack causing the frame size to blow
-# out. See https://github.com/ClangBuiltLinux/linux/issues/252
-KBUILD_CFLAGS += -Wframe-larger-than=4096
-endif
-
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
+# Clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096
+
 obj-y			+= xmon.o nonstdio.o spr_access.o xmon_bpts.o
 
 ifdef CONFIG_XMON_DISASSEMBLY
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index ee17270..5888fcd 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -58,6 +58,7 @@
 #ifdef CONFIG_PPC64
 #include <asm/hvcall.h>
 #include <asm/paca.h>
+#include <asm/lppaca.h>
 #endif
 
 #include "nonstdio.h"
@@ -3303,7 +3304,7 @@ static void show_pte(unsigned long addr)
 {
 	unsigned long tskv = 0;
 	struct task_struct *volatile tsk = NULL;
-	struct mm_struct *mm;
+	struct mm_struct *volatile mm;
 	pgd_t *pgdp;
 	p4d_t *p4dp;
 	pud_t *pudp;
@@ -3828,9 +3829,9 @@ static void dump_tlb_44x(void)
 #ifdef CONFIG_PPC_BOOK3E_64
 static void dump_tlb_book3e(void)
 {
-	u32 mmucfg, pidmask, lpidmask;
+	u32 mmucfg;
 	u64 ramask;
-	int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0;
+	int i, tlb, ntlbs, pidsz, lpidsz, rasz;
 	int mmu_version;
 	static const char *pgsz_names[] = {
 		"  1K",
@@ -3874,12 +3875,8 @@ static void dump_tlb_book3e(void)
 	pidsz = ((mmucfg >> 6) & 0x1f) + 1;
 	lpidsz = (mmucfg >> 24) & 0xf;
 	rasz = (mmucfg >> 16) & 0x7f;
-	if ((mmu_version > 1) && (mmucfg & 0x10000))
-		lrat = 1;
 	printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n",
 	       mmu_version, ntlbs, pidsz, lpidsz, rasz);
-	pidmask = (1ul << pidsz) - 1;
-	lpidmask = (1ul << lpidsz) - 1;
 	ramask = (1ull << rasz) - 1;
 
 	for (tlb = 0; tlb < ntlbs; tlb++) {
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a6f47c0..2f1c0cd 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -380,7 +380,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 
 /* static inline pte_t pte_mkread(pte_t pte) */
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_WRITE);
 }
@@ -677,9 +677,9 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 	return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
-	return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
+	return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)));
 }
 
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 661b6de..ae29e43 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,6 +127,7 @@
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WANT_KERNEL_PMD_MKWRITE
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index ccdbccf..f072678 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -104,7 +104,7 @@ static inline int huge_pte_dirty(pte_t pte)
 
 static inline pte_t huge_pte_mkwrite(pte_t pte)
 {
-	return pte_mkwrite(pte);
+	return pte_mkwrite_novma(pte);
 }
 
 static inline pte_t huge_pte_mkdirty(pte_t pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index d28d2e5..fb3ee77 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1001,7 +1001,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 	return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
 	if (pte_val(pte) & _PAGE_DIRTY)
@@ -1498,7 +1498,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
 	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
 	if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index ca5a418..e5ec762 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -98,7 +98,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
 		if (flags & SET_MEMORY_RO)
 			new = pte_wrprotect(new);
 		else if (flags & SET_MEMORY_RW)
-			new = pte_mkwrite(pte_mkdirty(new));
+			new = pte_mkwrite_novma(pte_mkdirty(new));
 		if (flags & SET_MEMORY_NX)
 			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
 		else if (flags & SET_MEMORY_X)
@@ -156,7 +156,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
 	if (flags & SET_MEMORY_RO)
 		new = pmd_wrprotect(new);
 	else if (flags & SET_MEMORY_RW)
-		new = pmd_mkwrite(pmd_mkdirty(new));
+		new = pmd_mkwrite_novma(pmd_mkdirty(new));
 	if (flags & SET_MEMORY_NX)
 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
 	else if (flags & SET_MEMORY_X)
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 676f3d4..f939f12 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -358,11 +358,11 @@ static inline pte_t pte_##fn(pte_t pte) { pte.pte_##h op; return pte; }
  * kernel permissions), we attempt to couple them a bit more sanely here.
  */
 PTE_BIT_FUNC(high, wrprotect, &= ~(_PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE));
-PTE_BIT_FUNC(high, mkwrite, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
+PTE_BIT_FUNC(high, mkwrite_novma, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
 PTE_BIT_FUNC(high, mkhuge, |= _PAGE_SZHUGE);
 #else
 PTE_BIT_FUNC(low, wrprotect, &= ~_PAGE_RW);
-PTE_BIT_FUNC(low, mkwrite, |= _PAGE_RW);
+PTE_BIT_FUNC(low, mkwrite_novma, |= _PAGE_RW);
 PTE_BIT_FUNC(low, mkhuge, |= _PAGE_SZHUGE);
 #endif
 
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 315d316..9e85d57 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -239,7 +239,7 @@ static inline pte_t pte_mkold(pte_t pte)
 	return __pte(pte_val(pte) & ~SRMMU_REF);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return __pte(pte_val(pte) | SRMMU_WRITE);
 }
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 09aa37c..5e41033 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -518,7 +518,7 @@ static inline pte_t pte_mkclean(pte_t pte)
 	return __pte(val);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	unsigned long val = pte_val(pte), mask;
 
@@ -773,11 +773,11 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 	return __pmd(pte_val(pte));
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
 
-	pte = pte_mkwrite(pte);
+	pte = pte_mkwrite_novma(pte);
 
 	return __pmd(pte_val(pte));
 }
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index ca450c7..a23cdd7 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -753,7 +753,7 @@ asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp)
  */
 static_assert(NSIGILL	== 11);
 static_assert(NSIGFPE	== 15);
-static_assert(NSIGSEGV	== 9);
+static_assert(NSIGSEGV	== 10);
 static_assert(NSIGBUS	== 5);
 static_assert(NSIGTRAP	== 6);
 static_assert(NSIGCHLD	== 6);
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 570e43e..b4e4109 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -562,7 +562,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
  */
 static_assert(NSIGILL	== 11);
 static_assert(NSIGFPE	== 15);
-static_assert(NSIGSEGV	== 9);
+static_assert(NSIGSEGV	== 10);
 static_assert(NSIGBUS	== 5);
 static_assert(NSIGTRAP	== 6);
 static_assert(NSIGCHLD	== 6);
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 44f6c76..e1ece21 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -207,7 +207,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
 	return(pte);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	if (unlikely(pte_get_bits(pte,  _PAGE_RW)))
 		return pte;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd9a180..982b777 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1815,6 +1815,11 @@
 		  (CC_IS_CLANG && CLANG_VERSION >= 140000)) && \
 		  $(as-instr,endbr64)
 
+config X86_CET
+	def_bool n
+	help
+	  CET features configured (Shadow stack or IBT)
+
 config X86_KERNEL_IBT
 	prompt "Indirect Branch Tracking"
 	def_bool y
@@ -1822,6 +1827,7 @@
 	# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
 	depends on !LD_IS_LLD || LLD_VERSION >= 140000
 	select OBJTOOL
+	select X86_CET
 	help
 	  Build the kernel with support for Indirect Branch Tracking, a
 	  hardware support course-grain forward-edge Control Flow Integrity
@@ -1915,6 +1921,24 @@
 
 	  If unsure, say N.
 
+config X86_USER_SHADOW_STACK
+	bool "X86 userspace shadow stack"
+	depends on AS_WRUSS
+	depends on X86_64
+	select ARCH_USES_HIGH_VMA_FLAGS
+	select X86_CET
+	help
+	  Shadow stack protection is a hardware feature that detects function
+	  return address corruption.  This helps mitigate ROP attacks.
+	  Applications must be enabled to use it, and old userspace does not
+	  get protection "for free".
+
+	  CPUs supporting shadow stacks were first released in 2020.
+
+	  See Documentation/arch/x86/shstk.rst for more information.
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index b88f784..8ad41da 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -24,3 +24,8 @@
 	def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
 	help
 	  Supported by binutils >= 2.30 and LLVM integrated assembler
+
+config AS_WRUSS
+	def_bool $(as-instr,wrussq %rax$(comma)(%rbx))
+	help
+	  Supported by binutils >= 2.31 and LLVM integrated assembler
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8147682..1d6eee3 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,6 +374,7 @@
 450	common	set_mempolicy_home_node	sys_set_mempolicy_home_node
 451	common	cachestat		sys_cachestat
 452	common	fchmodat2		sys_fchmodat2
+453	64	map_shadow_stack	sys_map_shadow_stack
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 7b4ecbf..2061ed1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -307,6 +307,7 @@
 #define X86_FEATURE_MSR_TSX_CTRL	(11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
 #define X86_FEATURE_SMBA		(11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC		(11*32+22) /* "" Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK		(11*32+23) /* Shadow stack support for user mode applications */
 
 #define X86_FEATURE_SRSO		(11*32+24) /* "" AMD BTB untrain RETs */
 #define X86_FEATURE_SRSO_ALIAS		(11*32+25) /* "" AMD BTB untrain RETs through aliasing */
@@ -383,6 +384,7 @@
 #define X86_FEATURE_OSPKE		(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_WAITPKG		(16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
 #define X86_FEATURE_AVX512_VBMI2	(16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK		(16*32+ 7) /* "" Shadow stack */
 #define X86_FEATURE_GFNI		(16*32+ 8) /* Galois Field New Instructions */
 #define X86_FEATURE_VAES		(16*32+ 9) /* Vector AES */
 #define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index fafe9be..702d93f 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -105,6 +105,18 @@
 # define DISABLE_TDX_GUEST	(1 << (X86_FEATURE_TDX_GUEST & 31))
 #endif
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+#define DISABLE_USER_SHSTK	0
+#else
+#define DISABLE_USER_SHSTK	(1 << (X86_FEATURE_USER_SHSTK & 31))
+#endif
+
+#ifdef CONFIG_X86_KERNEL_IBT
+#define DISABLE_IBT	0
+#else
+#define DISABLE_IBT	(1 << (X86_FEATURE_IBT & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -120,7 +132,7 @@
 #define DISABLED_MASK9	(DISABLE_SGX)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
-			 DISABLE_CALL_DEPTH_TRACKING)
+			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12	(DISABLE_LAM)
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
@@ -128,7 +140,7 @@
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
 			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK18	0
+#define DISABLED_MASK18	(DISABLE_IBT)
 #define DISABLED_MASK19	0
 #define DISABLED_MASK20	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b475d9a..31089b85 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -82,6 +82,15 @@ static inline void fpregs_unlock(void)
 		preempt_enable();
 }
 
+/*
+ * FPU state gets lazily restored before returning to userspace. So when in the
+ * kernel, the valid FPU state may be kept in the buffer. This function will force
+ * restore all the fpu state to the registers early if needed, and lock them from
+ * being automatically saved/restored. Then FPU state can be modified safely in the
+ * registers, before unlocking with fpregs_unlock().
+ */
+void fpregs_lock_and_load(void);
+
 #ifdef CONFIG_X86_DEBUG_FPU
 extern void fpregs_assert_state_consistent(void);
 #else
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
index 4f928d6..697b77e 100644
--- a/arch/x86/include/asm/fpu/regset.h
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -7,11 +7,12 @@
 
 #include <linux/regset.h>
 
-extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
+				ssp_active;
 extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
-				 xstateregs_get;
+				 xstateregs_get, ssp_get;
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
-				 xstateregs_set;
+				 xstateregs_set, ssp_set;
 
 /*
  * xstateregs_active == regset_fpregs_active. Please refer to the comment
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index 78fcde7..ca6e5e5 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -11,7 +11,8 @@
 
 extern void save_fpregs_to_fpstate(struct fpu *fpu);
 extern void fpu__drop(struct fpu *fpu);
-extern int  fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
+extern int  fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
+		      unsigned long shstk_addr);
 extern void fpu_flush_thread(void);
 
 /*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 7f6d858..eb81007 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -115,8 +115,8 @@ enum xfeature {
 	XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
 	XFEATURE_PKRU,
 	XFEATURE_PASID,
-	XFEATURE_RSRVD_COMP_11,
-	XFEATURE_RSRVD_COMP_12,
+	XFEATURE_CET_USER,
+	XFEATURE_CET_KERNEL_UNUSED,
 	XFEATURE_RSRVD_COMP_13,
 	XFEATURE_RSRVD_COMP_14,
 	XFEATURE_LBR,
@@ -138,6 +138,8 @@ enum xfeature {
 #define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
 #define XFEATURE_MASK_PASID		(1 << XFEATURE_PASID)
+#define XFEATURE_MASK_CET_USER		(1 << XFEATURE_CET_USER)
+#define XFEATURE_MASK_CET_KERNEL	(1 << XFEATURE_CET_KERNEL_UNUSED)
 #define XFEATURE_MASK_LBR		(1 << XFEATURE_LBR)
 #define XFEATURE_MASK_XTILE_CFG		(1 << XFEATURE_XTILE_CFG)
 #define XFEATURE_MASK_XTILE_DATA	(1 << XFEATURE_XTILE_DATA)
@@ -253,6 +255,16 @@ struct pkru_state {
 } __packed;
 
 /*
+ * State component 11 is Control-flow Enforcement user states
+ */
+struct cet_user_state {
+	/* user control-flow settings */
+	u64 user_cet;
+	/* user shadow stack pointer */
+	u64 user_ssp;
+};
+
+/*
  * State component 15: Architectural LBR configuration state.
  * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
  */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index cd3dd17..d4427b8 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,7 +50,8 @@
 #define XFEATURE_MASK_USER_DYNAMIC	XFEATURE_MASK_XTILE_DATA
 
 /* All currently supported supervisor features */
-#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
+					    XFEATURE_MASK_CET_USER)
 
 /*
  * A supervisor state component may not always contain valuable information,
@@ -77,7 +78,8 @@
  * Unsupported supervisor features. When a supervisor feature in this mask is
  * supported in the future, move it to the supported supervisor feature mask.
  */
-#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \
+					      XFEATURE_MASK_CET_KERNEL)
 
 /* All supervisor states including supported and unsupported states. */
 #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index cd5c10a..05fd175 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -614,7 +614,7 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF,	xenpv_exc_double_fault);
 #endif
 
 /* #CP */
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
 DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP,	exc_control_protection);
 #endif
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1d29dc7..416901d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -186,6 +186,8 @@ do {						\
 #else
 #define deactivate_mm(tsk, mm)			\
 do {						\
+	if (!tsk->vfork_done)			\
+		shstk_free(tsk);		\
 	load_gs_index(0);			\
 	loadsegment(fs, 0);			\
 } while (0)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index dbf8af7..d6ad98c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -125,9 +125,15 @@ extern pmdval_t early_pmd_flags;
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_DIRTY;
+	return pte_flags(pte) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pte_shstk(pte_t pte)
+{
+	return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+	       (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
 }
 
 static inline int pte_young(pte_t pte)
@@ -135,9 +141,16 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_DIRTY;
+	return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pmd_shstk(pmd_t pmd)
+{
+	return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+	       (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+	       (_PAGE_DIRTY | _PAGE_PSE);
 }
 
 #define pmd_young pmd_young
@@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd)
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
 }
 
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
 {
-	return pud_flags(pud) & _PAGE_DIRTY;
+	return pud_flags(pud) & _PAGE_DIRTY_BITS;
 }
 
 static inline int pud_young(pud_t pud)
@@ -158,7 +171,27 @@ static inline int pud_young(pud_t pud)
 
 static inline int pte_write(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_RW;
+	/*
+	 * Shadow stack pages are logically writable, but do not have
+	 * _PAGE_RW.  Check for them separately from _PAGE_RW itself.
+	 */
+	return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+	/*
+	 * Shadow stack pages are logically writable, but do not have
+	 * _PAGE_RW.  Check for them separately from _PAGE_RW itself.
+	 */
+	return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
+}
+
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_RW;
 }
 
 static inline int pte_huge(pte_t pte)
@@ -292,9 +325,63 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 	return native_make_pte(v & ~clear);
 }
 
+/*
+ * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the
+ * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So
+ * when creating dirty, write-protected memory, a software bit is used:
+ * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the
+ * Dirty bit to SavedDirty, and vice-vesra.
+ *
+ * This shifting is only done if needed. In the case of shifting
+ * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of
+ * shifting SavedDirty->Dirty, the condition is Write=1.
+ */
+static inline pgprotval_t mksaveddirty_shift(pgprotval_t v)
+{
+	pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
+
+	v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
+	v &= ~(cond << _PAGE_BIT_DIRTY);
+
+	return v;
+}
+
+static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v)
+{
+	pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1;
+
+	v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY;
+	v &= ~(cond << _PAGE_BIT_SAVED_DIRTY);
+
+	return v;
+}
+
+static inline pte_t pte_mksaveddirty(pte_t pte)
+{
+	pteval_t v = native_pte_val(pte);
+
+	v = mksaveddirty_shift(v);
+	return native_make_pte(v);
+}
+
+static inline pte_t pte_clear_saveddirty(pte_t pte)
+{
+	pteval_t v = native_pte_val(pte);
+
+	v = clear_saveddirty_shift(v);
+	return native_make_pte(v);
+}
+
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_RW);
+	pte = pte_clear_flags(pte, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PTE (Write=0,Dirty=1). Move the hardware
+	 * dirty value to the software bit, if present.
+	 */
+	return pte_mksaveddirty(pte);
 }
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -332,7 +419,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_DIRTY);
+	return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -347,7 +434,16 @@ static inline pte_t pte_mkexec(pte_t pte)
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pte_mksaveddirty(pte);
+}
+
+static inline pte_t pte_mkwrite_shstk(pte_t pte)
+{
+	pte = pte_clear_flags(pte, _PAGE_RW);
+
+	return pte_set_flags(pte, _PAGE_DIRTY);
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
@@ -355,11 +451,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
 	return pte_set_flags(pte, _PAGE_ACCESSED);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	return pte_set_flags(pte, _PAGE_RW);
 }
 
+struct vm_area_struct;
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
+#define pte_mkwrite pte_mkwrite
+
 static inline pte_t pte_mkhuge(pte_t pte)
 {
 	return pte_set_flags(pte, _PAGE_PSE);
@@ -404,9 +504,34 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 	return native_make_pmd(v & ~clear);
 }
 
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
+{
+	pmdval_t v = native_pmd_val(pmd);
+
+	v = mksaveddirty_shift(v);
+	return native_make_pmd(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
+{
+	pmdval_t v = native_pmd_val(pmd);
+
+	v = clear_saveddirty_shift(v);
+	return native_make_pmd(v);
+}
+
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_RW);
+	pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PMD (RW=0, Dirty=1). Move the hardware
+	 * dirty value to the software bit.
+	 */
+	return pmd_mksaveddirty(pmd);
 }
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -433,12 +558,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
 
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_DIRTY);
+	return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pmd_mksaveddirty(pmd);
+}
+
+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
+{
+	pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+	return pmd_set_flags(pmd, _PAGE_DIRTY);
 }
 
 static inline pmd_t pmd_mkdevmap(pmd_t pmd)
@@ -456,11 +590,14 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_ACCESSED);
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	return pmd_set_flags(pmd, _PAGE_RW);
 }
 
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+#define pmd_mkwrite pmd_mkwrite
+
 static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 {
 	pudval_t v = native_pud_val(pud);
@@ -475,6 +612,24 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 	return native_make_pud(v & ~clear);
 }
 
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_mksaveddirty(pud_t pud)
+{
+	pudval_t v = native_pud_val(pud);
+
+	v = mksaveddirty_shift(v);
+	return native_make_pud(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_clear_saveddirty(pud_t pud)
+{
+	pudval_t v = native_pud_val(pud);
+
+	v = clear_saveddirty_shift(v);
+	return native_make_pud(v);
+}
+
 static inline pud_t pud_mkold(pud_t pud)
 {
 	return pud_clear_flags(pud, _PAGE_ACCESSED);
@@ -482,17 +637,26 @@ static inline pud_t pud_mkold(pud_t pud)
 
 static inline pud_t pud_mkclean(pud_t pud)
 {
-	return pud_clear_flags(pud, _PAGE_DIRTY);
+	return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
 }
 
 static inline pud_t pud_wrprotect(pud_t pud)
 {
-	return pud_clear_flags(pud, _PAGE_RW);
+	pud = pud_clear_flags(pud, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PUD (RW=0, Dirty=1). Move the hardware
+	 * dirty value to the software bit.
+	 */
+	return pud_mksaveddirty(pud);
 }
 
 static inline pud_t pud_mkdirty(pud_t pud)
 {
-	return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pud_mksaveddirty(pud);
 }
 
 static inline pud_t pud_mkdevmap(pud_t pud)
@@ -512,7 +676,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
 
 static inline pud_t pud_mkwrite(pud_t pud)
 {
-	return pud_set_flags(pud, _PAGE_RW);
+	pud = pud_set_flags(pud, _PAGE_RW);
+
+	return pud_clear_saveddirty(pud);
 }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -629,6 +795,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pteval_t val = pte_val(pte), oldval = val;
+	pte_t pte_result;
 
 	/*
 	 * Chop off the NX bit (if present), and add the NX portion of
@@ -637,17 +804,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	val &= _PAGE_CHG_MASK;
 	val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
 	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
-	return __pte(val);
+
+	pte_result = __pte(val);
+
+	/*
+	 * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
+	 *  1. Marking Write=0 PTEs Dirty=1
+	 *  2. Marking Dirty=1 PTEs Write=0
+	 *
+	 * The first case cannot happen because the _PAGE_CHG_MASK will filter
+	 * out any Dirty bit passed in newprot. Handle the second case by
+	 * going through the mksaveddirty exercise. Only do this if the old
+	 * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+	 */
+	if (oldval & _PAGE_RW)
+		pte_result = pte_mksaveddirty(pte_result);
+	else
+		pte_result = pte_clear_saveddirty(pte_result);
+
+	return pte_result;
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
 	pmdval_t val = pmd_val(pmd), oldval = val;
+	pmd_t pmd_result;
 
-	val &= _HPAGE_CHG_MASK;
+	val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
 	val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
 	val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
-	return __pmd(val);
+
+	pmd_result = __pmd(val);
+
+	/*
+	 * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
+	 *  1. Marking Write=0 PMDs Dirty=1
+	 *  2. Marking Dirty=1 PMDs Write=0
+	 *
+	 * The first case cannot happen because the _PAGE_CHG_MASK will filter
+	 * out any Dirty bit passed in newprot. Handle the second case by
+	 * going through the mksaveddirty exercise. Only do this if the old
+	 * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+	 */
+	if (oldval & _PAGE_RW)
+		pmd_result = pmd_mksaveddirty(pmd_result);
+	else
+		pmd_result = pmd_clear_saveddirty(pmd_result);
+
+	return pmd_result;
 }
 
 /*
@@ -831,7 +1035,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
  * (Currently stuck as a macro because of indirect forward reference
  * to linux/mm.h:page_to_nid())
  */
-#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte(page, pgprot)						  \
+({									  \
+	pgprot_t __pgprot = pgprot;					  \
+									  \
+	WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \
+		    _PAGE_DIRTY);					  \
+	pfn_pte(page_to_pfn(page), __pgprot);				  \
+})
 
 static inline int pmd_bad(pmd_t pmd)
 {
@@ -1090,7 +1301,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
 {
-	clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+	/*
+	 * Avoid accidentally creating shadow stack PTEs
+	 * (Write=0,Dirty=1).  Use cmpxchg() to prevent races with
+	 * the hardware setting Dirty=1.
+	 */
+	pte_t old_pte, new_pte;
+
+	old_pte = READ_ONCE(*ptep);
+	do {
+		new_pte = pte_wrprotect(old_pte);
+	} while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte));
 }
 
 #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
@@ -1116,12 +1337,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 				  unsigned long address, pmd_t *pmdp);
 
 
-#define pmd_write pmd_write
-static inline int pmd_write(pmd_t pmd)
-{
-	return pmd_flags(pmd) & _PAGE_RW;
-}
-
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pmd_t *pmdp)
@@ -1148,13 +1363,17 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pmd_t *pmdp)
 {
-	clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
-}
+	/*
+	 * Avoid accidentally creating shadow stack PTEs
+	 * (Write=0,Dirty=1).  Use cmpxchg() to prevent races with
+	 * the hardware setting Dirty=1.
+	 */
+	pmd_t old_pmd, new_pmd;
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-	return pud_flags(pud) & _PAGE_RW;
+	old_pmd = READ_ONCE(*pmdp);
+	do {
+		new_pmd = pmd_wrprotect(old_pmd);
+	} while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd));
 }
 
 #ifndef pmdp_establish
@@ -1412,6 +1631,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write)
 {
 	unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
 
+	/*
+	 * Write=0,Dirty=1 PTEs are shadow stack, which the kernel
+	 * shouldn't generally allow access to, but since they
+	 * are already Write=0, the below logic covers both cases.
+	 */
 	if (write)
 		need_pte_bits |= _PAGE_RW;
 
@@ -1453,6 +1677,12 @@ static inline bool arch_has_hw_pte_young(void)
 	return true;
 }
 
+#define arch_check_zapped_pte arch_check_zapped_pte
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
+
+#define arch_check_zapped_pmd arch_check_zapped_pmd
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+
 #ifdef CONFIG_XEN_PV
 #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
 static inline bool arch_has_hw_nonleaf_pmd_young(void)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a6deb67..0b748ee 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -21,7 +21,8 @@
 #define _PAGE_BIT_SOFTW2	10	/* " */
 #define _PAGE_BIT_SOFTW3	11	/* " */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
-#define _PAGE_BIT_SOFTW4	58	/* available for programmer */
+#define _PAGE_BIT_SOFTW4	57	/* available for programmer */
+#define _PAGE_BIT_SOFTW5	58	/* available for programmer */
 #define _PAGE_BIT_PKEY_BIT0	59	/* Protection Keys, bit 1/4 */
 #define _PAGE_BIT_PKEY_BIT1	60	/* Protection Keys, bit 2/4 */
 #define _PAGE_BIT_PKEY_BIT2	61	/* Protection Keys, bit 3/4 */
@@ -34,6 +35,13 @@
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4
 
+#ifdef CONFIG_X86_64
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#else
+/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#endif
+
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
 /* - if the user mapped it with PROT_NONE; pte_present gives true */
 #define _PAGE_BIT_PROTNONE	_PAGE_BIT_GLOBAL
@@ -117,6 +125,18 @@
 #define _PAGE_SOFTW4	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * The hardware requires shadow stack to be Write=0,Dirty=1. However,
+ * there are valid cases where the kernel might create read-only PTEs that
+ * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In
+ * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit,
+ * to avoid creating a wrong "shadow stack" PTEs. Such PTEs have
+ * (Write=0,SavedDirty=1,Dirty=0) set.
+ */
+#define _PAGE_SAVED_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY)
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY)
+
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
 /*
@@ -125,10 +145,10 @@
  * instance, and is *not* included in this mask since
  * pte_modify() does modify it.
  */
-#define _COMMON_PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |	       \
-				 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\
-				 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
-				 _PAGE_UFFD_WP)
+#define _COMMON_PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |	\
+				 _PAGE_SPECIAL | _PAGE_ACCESSED |	\
+				 _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY |	\
+				 _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
 #define _PAGE_CHG_MASK	(_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
 #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
 
@@ -189,14 +209,22 @@ enum page_cache_mode {
 
 #define __PAGE_KERNEL		 (__PP|__RW|   0|___A|__NX|___D|   0|___G)
 #define __PAGE_KERNEL_EXEC	 (__PP|__RW|   0|___A|   0|___D|   0|___G)
+
+/*
+ * Page tables needs to have Write=1 in order for any lower PTEs to be
+ * writable. This includes shadow stack memory (Write=0, Dirty=1)
+ */
 #define _KERNPG_TABLE_NOENC	 (__PP|__RW|   0|___A|   0|___D|   0|   0)
 #define _KERNPG_TABLE		 (__PP|__RW|   0|___A|   0|___D|   0|   0| _ENC)
 #define _PAGE_TABLE_NOENC	 (__PP|__RW|_USR|___A|   0|___D|   0|   0)
 #define _PAGE_TABLE		 (__PP|__RW|_USR|___A|   0|___D|   0|   0| _ENC)
-#define __PAGE_KERNEL_RO	 (__PP|   0|   0|___A|__NX|___D|   0|___G)
-#define __PAGE_KERNEL_ROX	 (__PP|   0|   0|___A|   0|___D|   0|___G)
+
+#define __PAGE_KERNEL_RO	 (__PP|   0|   0|___A|__NX|   0|   0|___G)
+#define __PAGE_KERNEL_ROX	 (__PP|   0|   0|___A|   0|   0|   0|___G)
+#define __PAGE_KERNEL		 (__PP|__RW|   0|___A|__NX|___D|   0|___G)
+#define __PAGE_KERNEL_EXEC	 (__PP|__RW|   0|___A|   0|___D|   0|___G)
 #define __PAGE_KERNEL_NOCACHE	 (__PP|__RW|   0|___A|__NX|___D|   0|___G| __NC)
-#define __PAGE_KERNEL_VVAR	 (__PP|   0|_USR|___A|__NX|___D|   0|___G)
+#define __PAGE_KERNEL_VVAR	 (__PP|   0|_USR|___A|__NX|   0|   0|___G)
 #define __PAGE_KERNEL_LARGE	 (__PP|__RW|   0|___A|__NX|___D|_PSE|___G)
 #define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW|   0|___A|   0|___D|_PSE|___G)
 #define __PAGE_KERNEL_WP	 (__PP|__RW|   0|___A|__NX|___D|   0|___G| __WP)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cbb9430..0086920 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -28,6 +28,7 @@ struct vm86;
 #include <asm/unwind_hints.h>
 #include <asm/vmxfeatures.h>
 #include <asm/vdso/processor.h>
+#include <asm/shstk.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -474,6 +475,13 @@ struct thread_struct {
 	 */
 	u32			pkru;
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+	unsigned long		features;
+	unsigned long		features_locked;
+
+	struct thread_shstk	shstk;
+#endif
+
 	/* Floating point and extended processor state */
 	struct fpu		fpu;
 	/*
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
new file mode 100644
index 0000000..42fee89
--- /dev/null
+++ b/arch/x86/include/asm/shstk.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHSTK_H
+#define _ASM_X86_SHSTK_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+struct task_struct;
+struct ksignal;
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+struct thread_shstk {
+	u64	base;
+	u64	size;
+};
+
+long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
+void reset_thread_features(void);
+unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
+				       unsigned long stack_size);
+void shstk_free(struct task_struct *p);
+int setup_signal_shadow_stack(struct ksignal *ksig);
+int restore_signal_shadow_stack(void);
+#else
+static inline long shstk_prctl(struct task_struct *task, int option,
+			       unsigned long arg2) { return -EINVAL; }
+static inline void reset_thread_features(void) {}
+static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
+						     unsigned long clone_flags,
+						     unsigned long stack_size) { return 0; }
+static inline void shstk_free(struct task_struct *p) {}
+static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
+static inline int restore_signal_shadow_stack(void) { return 0; }
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_SHSTK_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index de48d13..d6cd934 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p)
 		: [pax] "a" (p));
 }
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static inline int write_user_shstk_64(u64 __user *addr, u64 val)
+{
+	asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+			  _ASM_EXTABLE(1b, %l[fail])
+			  :: [addr] "r" (addr), [val] "r" (val)
+			  :: fail);
+	return 0;
+fail:
+	return -EFAULT;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
 #define nop() asm volatile ("nop")
 
 static inline void serialize(void)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6ab42ca..2572689 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -306,7 +306,8 @@ static inline bool pte_flags_need_flush(unsigned long oldflags,
 	const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
 					_PAGE_ACCESSED;
 	const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
-					_PAGE_SOFTW3 | _PAGE_SOFTW4;
+					_PAGE_SOFTW3 | _PAGE_SOFTW4 |
+					_PAGE_SAVED_DIRTY;
 	const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
 			  _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
 			  _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index 10b1de5..afa5243 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -11,6 +11,7 @@
  *   bit 3 ==				1: use of reserved bit detected
  *   bit 4 ==				1: fault was an instruction fetch
  *   bit 5 ==				1: protection keys block access
+ *   bit 6 ==				1: shadow stack access fault
  *   bit 15 ==				1: SGX MMU page-fault
  */
 enum x86_pf_error_code {
@@ -20,6 +21,7 @@ enum x86_pf_error_code {
 	X86_PF_RSVD	=		1 << 3,
 	X86_PF_INSTR	=		1 << 4,
 	X86_PF_PK	=		1 << 5,
+	X86_PF_SHSTK	=		1 << 6,
 	X86_PF_SGX	=		1 << 15,
 };
 
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 47ecfff..b1c9cea 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -18,7 +18,8 @@ void __init trap_init(void);
 asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
 #endif
 
-extern bool ibt_selftest(void);
+extern int ibt_selftest(void);
+extern int ibt_selftest_noendbr(void);
 
 #ifdef CONFIG_X86_F00F_BUG
 /* For handling the FOOF bug */
@@ -47,4 +48,16 @@ void __noreturn handle_stack_overflow(struct pt_regs *regs,
 				      struct stack_info *info);
 #endif
 
+static inline void cond_local_irq_enable(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_enable();
+}
+
+static inline void cond_local_irq_disable(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_disable();
+}
+
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 775dbd3..8148bdd 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMAN_H
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
+#define MAP_ABOVE4G	0x80		/* only map above 4GB */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 #define arch_calc_vm_prot_bits(prot, key) (		\
@@ -12,6 +13,9 @@
 		((key) & 0x8 ? VM_PKEY_BIT3 : 0))
 #endif
 
+/* Flags for map_shadow_stack(2) */
+#define SHADOW_STACK_SET_TOKEN	(1ULL << 0)	/* Set up a restore token in the shadow stack */
+
 #include <asm-generic/mman.h>
 
 #endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index e8d7ebb..384e2cc 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -23,9 +23,21 @@
 #define ARCH_MAP_VDSO_32		0x2002
 #define ARCH_MAP_VDSO_64		0x2003
 
+/* Don't use 0x3001-0x3004 because of old glibcs */
+
 #define ARCH_GET_UNTAG_MASK		0x4001
 #define ARCH_ENABLE_TAGGED_ADDR		0x4002
 #define ARCH_GET_MAX_TAG_BITS		0x4003
 #define ARCH_FORCE_TAGGED_SVA		0x4004
 
+#define ARCH_SHSTK_ENABLE		0x5001
+#define ARCH_SHSTK_DISABLE		0x5002
+#define ARCH_SHSTK_LOCK			0x5003
+#define ARCH_SHSTK_UNLOCK		0x5004
+#define ARCH_SHSTK_STATUS		0x5005
+
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK		(1ULL <<  0)
+#define ARCH_SHSTK_WRSS			(1ULL <<  1)
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00df34c..3269a0e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -48,6 +48,7 @@
 obj-y			+= traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o dumpstack.o nmi.o
 obj-$(CONFIG_MODIFY_LDT_SYSCALL)	+= ldt.o
+obj-$(CONFIG_X86_KERNEL_IBT)		+= ibt_selftest.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
@@ -144,6 +145,10 @@
 
 obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
 
+obj-$(CONFIG_X86_CET)			+= cet.o
+
+obj-$(CONFIG_X86_USER_SHADOW_STACK)	+= shstk.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
new file mode 100644
index 0000000..d2c732a
--- /dev/null
+++ b/arch/x86/kernel/cet.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <asm/bugs.h>
+#include <asm/traps.h>
+
+enum cp_error_code {
+	CP_EC        = (1 << 15) - 1,
+
+	CP_RET       = 1,
+	CP_IRET      = 2,
+	CP_ENDBR     = 3,
+	CP_RSTRORSSP = 4,
+	CP_SETSSBSY  = 5,
+
+	CP_ENCL	     = 1 << 15,
+};
+
+static const char cp_err[][10] = {
+	[0] = "unknown",
+	[1] = "near ret",
+	[2] = "far/iret",
+	[3] = "endbranch",
+	[4] = "rstorssp",
+	[5] = "setssbsy",
+};
+
+static const char *cp_err_string(unsigned long error_code)
+{
+	unsigned int cpec = error_code & CP_EC;
+
+	if (cpec >= ARRAY_SIZE(cp_err))
+		cpec = 0;
+	return cp_err[cpec];
+}
+
+static void do_unexpected_cp(struct pt_regs *regs, unsigned long error_code)
+{
+	WARN_ONCE(1, "Unexpected %s #CP, error_code: %s\n",
+		  user_mode(regs) ? "user mode" : "kernel mode",
+		  cp_err_string(error_code));
+}
+
+static DEFINE_RATELIMIT_STATE(cpf_rate, DEFAULT_RATELIMIT_INTERVAL,
+			      DEFAULT_RATELIMIT_BURST);
+
+static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	struct task_struct *tsk;
+	unsigned long ssp;
+
+	/*
+	 * An exception was just taken from userspace. Since interrupts are disabled
+	 * here, no scheduling should have messed with the registers yet and they
+	 * will be whatever is live in userspace. So read the SSP before enabling
+	 * interrupts so locking the fpregs to do it later is not required.
+	 */
+	rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+	cond_local_irq_enable(regs);
+
+	tsk = current;
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_nr = X86_TRAP_CP;
+
+	/* Ratelimit to prevent log spamming. */
+	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+	    __ratelimit(&cpf_rate)) {
+		pr_emerg("%s[%d] control protection ip:%lx sp:%lx ssp:%lx error:%lx(%s)%s",
+			 tsk->comm, task_pid_nr(tsk),
+			 regs->ip, regs->sp, ssp, error_code,
+			 cp_err_string(error_code),
+			 error_code & CP_ENCL ? " in enclave" : "");
+		print_vma_addr(KERN_CONT " in ", regs->ip);
+		pr_cont("\n");
+	}
+
+	force_sig_fault(SIGSEGV, SEGV_CPERR, (void __user *)0);
+	cond_local_irq_disable(regs);
+}
+
+static __ro_after_init bool ibt_fatal = true;
+
+static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	if ((error_code & CP_EC) != CP_ENDBR) {
+		do_unexpected_cp(regs, error_code);
+		return;
+	}
+
+	if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) {
+		regs->ax = 0;
+		return;
+	}
+
+	pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
+	if (!ibt_fatal) {
+		printk(KERN_DEFAULT CUT_HERE);
+		__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
+		return;
+	}
+	BUG();
+}
+
+static int __init ibt_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		setup_clear_cpu_cap(X86_FEATURE_IBT);
+
+	if (!strcmp(str, "warn"))
+		ibt_fatal = false;
+
+	return 1;
+}
+
+__setup("ibt=", ibt_setup);
+
+DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
+{
+	if (user_mode(regs)) {
+		if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+			do_user_cp_fault(regs, error_code);
+		else
+			do_unexpected_cp(regs, error_code);
+	} else {
+		if (cpu_feature_enabled(X86_FEATURE_IBT))
+			do_kernel_cp_fault(regs, error_code);
+		else
+			do_unexpected_cp(regs, error_code);
+	}
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 90b8c5e..6d75fab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -587,27 +587,43 @@ __noendbr void ibt_restore(u64 save)
 
 static __always_inline void setup_cet(struct cpuinfo_x86 *c)
 {
-	u64 msr = CET_ENDBR_EN;
+	bool user_shstk, kernel_ibt;
 
-	if (!HAS_KERNEL_IBT ||
-	    !cpu_feature_enabled(X86_FEATURE_IBT))
+	if (!IS_ENABLED(CONFIG_X86_CET))
 		return;
 
-	wrmsrl(MSR_IA32_S_CET, msr);
+	kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT);
+	user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+		     IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK);
+
+	if (!kernel_ibt && !user_shstk)
+		return;
+
+	if (user_shstk)
+		set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
+
+	if (kernel_ibt)
+		wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN);
+	else
+		wrmsrl(MSR_IA32_S_CET, 0);
+
 	cr4_set_bits(X86_CR4_CET);
 
-	if (!ibt_selftest()) {
+	if (kernel_ibt && ibt_selftest()) {
 		pr_err("IBT selftest: Failed!\n");
 		wrmsrl(MSR_IA32_S_CET, 0);
 		setup_clear_cpu_cap(X86_FEATURE_IBT);
-		return;
 	}
 }
 
 __noendbr void cet_disable(void)
 {
-	if (cpu_feature_enabled(X86_FEATURE_IBT))
-		wrmsrl(MSR_IA32_S_CET, 0);
+	if (!(cpu_feature_enabled(X86_FEATURE_IBT) ||
+	      cpu_feature_enabled(X86_FEATURE_SHSTK)))
+		return;
+
+	wrmsrl(MSR_IA32_S_CET, 0);
+	wrmsrl(MSR_IA32_U_CET, 0);
 }
 
 /*
@@ -1491,6 +1507,9 @@ static void __init cpu_parse_early_param(void)
 	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
 		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 
+	if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
+		setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
+
 	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
 	if (arglen <= 0)
 		return;
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index f6748c8..e462c1d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -81,6 +81,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_XFD,			X86_FEATURE_XSAVES    },
 	{ X86_FEATURE_XFD,			X86_FEATURE_XGETBV1   },
 	{ X86_FEATURE_AMX_TILE,			X86_FEATURE_XFD       },
+	{ X86_FEATURE_SHSTK,			X86_FEATURE_XSAVES    },
 	{}
 };
 
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 099b6f0..31c0e68 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -4,6 +4,8 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <asm/prctl.h>
+#include <linux/proc_fs.h>
 
 #include "cpu.h"
 
@@ -175,3 +177,24 @@ const struct seq_operations cpuinfo_op = {
 	.stop	= c_stop,
 	.show	= show_cpuinfo,
 };
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static void dump_x86_features(struct seq_file *m, unsigned long features)
+{
+	if (features & ARCH_SHSTK_SHSTK)
+		seq_puts(m, "shstk ");
+	if (features & ARCH_SHSTK_WRSS)
+		seq_puts(m, "wrss ");
+}
+
+void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task)
+{
+	seq_puts(m, "x86_Thread_features:\t");
+	dump_x86_features(m, task->thread.features);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "x86_Thread_features_locked:\t");
+	dump_x86_features(m, task->thread.features_locked);
+	seq_putc(m, '\n');
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 98e507c..a86d370 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -552,8 +552,36 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
 	}
 }
 
+/* A passed ssp of zero will not cause any update */
+static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
+{
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+	struct cet_user_state *xstate;
+
+	/* If ssp update is not needed. */
+	if (!ssp)
+		return 0;
+
+	xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
+				XFEATURE_CET_USER);
+
+	/*
+	 * If there is a non-zero ssp, then 'dst' must be configured with a shadow
+	 * stack and the fpu state should be up to date since it was just copied
+	 * from the parent in fpu_clone(). So there must be a valid non-init CET
+	 * state location in the buffer.
+	 */
+	if (WARN_ON_ONCE(!xstate))
+		return 1;
+
+	xstate->user_ssp = (u64)ssp;
+#endif
+	return 0;
+}
+
 /* Clone current's FPU state on fork */
-int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
+int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
+	      unsigned long ssp)
 {
 	struct fpu *src_fpu = &current->thread.fpu;
 	struct fpu *dst_fpu = &dst->thread.fpu;
@@ -613,6 +641,12 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
 	if (use_xsave())
 		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
 
+	/*
+	 * Update shadow stack pointer, in case it changed during clone.
+	 */
+	if (update_fpu_shstk(dst, ssp))
+		return 1;
+
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
 
@@ -753,6 +787,24 @@ void switch_fpu_return(void)
 }
 EXPORT_SYMBOL_GPL(switch_fpu_return);
 
+void fpregs_lock_and_load(void)
+{
+	/*
+	 * fpregs_lock() only disables preemption (mostly). So modifying state
+	 * in an interrupt could screw up some in progress fpregs operation.
+	 * Warn about it.
+	 */
+	WARN_ON_ONCE(!irq_fpu_usable());
+	WARN_ON_ONCE(current->flags & PF_KTHREAD);
+
+	fpregs_lock();
+
+	fpregs_assert_state_consistent();
+
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		fpregs_restore_userregs();
+}
+
 #ifdef CONFIG_X86_DEBUG_FPU
 /*
  * If current FPU state according to its tracking (loaded FPU context on this
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 6d056b6..6bc1eb2 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -8,6 +8,7 @@
 #include <asm/fpu/api.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/prctl.h>
 
 #include "context.h"
 #include "internal.h"
@@ -174,6 +175,86 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+int ssp_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (target->thread.features & ARCH_SHSTK_SHSTK)
+		return regset->n;
+
+	return 0;
+}
+
+int ssp_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+	struct fpu *fpu = &target->thread.fpu;
+	struct cet_user_state *cetregs;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+		return -ENODEV;
+
+	sync_fpstate(fpu);
+	cetregs = get_xsave_addr(&fpu->fpstate->regs.xsave, XFEATURE_CET_USER);
+	if (WARN_ON(!cetregs)) {
+		/*
+		 * This shouldn't ever be NULL because shadow stack was
+		 * verified to be enabled above. This means
+		 * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so
+		 * XFEATURE_CET_USER should not be in the init state.
+		 */
+		return -ENODEV;
+	}
+
+	return membuf_write(&to, (unsigned long *)&cetregs->user_ssp,
+			    sizeof(cetregs->user_ssp));
+}
+
+int ssp_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+	struct fpu *fpu = &target->thread.fpu;
+	struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
+	struct cet_user_state *cetregs;
+	unsigned long user_ssp;
+	int r;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !ssp_active(target, regset))
+		return -ENODEV;
+
+	if (pos != 0 || count != sizeof(user_ssp))
+		return -EINVAL;
+
+	r = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_ssp, 0, -1);
+	if (r)
+		return r;
+
+	/*
+	 * Some kernel instructions (IRET, etc) can cause exceptions in the case
+	 * of disallowed CET register values. Just prevent invalid values.
+	 */
+	if (user_ssp >= TASK_SIZE_MAX || !IS_ALIGNED(user_ssp, 8))
+		return -EINVAL;
+
+	fpu_force_restore(fpu);
+
+	cetregs = get_xsave_addr(xsave, XFEATURE_CET_USER);
+	if (WARN_ON(!cetregs)) {
+		/*
+		 * This shouldn't ever be NULL because shadow stack was
+		 * verified to be enabled above. This means
+		 * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so
+		 * XFEATURE_CET_USER should not be in the init state.
+		 */
+		return -ENODEV;
+	}
+
+	cetregs->user_ssp = user_ssp;
+	return 0;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1afbc48..41dac93 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -39,26 +39,26 @@
  */
 static const char *xfeature_names[] =
 {
-	"x87 floating point registers"	,
-	"SSE registers"			,
-	"AVX registers"			,
-	"MPX bounds registers"		,
-	"MPX CSR"			,
-	"AVX-512 opmask"		,
-	"AVX-512 Hi256"			,
-	"AVX-512 ZMM_Hi256"		,
-	"Processor Trace (unused)"	,
+	"x87 floating point registers",
+	"SSE registers",
+	"AVX registers",
+	"MPX bounds registers",
+	"MPX CSR",
+	"AVX-512 opmask",
+	"AVX-512 Hi256",
+	"AVX-512 ZMM_Hi256",
+	"Processor Trace (unused)",
 	"Protection Keys User registers",
 	"PASID state",
-	"unknown xstate feature"	,
-	"unknown xstate feature"	,
-	"unknown xstate feature"	,
-	"unknown xstate feature"	,
-	"unknown xstate feature"	,
-	"unknown xstate feature"	,
-	"AMX Tile config"		,
-	"AMX Tile data"			,
-	"unknown xstate feature"	,
+	"Control-flow User registers",
+	"Control-flow Kernel registers (unused)",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"AMX Tile config",
+	"AMX Tile data",
+	"unknown xstate feature",
 };
 
 static unsigned short xsave_cpuid_features[] __initdata = {
@@ -73,6 +73,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
 	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
 	[XFEATURE_PKRU]				= X86_FEATURE_PKU,
 	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
+	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
 	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
 	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
 };
@@ -276,6 +277,7 @@ static void __init print_xstate_features(void)
 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 	print_xstate_feature(XFEATURE_MASK_PKRU);
 	print_xstate_feature(XFEATURE_MASK_PASID);
+	print_xstate_feature(XFEATURE_MASK_CET_USER);
 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
 }
@@ -344,6 +346,7 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
 	 XFEATURE_MASK_BNDREGS |		\
 	 XFEATURE_MASK_BNDCSR |			\
 	 XFEATURE_MASK_PASID |			\
+	 XFEATURE_MASK_CET_USER |		\
 	 XFEATURE_MASK_XTILE)
 
 /*
@@ -446,14 +449,15 @@ static void __init __xstate_dump_leaves(void)
 	}									\
 } while (0)
 
-#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
-	if ((nr == nr_macro) &&						\
-	    WARN_ONCE(sz != sizeof(__struct),				\
-		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
-		__stringify(nr_macro), sizeof(__struct), sz)) {		\
+#define XCHECK_SZ(sz, nr, __struct) ({					\
+	if (WARN_ONCE(sz != sizeof(__struct),				\
+	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
+	    xfeature_names[nr], sizeof(__struct), sz)) {		\
 		__xstate_dump_leaves();					\
 	}								\
-} while (0)
+	true;								\
+})
+
 
 /**
  * check_xtile_data_against_struct - Check tile data state size.
@@ -527,36 +531,28 @@ static bool __init check_xstate_against_struct(int nr)
 	 * Ask the CPU for the size of the state.
 	 */
 	int sz = xfeature_size(nr);
+
 	/*
 	 * Match each CPU state with the corresponding software
 	 * structure.
 	 */
-	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
-	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
-	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
-	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
-	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
-	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
-	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
-	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
-	XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
-
-	/* The tile data size varies between implementations. */
-	if (nr == XFEATURE_XTILE_DATA)
-		check_xtile_data_against_struct(sz);
-
-	/*
-	 * Make *SURE* to add any feature numbers in below if
-	 * there are "holes" in the xsave state component
-	 * numbers.
-	 */
-	if ((nr < XFEATURE_YMM) ||
-	    (nr >= XFEATURE_MAX) ||
-	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
-	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
+	switch (nr) {
+	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
+	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
+	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
+	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
+	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
+	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
+	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
+	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
+	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
+	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
+	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
+	default:
 		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
 		return false;
 	}
+
 	return true;
 }
 
diff --git a/arch/x86/kernel/ibt_selftest.S b/arch/x86/kernel/ibt_selftest.S
new file mode 100644
index 0000000..c43c4ed
--- /dev/null
+++ b/arch/x86/kernel/ibt_selftest.S
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <linux/objtool.h>
+#include <asm/nospec-branch.h>
+
+SYM_CODE_START(ibt_selftest_noendbr)
+	ANNOTATE_NOENDBR
+	UNWIND_HINT_FUNC
+	/* #CP handler sets %ax to 0 */
+	RET
+SYM_CODE_END(ibt_selftest_noendbr)
+
+SYM_FUNC_START(ibt_selftest)
+	lea ibt_selftest_noendbr(%rip), %rax
+	ANNOTATE_RETPOLINE_SAFE
+	jmp *%rax
+SYM_FUNC_END(ibt_selftest)
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index f395826..b786d48 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -107,7 +107,7 @@ static const __initconst struct idt_data def_idts[] = {
 	ISTG(X86_TRAP_MC,		asm_exc_machine_check, IST_INDEX_MCE),
 #endif
 
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
 	INTG(X86_TRAP_CP,		asm_exc_control_protection),
 #endif
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 72015db..9f09091 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -51,6 +51,7 @@
 #include <asm/unwind.h>
 #include <asm/tdx.h>
 #include <asm/mmu_context.h>
+#include <asm/shstk.h>
 
 #include "process.h"
 
@@ -122,6 +123,7 @@ void exit_thread(struct task_struct *tsk)
 
 	free_vm86(t);
 
+	shstk_free(tsk);
 	fpu__drop(fpu);
 }
 
@@ -162,6 +164,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	struct inactive_task_frame *frame;
 	struct fork_frame *fork_frame;
 	struct pt_regs *childregs;
+	unsigned long new_ssp;
 	int ret = 0;
 
 	childregs = task_pt_regs(p);
@@ -199,7 +202,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	frame->flags = X86_EFLAGS_FIXED;
 #endif
 
-	fpu_clone(p, clone_flags, args->fn);
+	/*
+	 * Allocate a new shadow stack for thread if needed. If shadow stack,
+	 * is disabled, new_ssp will remain 0, and fpu_clone() will know not to
+	 * update it.
+	 */
+	new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size);
+	if (IS_ERR_VALUE(new_ssp))
+		return PTR_ERR((void *)new_ssp);
+
+	fpu_clone(p, clone_flags, args->fn, new_ssp);
 
 	/* Kernel thread ? */
 	if (unlikely(p->flags & PF_KTHREAD)) {
@@ -245,6 +257,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
 		io_bitmap_share(p);
 
+	/*
+	 * If copy_thread() if failing, don't leak the shadow stack possibly
+	 * allocated in shstk_alloc_thread_stack() above.
+	 */
+	if (ret)
+		shstk_free(p);
+
 	return ret;
 }
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d181c1..33b2687 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -515,6 +515,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 		load_gs_index(__USER_DS);
 	}
 
+	reset_thread_features();
+
 	loadsegment(fs, 0);
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
@@ -894,6 +896,12 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 		else
 			return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
 #endif
+	case ARCH_SHSTK_ENABLE:
+	case ARCH_SHSTK_DISABLE:
+	case ARCH_SHSTK_LOCK:
+	case ARCH_SHSTK_UNLOCK:
+	case ARCH_SHSTK_STATUS:
+		return shstk_prctl(task, option, arg2);
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index dfaa270..095f04b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -58,6 +58,7 @@ enum x86_regset_64 {
 	REGSET64_FP,
 	REGSET64_IOPERM,
 	REGSET64_XSTATE,
+	REGSET64_SSP,
 };
 
 #define REGSET_GENERAL \
@@ -1267,6 +1268,17 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
 		.active		= ioperm_active,
 		.regset_get	= ioperm_get
 	},
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+	[REGSET64_SSP] = {
+		.core_note_type	= NT_X86_SHSTK,
+		.n		= 1,
+		.size		= sizeof(u64),
+		.align		= sizeof(u64),
+		.active		= ssp_active,
+		.regset_get	= ssp_get,
+		.set		= ssp_set
+	},
+#endif
 };
 
 static const struct user_regset_view user_x86_64_view = {
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
new file mode 100644
index 0000000..fd68992
--- /dev/null
+++ b/arch/x86/kernel/shstk.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * shstk.c - Intel shadow stack support
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ * Yu-cheng Yu <yu-cheng.yu@intel.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <linux/syscalls.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/shstk.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
+#include <asm/prctl.h>
+
+#define SS_FRAME_SIZE 8
+
+static bool features_enabled(unsigned long features)
+{
+	return current->thread.features & features;
+}
+
+static void features_set(unsigned long features)
+{
+	current->thread.features |= features;
+}
+
+static void features_clr(unsigned long features)
+{
+	current->thread.features &= ~features;
+}
+
+/*
+ * Create a restore token on the shadow stack.  A token is always 8-byte
+ * and aligned to 8.
+ */
+static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
+{
+	unsigned long addr;
+
+	/* Token must be aligned */
+	if (!IS_ALIGNED(ssp, 8))
+		return -EINVAL;
+
+	addr = ssp - SS_FRAME_SIZE;
+
+	/*
+	 * SSP is aligned, so reserved bits and mode bit are a zero, just mark
+	 * the token 64-bit.
+	 */
+	ssp |= BIT(0);
+
+	if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
+		return -EFAULT;
+
+	if (token_addr)
+		*token_addr = addr;
+
+	return 0;
+}
+
+/*
+ * VM_SHADOW_STACK will have a guard page. This helps userspace protect
+ * itself from attacks. The reasoning is as follows:
+ *
+ * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
+ * INCSSP instruction can increment the shadow stack pointer. It is the
+ * shadow stack analog of an instruction like:
+ *
+ *   addq $0x80, %rsp
+ *
+ * However, there is one important difference between an ADD on %rsp
+ * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
+ * memory of the first and last elements that were "popped". It can be
+ * thought of as acting like this:
+ *
+ * READ_ONCE(ssp);       // read+discard top element on stack
+ * ssp += nr_to_pop * 8; // move the shadow stack
+ * READ_ONCE(ssp-8);     // read+discard last popped stack element
+ *
+ * The maximum distance INCSSP can move the SSP is 2040 bytes, before
+ * it would read the memory. Therefore a single page gap will be enough
+ * to prevent any operation from shifting the SSP to an adjacent stack,
+ * since it would have to land in the gap at least once, causing a
+ * fault.
+ */
+static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
+				 unsigned long token_offset, bool set_res_tok)
+{
+	int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
+	struct mm_struct *mm = current->mm;
+	unsigned long mapped_addr, unused;
+
+	if (addr)
+		flags |= MAP_FIXED_NOREPLACE;
+
+	mmap_write_lock(mm);
+	mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
+			      VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+	mmap_write_unlock(mm);
+
+	if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
+		goto out;
+
+	if (create_rstor_token(mapped_addr + token_offset, NULL)) {
+		vm_munmap(mapped_addr, size);
+		return -EINVAL;
+	}
+
+out:
+	return mapped_addr;
+}
+
+static unsigned long adjust_shstk_size(unsigned long size)
+{
+	if (size)
+		return PAGE_ALIGN(size);
+
+	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+}
+
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+	int r;
+
+	r = vm_munmap(base, size);
+
+	/*
+	 * mmap_write_lock_killable() failed with -EINTR. This means
+	 * the process is about to die and have it's MM cleaned up.
+	 * This task shouldn't ever make it back to userspace. In this
+	 * case it is ok to leak a shadow stack, so just exit out.
+	 */
+	if (r == -EINTR)
+		return;
+
+	/*
+	 * For all other types of vm_munmap() failure, either the
+	 * system is out of memory or there is bug.
+	 */
+	WARN_ON_ONCE(r);
+}
+
+static int shstk_setup(void)
+{
+	struct thread_shstk *shstk = &current->thread.shstk;
+	unsigned long addr, size;
+
+	/* Already enabled */
+	if (features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	/* Also not supported for 32 bit and x32 */
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
+		return -EOPNOTSUPP;
+
+	size = adjust_shstk_size(0);
+	addr = alloc_shstk(0, size, 0, false);
+	if (IS_ERR_VALUE(addr))
+		return PTR_ERR((void *)addr);
+
+	fpregs_lock_and_load();
+	wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+	fpregs_unlock();
+
+	shstk->base = addr;
+	shstk->size = size;
+	features_set(ARCH_SHSTK_SHSTK);
+
+	return 0;
+}
+
+void reset_thread_features(void)
+{
+	memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
+	current->thread.features = 0;
+	current->thread.features_locked = 0;
+}
+
+unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
+				       unsigned long stack_size)
+{
+	struct thread_shstk *shstk = &tsk->thread.shstk;
+	unsigned long addr, size;
+
+	/*
+	 * If shadow stack is not enabled on the new thread, skip any
+	 * switch to a new shadow stack.
+	 */
+	if (!features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	/*
+	 * For CLONE_VM, except vfork, the child needs a separate shadow
+	 * stack.
+	 */
+	if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
+		return 0;
+
+	size = adjust_shstk_size(stack_size);
+	addr = alloc_shstk(0, size, 0, false);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	shstk->base = addr;
+	shstk->size = size;
+
+	return addr + size;
+}
+
+static unsigned long get_user_shstk_addr(void)
+{
+	unsigned long long ssp;
+
+	fpregs_lock_and_load();
+
+	rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+	fpregs_unlock();
+
+	return ssp;
+}
+
+#define SHSTK_DATA_BIT BIT(63)
+
+static int put_shstk_data(u64 __user *addr, u64 data)
+{
+	if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
+		return -EINVAL;
+
+	/*
+	 * Mark the high bit so that the sigframe can't be processed as a
+	 * return address.
+	 */
+	if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT))
+		return -EFAULT;
+	return 0;
+}
+
+static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
+{
+	unsigned long ldata;
+
+	if (unlikely(get_user(ldata, addr)))
+		return -EFAULT;
+
+	if (!(ldata & SHSTK_DATA_BIT))
+		return -EINVAL;
+
+	*data = ldata & ~SHSTK_DATA_BIT;
+
+	return 0;
+}
+
+static int shstk_push_sigframe(unsigned long *ssp)
+{
+	unsigned long target_ssp = *ssp;
+
+	/* Token must be aligned */
+	if (!IS_ALIGNED(target_ssp, 8))
+		return -EINVAL;
+
+	*ssp -= SS_FRAME_SIZE;
+	if (put_shstk_data((void __user *)*ssp, target_ssp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int shstk_pop_sigframe(unsigned long *ssp)
+{
+	struct vm_area_struct *vma;
+	unsigned long token_addr;
+	bool need_to_check_vma;
+	int err = 1;
+
+	/*
+	 * It is possible for the SSP to be off the end of a shadow stack by 4
+	 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
+	 * before it, it might be this case, so check that the address being
+	 * read is actually shadow stack.
+	 */
+	if (!IS_ALIGNED(*ssp, 8))
+		return -EINVAL;
+
+	need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
+
+	if (need_to_check_vma)
+		mmap_read_lock_killable(current->mm);
+
+	err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
+	if (unlikely(err))
+		goto out_err;
+
+	if (need_to_check_vma) {
+		vma = find_vma(current->mm, *ssp);
+		if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
+			err = -EFAULT;
+			goto out_err;
+		}
+
+		mmap_read_unlock(current->mm);
+	}
+
+	/* Restore SSP aligned? */
+	if (unlikely(!IS_ALIGNED(token_addr, 8)))
+		return -EINVAL;
+
+	/* SSP in userspace? */
+	if (unlikely(token_addr >= TASK_SIZE_MAX))
+		return -EINVAL;
+
+	*ssp = token_addr;
+
+	return 0;
+out_err:
+	if (need_to_check_vma)
+		mmap_read_unlock(current->mm);
+	return err;
+}
+
+int setup_signal_shadow_stack(struct ksignal *ksig)
+{
+	void __user *restorer = ksig->ka.sa.sa_restorer;
+	unsigned long ssp;
+	int err;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	if (!restorer)
+		return -EINVAL;
+
+	ssp = get_user_shstk_addr();
+	if (unlikely(!ssp))
+		return -EINVAL;
+
+	err = shstk_push_sigframe(&ssp);
+	if (unlikely(err))
+		return err;
+
+	/* Push restorer address */
+	ssp -= SS_FRAME_SIZE;
+	err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
+	if (unlikely(err))
+		return -EFAULT;
+
+	fpregs_lock_and_load();
+	wrmsrl(MSR_IA32_PL3_SSP, ssp);
+	fpregs_unlock();
+
+	return 0;
+}
+
+int restore_signal_shadow_stack(void)
+{
+	unsigned long ssp;
+	int err;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	ssp = get_user_shstk_addr();
+	if (unlikely(!ssp))
+		return -EINVAL;
+
+	err = shstk_pop_sigframe(&ssp);
+	if (unlikely(err))
+		return err;
+
+	fpregs_lock_and_load();
+	wrmsrl(MSR_IA32_PL3_SSP, ssp);
+	fpregs_unlock();
+
+	return 0;
+}
+
+void shstk_free(struct task_struct *tsk)
+{
+	struct thread_shstk *shstk = &tsk->thread.shstk;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !features_enabled(ARCH_SHSTK_SHSTK))
+		return;
+
+	/*
+	 * When fork() with CLONE_VM fails, the child (tsk) already has a
+	 * shadow stack allocated, and exit_thread() calls this function to
+	 * free it.  In this case the parent (current) and the child share
+	 * the same mm struct.
+	 */
+	if (!tsk->mm || tsk->mm != current->mm)
+		return;
+
+	unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+static int wrss_control(bool enable)
+{
+	u64 msrval;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Only enable WRSS if shadow stack is enabled. If shadow stack is not
+	 * enabled, WRSS will already be disabled, so don't bother clearing it
+	 * when disabling.
+	 */
+	if (!features_enabled(ARCH_SHSTK_SHSTK))
+		return -EPERM;
+
+	/* Already enabled/disabled? */
+	if (features_enabled(ARCH_SHSTK_WRSS) == enable)
+		return 0;
+
+	fpregs_lock_and_load();
+	rdmsrl(MSR_IA32_U_CET, msrval);
+
+	if (enable) {
+		features_set(ARCH_SHSTK_WRSS);
+		msrval |= CET_WRSS_EN;
+	} else {
+		features_clr(ARCH_SHSTK_WRSS);
+		if (!(msrval & CET_WRSS_EN))
+			goto unlock;
+
+		msrval &= ~CET_WRSS_EN;
+	}
+
+	wrmsrl(MSR_IA32_U_CET, msrval);
+
+unlock:
+	fpregs_unlock();
+
+	return 0;
+}
+
+static int shstk_disable(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+		return -EOPNOTSUPP;
+
+	/* Already disabled? */
+	if (!features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	fpregs_lock_and_load();
+	/* Disable WRSS too when disabling shadow stack */
+	wrmsrl(MSR_IA32_U_CET, 0);
+	wrmsrl(MSR_IA32_PL3_SSP, 0);
+	fpregs_unlock();
+
+	shstk_free(current);
+	features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
+
+	return 0;
+}
+
+SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
+{
+	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
+	unsigned long aligned_size;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+		return -EOPNOTSUPP;
+
+	if (flags & ~SHADOW_STACK_SET_TOKEN)
+		return -EINVAL;
+
+	/* If there isn't space for a token */
+	if (set_tok && size < 8)
+		return -ENOSPC;
+
+	if (addr && addr < SZ_4G)
+		return -ERANGE;
+
+	/*
+	 * An overflow would result in attempting to write the restore token
+	 * to the wrong location. Not catastrophic, but just return the right
+	 * error code and block it.
+	 */
+	aligned_size = PAGE_ALIGN(size);
+	if (aligned_size < size)
+		return -EOVERFLOW;
+
+	return alloc_shstk(addr, aligned_size, size, set_tok);
+}
+
+long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
+{
+	unsigned long features = arg2;
+
+	if (option == ARCH_SHSTK_STATUS) {
+		return put_user(task->thread.features, (unsigned long __user *)arg2);
+	}
+
+	if (option == ARCH_SHSTK_LOCK) {
+		task->thread.features_locked |= features;
+		return 0;
+	}
+
+	/* Only allow via ptrace */
+	if (task != current) {
+		if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
+			task->thread.features_locked &= ~features;
+			return 0;
+		}
+		return -EINVAL;
+	}
+
+	/* Do not allow to change locked features */
+	if (features & task->thread.features_locked)
+		return -EPERM;
+
+	/* Only support enabling/disabling one feature at a time. */
+	if (hweight_long(features) > 1)
+		return -EINVAL;
+
+	if (option == ARCH_SHSTK_DISABLE) {
+		if (features & ARCH_SHSTK_WRSS)
+			return wrss_control(false);
+		if (features & ARCH_SHSTK_SHSTK)
+			return shstk_disable();
+		return -EINVAL;
+	}
+
+	/* Handle ARCH_SHSTK_ENABLE */
+	if (features & ARCH_SHSTK_SHSTK)
+		return shstk_setup();
+	if (features & ARCH_SHSTK_WRSS)
+		return wrss_control(true);
+	return -EINVAL;
+}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cfeec3e..65fe209 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -40,6 +40,7 @@
 #include <asm/syscall.h>
 #include <asm/sigframe.h>
 #include <asm/signal.h>
+#include <asm/shstk.h>
 
 static inline int is_ia32_compat_frame(struct ksignal *ksig)
 {
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 9027fc0..c12624b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -402,7 +402,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 */
 static_assert(NSIGILL  == 11);
 static_assert(NSIGFPE  == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
 static_assert(NSIGBUS  == 5);
 static_assert(NSIGTRAP == 6);
 static_assert(NSIGCHLD == 6);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 13a1e60..cacf2ed 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -175,6 +175,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
 	uc_flags = frame_uc_flags(regs);
 
+	if (setup_signal_shadow_stack(ksig))
+		return -EFAULT;
+
 	if (!user_access_begin(frame, sizeof(*frame)))
 		return -EFAULT;
 
@@ -260,6 +263,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
+	if (restore_signal_shadow_stack())
+		goto badframe;
+
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
@@ -403,7 +409,7 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 */
 static_assert(NSIGILL  == 11);
 static_assert(NSIGFPE  == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
 static_assert(NSIGBUS  == 5);
 static_assert(NSIGTRAP == 6);
 static_assert(NSIGCHLD == 6);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8cc653f..c783aeb 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -193,7 +193,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
-	info.low_limit = PAGE_SIZE;
+	if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
+		info.low_limit = SZ_4G;
+	else
+		info.low_limit = PAGE_SIZE;
+
 	info.high_limit = get_mmap_base(0);
 
 	/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4a817d2..c876f1d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -77,18 +77,6 @@
 
 DECLARE_BITMAP(system_vectors, NR_VECTORS);
 
-static inline void cond_local_irq_enable(struct pt_regs *regs)
-{
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_enable();
-}
-
-static inline void cond_local_irq_disable(struct pt_regs *regs)
-{
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_disable();
-}
-
 __always_inline int is_valid_bugaddr(unsigned long addr)
 {
 	if (addr < TASK_SIZE_MAX)
@@ -213,81 +201,6 @@ DEFINE_IDTENTRY(exc_overflow)
 	do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
 }
 
-#ifdef CONFIG_X86_KERNEL_IBT
-
-static __ro_after_init bool ibt_fatal = true;
-
-extern void ibt_selftest_ip(void); /* code label defined in asm below */
-
-enum cp_error_code {
-	CP_EC        = (1 << 15) - 1,
-
-	CP_RET       = 1,
-	CP_IRET      = 2,
-	CP_ENDBR     = 3,
-	CP_RSTRORSSP = 4,
-	CP_SETSSBSY  = 5,
-
-	CP_ENCL	     = 1 << 15,
-};
-
-DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
-{
-	if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
-		pr_err("Unexpected #CP\n");
-		BUG();
-	}
-
-	if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR))
-		return;
-
-	if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) {
-		regs->ax = 0;
-		return;
-	}
-
-	pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
-	if (!ibt_fatal) {
-		printk(KERN_DEFAULT CUT_HERE);
-		__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
-		return;
-	}
-	BUG();
-}
-
-/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */
-noinline bool ibt_selftest(void)
-{
-	unsigned long ret;
-
-	asm ("	lea ibt_selftest_ip(%%rip), %%rax\n\t"
-	     ANNOTATE_RETPOLINE_SAFE
-	     "	jmp *%%rax\n\t"
-	     "ibt_selftest_ip:\n\t"
-	     UNWIND_HINT_FUNC
-	     ANNOTATE_NOENDBR
-	     "	nop\n\t"
-
-	     : "=a" (ret) : : "memory");
-
-	return !ret;
-}
-
-static int __init ibt_setup(char *str)
-{
-	if (!strcmp(str, "off"))
-		setup_clear_cpu_cap(X86_FEATURE_IBT);
-
-	if (!strcmp(str, "warn"))
-		ibt_fatal = false;
-
-	return 1;
-}
-
-__setup("ibt=", ibt_setup);
-
-#endif /* CONFIG_X86_KERNEL_IBT */
-
 #ifdef CONFIG_X86_F00F_BUG
 void handle_invalid_op(struct pt_regs *regs)
 #else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2e861b9..ab778ea 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1112,8 +1112,22 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 				       (error_code & X86_PF_INSTR), foreign))
 		return 1;
 
+	/*
+	 * Shadow stack accesses (PF_SHSTK=1) are only permitted to
+	 * shadow stack VMAs. All other accesses result in an error.
+	 */
+	if (error_code & X86_PF_SHSTK) {
+		if (unlikely(!(vma->vm_flags & VM_SHADOW_STACK)))
+			return 1;
+		if (unlikely(!(vma->vm_flags & VM_WRITE)))
+			return 1;
+		return 0;
+	}
+
 	if (error_code & X86_PF_WRITE) {
 		/* write, present and write, not present: */
+		if (unlikely(vma->vm_flags & VM_SHADOW_STACK))
+			return 1;
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			return 1;
 		return 0;
@@ -1305,6 +1319,14 @@ void do_user_addr_fault(struct pt_regs *regs,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
+	/*
+	 * Read-only permissions can not be expressed in shadow stack PTEs.
+	 * Treat all shadow stack accesses as WRITE faults. This ensures
+	 * that the MM will prepare everything (e.g., break COW) such that
+	 * maybe_mkwrite() can create a proper shadow stack PTE.
+	 */
+	if (error_code & X86_PF_SHSTK)
+		flags |= FAULT_FLAG_WRITE;
 	if (error_code & X86_PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
 	if (error_code & X86_PF_INSTR)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index df4182b..bda9f12 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2074,12 +2074,12 @@ int set_memory_nx(unsigned long addr, int numpages)
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW | _PAGE_DIRTY), 0);
 }
 
 int set_memory_rox(unsigned long addr, int numpages)
 {
-	pgprot_t clr = __pgprot(_PAGE_RW);
+	pgprot_t clr = __pgprot(_PAGE_RW | _PAGE_DIRTY);
 
 	if (__supported_pte_mask & _PAGE_NX)
 		clr.pgprot |= _PAGE_NX;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d3a93e8..9deadf5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -881,3 +881,43 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 
 #endif /* CONFIG_X86_64 */
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHADOW_STACK)
+		return pte_mkwrite_shstk(pte);
+
+	pte = pte_mkwrite_novma(pte);
+
+	return pte_clear_saveddirty(pte);
+}
+
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHADOW_STACK)
+		return pmd_mkwrite_shstk(pmd);
+
+	pmd = pmd_mkwrite_novma(pmd);
+
+	return pmd_clear_saveddirty(pmd);
+}
+
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
+{
+	/*
+	 * Hardware before shadow stack can (rarely) set Dirty=1
+	 * on a Write=0 PTE. So the below condition
+	 * only indicates a software bug when shadow stack is
+	 * supported by the HW. This checking is covered in
+	 * pte_shstk().
+	 */
+	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+			pte_shstk(pte));
+}
+
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
+{
+	/* See note in arch_check_zapped_pte() */
+	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+			pmd_shstk(pmd));
+}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3e04f2b..49352fa 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -694,7 +694,7 @@ static struct trap_array_entry trap_array[] = {
 	TRAP_ENTRY(exc_coprocessor_error,		false ),
 	TRAP_ENTRY(exc_alignment_check,			false ),
 	TRAP_ENTRY(exc_simd_coprocessor_error,		false ),
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
 	TRAP_ENTRY(exc_control_protection,		false ),
 #endif
 };
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1b5cba7..1652c39 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -166,7 +166,7 @@ void make_lowmem_page_readwrite(void *vaddr)
 	if (pte == NULL)
 		return;		/* vaddr missing */
 
-	ptev = pte_mkwrite(*pte);
+	ptev = pte_mkwrite_novma(*pte);
 
 	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
 		BUG();
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 08f1ceb..9e5e680 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -148,7 +148,7 @@
 xen_pv_trap asm_exc_spurious_interrupt_bug
 xen_pv_trap asm_exc_coprocessor_error
 xen_pv_trap asm_exc_alignment_check
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
 xen_pv_trap asm_exc_control_protection
 #endif
 #ifdef CONFIG_X86_MCE
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index ef79cb6..9a7e5e5 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -262,7 +262,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)
 	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 	{ pte_val(pte) |= _PAGE_WRITABLE; return pte; }
 
 #define pgprot_noncached(prot) \
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a7d33f3..14db9b7 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -414,13 +414,7 @@ static int __init pseries_idle_probe(void)
 		return -ENODEV;
 
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		/*
-		 * Use local_paca instead of get_lppaca() since
-		 * preemption is not disabled, and it is not required in
-		 * fact, since lppaca_ptr does not need to be the value
-		 * associated to the current CPU, it can be from any CPU.
-		 */
-		if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
+		if (lppaca_shared_proc()) {
 			cpuidle_state_table = shared_states;
 			max_idle_state = ARRAY_SIZE(shared_states);
 		} else {
diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c
index 877e8cb..c978b42 100644
--- a/drivers/macintosh/ams/ams-core.c
+++ b/drivers/macintosh/ams/ams-core.c
@@ -176,7 +176,7 @@ int ams_sensor_attach(void)
 	return result;
 }
 
-int __init ams_init(void)
+static int __init ams_init(void)
 {
 	struct device_node *np;
 
diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h
index e053c15..5b295f5 100644
--- a/drivers/macintosh/ams/ams.h
+++ b/drivers/macintosh/ams/ams.h
@@ -6,6 +6,7 @@
 #include <linux/input.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 50b0c44..fbe16a6 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -269,11 +269,6 @@ static void attach_spa(struct cxl_afu *afu)
 	cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
 }
 
-static inline void detach_spa(struct cxl_afu *afu)
-{
-	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
-}
-
 void cxl_release_spa(struct cxl_afu *afu)
 {
 	if (afu->native->spa) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0ff9448..4cf9e7c 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -150,16 +150,7 @@ static inline resource_size_t p2_size(struct pci_dev *dev)
 
 static int find_cxl_vsec(struct pci_dev *dev)
 {
-	int vsec = 0;
-	u16 val;
-
-	while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
-		pci_read_config_word(dev, vsec + 0x4, &val);
-		if (val == CXL_PCI_VSEC_ID)
-			return vsec;
-	}
-	return 0;
-
+	return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID);
 }
 
 static void dump_cxl_config_space(struct pci_dev *dev)
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 759bb70..21c07ac 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -10,8 +10,6 @@
 #include <linux/phy.h>
 #include <linux/dma-mapping.h>
 
-#include <asm/fs_pd.h>
-
 #ifdef CONFIG_CPM1
 #include <asm/cpm1.h>
 #endif
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index d903a90..e2ffac9 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -37,7 +37,6 @@
 #include <linux/pgtable.h>
 
 #include <asm/immap_cpm2.h>
-#include <asm/mpc8260.h>
 #include <asm/cpm2.h>
 
 #include <asm/irq.h>
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 630f770..bcfd26e 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -19,12 +19,92 @@
 #include "../pci.h"		/* for pci_add_new_bus */
 #include "rpaphp.h"
 
+/*
+ * RTAS call get-sensor-state(DR_ENTITY_SENSE) return values as per PAPR:
+ * -- generic return codes ---
+ *    -1: Hardware Error
+ *    -2: RTAS_BUSY
+ *    -3: Invalid sensor. RTAS Parameter Error.
+ * -- rtas_get_sensor function specific return codes ---
+ * -9000: Need DR entity to be powered up and unisolated before RTAS call
+ * -9001: Need DR entity to be powered up, but not unisolated, before RTAS call
+ * -9002: DR entity unusable
+ *  990x: Extended delay - where x is a number in the range of 0-5
+ */
+#define RTAS_SLOT_UNISOLATED		-9000
+#define RTAS_SLOT_NOT_UNISOLATED	-9001
+#define RTAS_SLOT_NOT_USABLE		-9002
+
+static int rtas_get_sensor_errno(int rtas_rc)
+{
+	switch (rtas_rc) {
+	case 0:
+		/* Success case */
+		return 0;
+	case RTAS_SLOT_UNISOLATED:
+	case RTAS_SLOT_NOT_UNISOLATED:
+		return -EFAULT;
+	case RTAS_SLOT_NOT_USABLE:
+		return -ENODEV;
+	case RTAS_BUSY:
+	case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+		return -EBUSY;
+	default:
+		return rtas_error_rc(rtas_rc);
+	}
+}
+
+/*
+ * get_adapter_status() can be called by the EEH handler during EEH recovery.
+ * On certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns
+ * extended busy error (9902) until PHB is recovered by pHyp. The RTAS call
+ * interface rtas_get_sensor() loops over the RTAS call on extended delay
+ * return code (9902) until the return value is either success (0) or error
+ * (-1). This causes the EEH handler to get stuck for ~6 seconds before it
+ * could notify that the PCI error has been detected and stop any active
+ * operations. This sometimes causes EEH recovery to fail. To avoid this issue,
+ * invoke rtas_call(get-sensor-state) directly if the respective PE is in EEH
+ * recovery state and return -EBUSY error based on RTAS return status. This
+ * will help the EEH handler to notify the driver about the PCI error
+ * immediately and successfully proceed with EEH recovery steps.
+ */
+
+static int __rpaphp_get_sensor_state(struct slot *slot, int *state)
+{
+	int rc;
+	int token = rtas_token("get-sensor-state");
+	struct pci_dn *pdn;
+	struct eeh_pe *pe;
+	struct pci_controller *phb = PCI_DN(slot->dn)->phb;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	/*
+	 * Fallback to existing method for empty slot or PE isn't in EEH
+	 * recovery.
+	 */
+	pdn = list_first_entry_or_null(&PCI_DN(phb->dn)->child_list,
+					struct pci_dn, list);
+	if (!pdn)
+		goto fallback;
+
+	pe = eeh_dev_to_pe(pdn->edev);
+	if (pe && (pe->state & EEH_PE_RECOVERING)) {
+		rc = rtas_call(token, 2, 2, state, DR_ENTITY_SENSE,
+			       slot->index);
+		return rtas_get_sensor_errno(rc);
+	}
+fallback:
+	return rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
+}
+
 int rpaphp_get_sensor_state(struct slot *slot, int *state)
 {
 	int rc;
 	int setlevel;
 
-	rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
+	rc = __rpaphp_get_sensor_state(slot, state);
 
 	if (rc < 0) {
 		if (rc == -EFAULT || rc == -EEXIST) {
@@ -40,8 +120,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
 				dbg("%s: power on slot[%s] failed rc=%d.\n",
 				    __func__, slot->name, rc);
 			} else {
-				rc = rtas_get_sensor(DR_ENTITY_SENSE,
-						     slot->index, state);
+				rc = __rpaphp_get_sensor_state(slot, state);
 			}
 		} else if (rc == -ENODEV)
 			info("%s: slot is unusable\n", __func__);
diff --git a/fs/aio.c b/fs/aio.c
index b3174da..a4c2a6b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -558,7 +558,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 
 	ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
 				 PROT_READ | PROT_WRITE,
-				 MAP_SHARED, 0, &unused, NULL);
+				 MAP_SHARED, 0, 0, &unused, NULL);
 	mmap_write_unlock(mm);
 	if (IS_ERR((void *)ctx->mmap_base)) {
 		ctx->mmap_size = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d35bbf3..2c2efbe 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -431,6 +431,11 @@ static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
 	seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
 }
 
+__weak void arch_proc_pid_thread_features(struct seq_file *m,
+					  struct task_struct *task)
+{
+}
+
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -455,6 +460,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_cpus_allowed(m, task);
 	cpuset_task_status_allowed(m, task);
 	task_context_switch_counts(m, task);
+	arch_proc_pid_thread_features(m, task);
 	return 0;
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 15ddf46..0f2aa36 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -692,6 +692,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
 		[ilog2(VM_UFFD_MINOR)]	= "ui",
 #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+		[ilog2(VM_SHADOW_STACK)] = "ss",
+#endif
 	};
 	size_t i;
 
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index d7f6335..4da0279 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -22,7 +22,7 @@ static inline unsigned long huge_pte_dirty(pte_t pte)
 
 static inline pte_t huge_pte_mkwrite(pte_t pte)
 {
-	return pte_mkwrite(pte);
+	return pte_mkwrite_novma(pte);
 }
 
 #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7fbb459..db199d6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp);
 extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
-int arch_reserve_bp_slot(struct perf_event *bp);
-void arch_release_bp_slot(struct perf_event *bp);
-void arch_unregister_hw_breakpoint(struct perf_event *bp);
 
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 20e6d1d..bf5d0b1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -339,6 +341,21 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+/*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace protect
+ * itself from attacks. A single page is enough for current shadow stack archs
+ * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
+ * for more details on the guard size.
+ */
+# define VM_SHADOW_STACK	VM_HIGH_ARCH_5
+#else
+# define VM_SHADOW_STACK	VM_NONE
+#endif
+
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
@@ -370,7 +387,7 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT	37
+# define VM_UFFD_MINOR_BIT	38
 # define VM_UFFD_MINOR		BIT(VM_UFFD_MINOR_BIT)	/* UFFD minor faults */
 #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
 # define VM_UFFD_MINOR		VM_NONE
@@ -397,6 +414,8 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
 #endif
 
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
 #ifdef CONFIG_STACK_GROWSUP
 #define VM_STACK	VM_GROWSUP
 #define VM_STACK_EARLY	VM_GROWSDOWN
@@ -1309,7 +1328,7 @@ static inline unsigned long thp_size(struct page *page)
 static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
-		pte = pte_mkwrite(pte);
+		pte = pte_mkwrite(pte, vma);
 	return pte;
 }
 
@@ -3265,7 +3284,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	struct list_head *uf);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long pgoff, unsigned long *populate, struct list_head *uf);
+	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+	struct list_head *uf);
 extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 			 unsigned long start, size_t len, struct list_head *uf,
 			 bool unlock);
@@ -3353,15 +3373,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
 	return mtree_load(&mm->mm_mt, addr);
 }
 
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_GROWSDOWN)
+		return stack_guard_gap;
+
+	/* See reasoning around the VM_SHADOW_STACK definition */
+	if (vma->vm_flags & VM_SHADOW_STACK)
+		return PAGE_SIZE;
+
+	return 0;
+}
+
 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
 {
+	unsigned long gap = stack_guard_start_gap(vma);
 	unsigned long vm_start = vma->vm_start;
 
-	if (vma->vm_flags & VM_GROWSDOWN) {
-		vm_start -= stack_guard_gap;
-		if (vm_start > vma->vm_start)
-			vm_start = 0;
-	}
+	vm_start -= gap;
+	if (vm_start > vma->vm_start)
+		vm_start = 0;
 	return vm_start;
 }
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index cee1e4b..40d9441 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -15,6 +15,9 @@
 #ifndef MAP_32BIT
 #define MAP_32BIT 0
 #endif
+#ifndef MAP_ABOVE4G
+#define MAP_ABOVE4G 0
+#endif
 #ifndef MAP_HUGE_2MB
 #define MAP_HUGE_2MB 0
 #endif
@@ -50,6 +53,7 @@
 		| MAP_STACK \
 		| MAP_HUGETLB \
 		| MAP_32BIT \
+		| MAP_ABOVE4G \
 		| MAP_HUGE_2MB \
 		| MAP_HUGE_1GB)
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f49abcf..1fba072 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -371,6 +371,20 @@ static inline bool arch_has_hw_pte_young(void)
 }
 #endif
 
+#ifndef arch_check_zapped_pte
+static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
+					 pte_t pte)
+{
+}
+#endif
+
+#ifndef arch_check_zapped_pmd
+static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
+					 pmd_t pmd)
+{
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address,
@@ -577,6 +591,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
 			      pud_t *pudp);
 #endif
 
+#ifndef pte_mkwrite
+static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	return pte_mkwrite_novma(pte);
+}
+#endif
+
+#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
+static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+	return pmd_mkwrite_novma(pmd);
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
 struct mm_struct;
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 253f267..de407e7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -159,6 +159,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
 
 void arch_report_meminfo(struct seq_file *m);
+void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
 
 #else /* CONFIG_PROC_FS */
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c0cb22c..22bc6bc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -939,6 +939,7 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l
 asmlinkage long sys_cachestat(unsigned int fd,
 		struct cachestat_range __user *cstat_range,
 		struct cachestat __user *cstat, unsigned int flags);
+asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index ffbe4ce..0f52d0a 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -242,7 +242,8 @@ typedef struct siginfo {
 #define SEGV_ADIPERR	7	/* Precise MCD exception */
 #define SEGV_MTEAERR	8	/* Asynchronous ARM MTE error */
 #define SEGV_MTESERR	9	/* Synchronous ARM MTE exception */
-#define NSIGSEGV	9
+#define SEGV_CPERR	10	/* Control protection fault */
+#define NSIGSEGV	10
 
 /*
  * SIGBUS si_codes
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e0e1591..ee0bcff1 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,8 @@ typedef struct elf64_shdr {
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
 #define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK	0x204		/* x86 SHSTK state */
 #define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 #define NT_S390_TIMER	0x301		/* s390 timer register */
 #define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
diff --git a/ipc/shm.c b/ipc/shm.c
index 60e45e7..576a543 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1662,7 +1662,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 			goto invalid;
 	}
 
-	addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL);
+	addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
 	*raddr = addr;
 	err = 0;
 	if (IS_ERR_VALUE(addr))
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index c379770..6c2cb4e 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -523,26 +523,6 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int we
 	return 0;
 }
 
-__weak int arch_reserve_bp_slot(struct perf_event *bp)
-{
-	return 0;
-}
-
-__weak void arch_release_bp_slot(struct perf_event *bp)
-{
-}
-
-/*
- * Function to perform processor-specific cleanup during unregistration
- */
-__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
-{
-	/*
-	 * A weak stub function here for those archs that don't define
-	 * it inside arch/.../kernel/hw_breakpoint.c
-	 */
-}
-
 /*
  * Constraints to check before allowing this new breakpoint counter.
  *
@@ -594,7 +574,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 	enum bp_type_idx type;
 	int max_pinned_slots;
 	int weight;
-	int ret;
 
 	/* We couldn't initialize breakpoint constraints on boot */
 	if (!constraints_initialized)
@@ -613,10 +592,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 		return -ENOSPC;
 
-	ret = arch_reserve_bp_slot(bp);
-	if (ret)
-		return ret;
-
 	return toggle_bp_slot(bp, true, type, weight);
 }
 
@@ -634,8 +609,6 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
 	enum bp_type_idx type;
 	int weight;
 
-	arch_release_bp_slot(bp);
-
 	type = find_slot_idx(bp_type);
 	weight = hw_breakpoint_weight(bp);
 	WARN_ON(toggle_bp_slot(bp, false, type, weight));
@@ -645,7 +618,6 @@ void release_bp_slot(struct perf_event *bp)
 {
 	struct mutex *mtx = bp_constraints_lock(bp);
 
-	arch_unregister_hw_breakpoint(bp);
 	__release_bp_slot(bp, bp->attr.bp_type);
 	bp_constraints_unlock(mtx);
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 781de7c..e137c13 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -274,6 +274,7 @@ COND_SYSCALL(vm86old);
 COND_SYSCALL(modify_ldt);
 COND_SYSCALL(vm86);
 COND_SYSCALL(kexec_file_load);
+COND_SYSCALL(map_shadow_stack);
 
 /* s390 */
 COND_SYSCALL(s390_pci_mmio_read);
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index d61eaa0..48e329e 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -109,10 +109,10 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
 	WARN_ON(!pte_same(pte, pte));
 	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
 	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
-	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
+	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma)));
 	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
 	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
-	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
+	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma))));
 	WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
 	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
 }
@@ -156,7 +156,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 	pte = pte_mkclean(pte);
 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
 	flush_dcache_page(page);
-	pte = pte_mkwrite(pte);
+	pte = pte_mkwrite(pte, args->vma);
 	pte = pte_mkdirty(pte);
 	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
 	pte = ptep_get(args->ptep);
@@ -202,10 +202,10 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
 	WARN_ON(!pmd_same(pmd, pmd));
 	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
 	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
-	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
+	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma)));
 	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
 	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
-	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
+	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma))));
 	WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
 	WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
 	/*
@@ -256,7 +256,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 	pmd = pmd_mkclean(pmd);
 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
 	flush_dcache_page(page);
-	pmd = pmd_mkwrite(pmd);
+	pmd = pmd_mkwrite(pmd, args->vma);
 	pmd = pmd_mkdirty(pmd);
 	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
 	pmd = READ_ONCE(*args->pmdp);
diff --git a/mm/gup.c b/mm/gup.c
index 948f3b4..2f8a2d8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1051,7 +1051,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
 		    !writable_file_mapping_allowed(vma, gup_flags))
 			return -EFAULT;
 
-		if (!(vm_flags & VM_WRITE)) {
+		if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
 			if (!(gup_flags & FOLL_FORCE))
 				return -EFAULT;
 			/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fcafd9b..064fbd9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -551,7 +551,7 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
 pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
-		pmd = pmd_mkwrite(pmd);
+		pmd = pmd_mkwrite(pmd, vma);
 	return pmd;
 }
 
@@ -1566,7 +1566,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
 	pmd = pmd_mkyoung(pmd);
 	if (writable)
-		pmd = pmd_mkwrite(pmd);
+		pmd = pmd_mkwrite(pmd, vma);
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
 	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 	spin_unlock(vmf->ptl);
@@ -1675,6 +1675,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 */
 	orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd,
 						tlb->fullmm);
+	arch_check_zapped_pmd(vma, orig_pmd);
 	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 	if (vma_is_special_huge(vma)) {
 		if (arch_needs_pgtable_deposit())
@@ -1919,7 +1920,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	/* See change_pte_range(). */
 	if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
 	    can_change_pmd_writable(vma, addr, entry))
-		entry = pmd_mkwrite(entry);
+		entry = pmd_mkwrite(entry, vma);
 
 	ret = HPAGE_PMD_NR;
 	set_pmd_at(mm, addr, pmd, entry);
@@ -2233,7 +2234,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			if (write)
-				entry = pte_mkwrite(entry);
+				entry = pte_mkwrite(entry, vma);
 			if (anon_exclusive)
 				SetPageAnonExclusive(page + i);
 			if (!young)
@@ -3265,7 +3266,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_writable_migration_entry(entry))
-		pmde = pmd_mkwrite(pmde);
+		pmde = pmd_mkwrite(pmde, vma);
 	if (pmd_swp_uffd_wp(*pvmw->pmd))
 		pmde = pmd_mkuffd_wp(pmde);
 	if (!is_migration_entry_young(entry))
diff --git a/mm/internal.h b/mm/internal.h
index d1d4bf4..30cf724 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -556,14 +556,14 @@ static inline bool is_exec_mapping(vm_flags_t flags)
 }
 
 /*
- * Stack area - automatically grows in one direction
+ * Stack area (including shadow stacks)
  *
  * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
  * do_mmap() forbids all other combinations.
  */
 static inline bool is_stack_mapping(vm_flags_t flags)
 {
-	return (flags & VM_STACK) == VM_STACK;
+	return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
 }
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 405a483..6c264d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1430,6 +1430,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				continue;
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
+			arch_check_zapped_pte(vma, ptent);
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			zap_install_uffd_wp_if_needed(vma, addr, pte, details,
 						      ptent);
@@ -4124,7 +4125,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	entry = mk_pte(&folio->page, vma->vm_page_prot);
 	entry = pte_sw_mkyoung(entry);
 	if (vma->vm_flags & VM_WRITE)
-		entry = pte_mkwrite(pte_mkdirty(entry));
+		entry = pte_mkwrite(pte_mkdirty(entry), vma);
 
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
@@ -4842,7 +4843,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (writable)
-		pte = pte_mkwrite(pte);
+		pte = pte_mkwrite(pte, vma);
 	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
diff --git a/mm/migrate.c b/mm/migrate.c
index 78c9bd5..b7fa020 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,7 +220,7 @@ static bool remove_migration_pte(struct folio *folio,
 		if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
 			pte = pte_mkdirty(pte);
 		if (is_writable_migration_entry(entry))
-			pte = pte_mkwrite(pte);
+			pte = pte_mkwrite(pte, vma);
 		else if (pte_swp_uffd_wp(old_pte))
 			pte = pte_mkuffd_wp(pte);
 
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index d69131a..8ac1f79 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -624,7 +624,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		}
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (vma->vm_flags & VM_WRITE)
-			entry = pte_mkwrite(pte_mkdirty(entry));
+			entry = pte_mkwrite(pte_mkdirty(entry), vma);
 	}
 
 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
diff --git a/mm/mmap.c b/mm/mmap.c
index 514ced1..b56a7f0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1182,11 +1182,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
  */
 unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff,
-			unsigned long *populate, struct list_head *uf)
+			unsigned long flags, vm_flags_t vm_flags,
+			unsigned long pgoff, unsigned long *populate,
+			struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
-	vm_flags_t vm_flags;
 	int pkey = 0;
 
 	*populate = 0;
@@ -1246,7 +1246,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	 * to. we assume access permissions have been handled by the open
 	 * of the memory object, so we don't do any here.
 	 */
-	vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
 	if (flags & MAP_LOCKED)
@@ -1564,7 +1564,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	gap = mas.index;
 	gap += (info->align_offset - gap) & info->align_mask;
 	tmp = mas_next(&mas, ULONG_MAX);
-	if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
 		if (vm_start_gap(tmp) < gap + length - 1) {
 			low_limit = tmp->vm_end;
 			mas_reset(&mas);
@@ -1616,7 +1616,7 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 	gap -= (gap - info->align_offset) & info->align_mask;
 	gap_end = mas.last;
 	tmp = mas_next(&mas, ULONG_MAX);
-	if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
 		if (vm_start_gap(tmp) <= gap_end) {
 			high_limit = vm_start_gap(tmp);
 			mas_reset(&mas);
@@ -2998,7 +2998,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 
 	file = get_file(vma->vm_file);
 	ret = do_mmap(vma->vm_file, start, size,
-			prot, flags, pgoff, &populate, NULL);
+			prot, flags, 0, pgoff, &populate, NULL);
 	fput(file);
 out:
 	mmap_write_unlock(mm);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 130db91..b94fbb4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -185,7 +185,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 			if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
 			    !pte_write(ptent) &&
 			    can_change_pte_writable(vma, addr, ptent))
-				ptent = pte_mkwrite(ptent);
+				ptent = pte_mkwrite(ptent, vma);
 
 			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 			if (pte_needs_flush(oldpte, ptent))
diff --git a/mm/nommu.c b/mm/nommu.c
index 8dba41c..7f9e9e5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1016,6 +1016,7 @@ unsigned long do_mmap(struct file *file,
 			unsigned long len,
 			unsigned long prot,
 			unsigned long flags,
+			vm_flags_t vm_flags,
 			unsigned long pgoff,
 			unsigned long *populate,
 			struct list_head *uf)
@@ -1023,7 +1024,6 @@ unsigned long do_mmap(struct file *file,
 	struct vm_area_struct *vma;
 	struct vm_region *region;
 	struct rb_node *rb;
-	vm_flags_t vm_flags;
 	unsigned long capabilities, result;
 	int ret;
 	VMA_ITERATOR(vmi, current->mm, 0);
@@ -1043,7 +1043,7 @@ unsigned long do_mmap(struct file *file,
 
 	/* we've determined that we can make the mapping, now translate what we
 	 * now know into VMA flags */
-	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+	vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
 
 
 	/* we're going to need to record the mapping */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0fc69ef..96d9eae 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -86,7 +86,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	if (page_in_cache && !vm_shared)
 		writable = false;
 	if (writable)
-		_dst_pte = pte_mkwrite(_dst_pte);
+		_dst_pte = pte_mkwrite(_dst_pte, dst_vma);
 	if (flags & MFILL_ATOMIC_WP)
 		_dst_pte = pte_mkuffd_wp(_dst_pte);
 
diff --git a/mm/util.c b/mm/util.c
index 4ed8b9b..f08b655 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -543,7 +543,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	if (!ret) {
 		if (mmap_write_lock_killable(mm))
 			return -EINTR;
-		ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
+		ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate,
 			      &uf);
 		mmap_write_unlock(mm);
 		userfaultfd_unmap_complete(mm, &uf);
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h
similarity index 100%
rename from tools/testing/selftests/powerpc/copyloops/asm/export.h
rename to tools/testing/selftests/powerpc/copyloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 0ad4f12..5876220 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -24,7 +24,7 @@
 /* Setting timeout to -1 disables the alarm */
 static uint64_t timeout = 120;
 
-int run_test(int (test_function)(void), char *name)
+int run_test(int (test_function)(void), const char *name)
 {
 	bool terminated;
 	int rc, status;
@@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time)
 	timeout = time;
 }
 
-int test_harness(int (test_function)(void), char *name)
+int test_harness(int (test_function)(void), const char *name)
 {
 	int rc;
 
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
index 068d55f..b0bb774 100644
--- a/tools/testing/selftests/powerpc/include/subunit.h
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -6,37 +6,37 @@
 #ifndef _SELFTESTS_POWERPC_SUBUNIT_H
 #define _SELFTESTS_POWERPC_SUBUNIT_H
 
-static inline void test_start(char *name)
+static inline void test_start(const char *name)
 {
 	printf("test: %s\n", name);
 }
 
-static inline void test_failure_detail(char *name, char *detail)
+static inline void test_failure_detail(const char *name, const char *detail)
 {
 	printf("failure: %s [%s]\n", name, detail);
 }
 
-static inline void test_failure(char *name)
+static inline void test_failure(const char *name)
 {
 	printf("failure: %s\n", name);
 }
 
-static inline void test_error(char *name)
+static inline void test_error(const char *name)
 {
 	printf("error: %s\n", name);
 }
 
-static inline void test_skip(char *name)
+static inline void test_skip(const char *name)
 {
 	printf("skip: %s\n", name);
 }
 
-static inline void test_success(char *name)
+static inline void test_success(const char *name)
 {
 	printf("success: %s\n", name);
 }
 
-static inline void test_finish(char *name, int status)
+static inline void test_finish(const char *name, int status)
 {
 	if (status)
 		test_failure(name);
@@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status)
 		test_success(name);
 }
 
-static inline void test_set_git_version(char *value)
+static inline void test_set_git_version(const char *value)
 {
 	printf("tags: git_version:%s\n", value);
 }
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 36c30c6..66d7b23 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -32,7 +32,7 @@ typedef uint16_t u16;
 typedef uint8_t u8;
 
 void test_harness_set_timeout(uint64_t time);
-int test_harness(int (test_function)(void), char *name);
+int test_harness(int (test_function)(void), const char *name);
 
 int read_auxv(char *buf, ssize_t buf_size);
 void *find_auxv_entry(int type, char *auxv);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 4e1a294..0df1a3a 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,15 +1,16 @@
 # SPDX-License-Identifier: GPL-2.0-only
-hugetlb_vs_thp_test
-subpage_prot
-tempfile
-prot_sao
-segv_errors
-wild_bctr
-large_vm_fork_separation
 bad_accesses
-tlbie_test
+exec_prot
+hugetlb_vs_thp_test
+large_vm_fork_separation
+large_vm_gpr_corruption
 pkey_exec_prot
 pkey_siginfo
+prot_sao
+segv_errors
 stack_expansion_ldst
 stack_expansion_signal
-large_vm_gpr_corruption
+subpage_prot
+tempfile
+tlbie_test
+wild_bctr
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index cbeeaea..1b39b86 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -36,6 +36,7 @@
 CFLAGS += $(KHDR_INCLUDES) -fno-pie
 
 $(OUTPUT)/ptrace-gpr: ptrace-gpr.S
+$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S
 $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
index d7275b7..df62ff0 100644
--- a/tools/testing/selftests/powerpc/ptrace/child.h
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -48,12 +48,12 @@ struct child_sync {
 		}							\
 	} while (0)
 
-#define PARENT_SKIP_IF_UNSUPPORTED(x, sync)				\
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg)			\
 	do {								\
 		if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
 			(sync)->parent_gave_up = true;			\
 			prod_child(sync);				\
-			SKIP_IF(1);					\
+			SKIP_IF_MSG(1, msg);				\
 		}							\
 	} while (0)
 
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f6f8596..f6da4cb 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid)
 	 * to the child.
 	 */
 	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
-	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
 	info->amr = regs[0];
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index f75739b..e374c6b 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -884,7 +884,7 @@ static int perf_hwbreak(void)
 {
 	srand ( time(NULL) );
 
-	SKIP_IF(!perf_breakpoint_supported());
+	SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported");
 
 	return runtest();
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 1345e9b..75d30d6 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -64,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo)
 
 static void write_var(int len)
 {
-	__u8 *pcvar;
-	__u16 *psvar;
-	__u32 *pivar;
-	__u64 *plvar;
+	volatile __u8 *pcvar;
+	volatile __u16 *psvar;
+	volatile __u32 *pivar;
+	volatile __u64 *plvar;
 
 	switch (len) {
 	case 1:
-		pcvar = (__u8 *)&glvar;
+		pcvar = (volatile __u8 *)&glvar;
 		*pcvar = 0xff;
 		break;
 	case 2:
-		psvar = (__u16 *)&glvar;
+		psvar = (volatile __u16 *)&glvar;
 		*psvar = 0xffff;
 		break;
 	case 4:
-		pivar = (__u32 *)&glvar;
+		pivar = (volatile __u32 *)&glvar;
 		*pivar = 0xffffffff;
 		break;
 	case 8:
-		plvar = (__u64 *)&glvar;
+		plvar = (volatile __u64 *)&glvar;
 		*plvar = 0xffffffffffffffffLL;
 		break;
 	}
@@ -98,16 +98,16 @@ static void read_var(int len)
 
 	switch (len) {
 	case 1:
-		cvar = (__u8)glvar;
+		cvar = (volatile __u8)glvar;
 		break;
 	case 2:
-		svar = (__u16)glvar;
+		svar = (volatile __u16)glvar;
 		break;
 	case 4:
-		ivar = (__u32)glvar;
+		ivar = (volatile __u32)glvar;
 		break;
 	case 8:
-		lvar = (__u64)glvar;
+		lvar = (volatile __u64)glvar;
 		break;
 	}
 }
@@ -603,7 +603,7 @@ static int ptrace_hwbreak(void)
 	wait(NULL);
 
 	get_dbginfo(child_pid, &dbginfo);
-	SKIP_IF(dbginfo.num_data_bps == 0);
+	SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present");
 
 	dawr = dawr_present(&dbginfo);
 	run_tests(child_pid, &dbginfo, dawr);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
new file mode 100644
index 0000000..9aa2e58
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ppc-asm.h>
+
+.global same_watch_addr_load
+.global same_watch_addr_trap
+
+FUNC_START(same_watch_addr_child)
+	nop
+same_watch_addr_load:
+	ld 0,0(3)
+	nop
+same_watch_addr_trap:
+	trap
+	blr
+FUNC_END(same_watch_addr_child)
+
+
+.global perf_then_ptrace_load1
+.global perf_then_ptrace_load2
+.global perf_then_ptrace_trap
+
+FUNC_START(perf_then_ptrace_child)
+	nop
+perf_then_ptrace_load1:
+	ld 0,0(3)
+perf_then_ptrace_load2:
+	ld 0,0(4)
+	nop
+perf_then_ptrace_trap:
+	trap
+	blr
+FUNC_END(perf_then_ptrace_child)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
index 3344e74..a0a0b9b 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
@@ -1,142 +1,167 @@
 // SPDX-License-Identifier: GPL-2.0+
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
+
 #include <asm/unistd.h>
-#include <sys/ptrace.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/ptrace.h>
+#include <memory.h>
+#include <stdlib.h>
 #include <sys/wait.h>
-#include "ptrace.h"
 
-char data[16];
+#include "utils.h"
 
-/* Overlapping address range */
-volatile __u64 *ptrace_data1 = (__u64 *)&data[0];
-volatile __u64 *perf_data1 = (__u64 *)&data[4];
+/*
+ * Child subroutine that performs a load on the address, then traps
+ */
+void same_watch_addr_child(unsigned long *addr);
 
-/* Non-overlapping address range */
-volatile __u64 *ptrace_data2 = (__u64 *)&data[0];
-volatile __u64 *perf_data2 = (__u64 *)&data[8];
+/* Address of the ld instruction in same_watch_addr_child() */
+extern char same_watch_addr_load[];
 
-static unsigned long pid_max_addr(void)
+/* Address of the end trap instruction in same_watch_addr_child() */
+extern char same_watch_addr_trap[];
+
+/*
+ * Child subroutine that performs a load on the first address, then a load on
+ * the second address (with no instructions separating this from the first
+ * load), then traps.
+ */
+void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr);
+
+/* Address of the first ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load1[];
+
+/* Address of the second ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load2[];
+
+/* Address of the end trap instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_trap[];
+
+static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data)
 {
-	FILE *fp;
-	char *line, *c;
-	char addr[100];
-	size_t len = 0;
-
-	fp = fopen("/proc/kallsyms", "r");
-	if (!fp) {
-		printf("Failed to read /proc/kallsyms. Exiting..\n");
-		exit(EXIT_FAILURE);
-	}
-
-	while (getline(&line, &len, fp) != -1) {
-		if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") ||
-		    strstr(line, "pid_max_min"))
-			continue;
-
-		strncpy(addr, line, len < 100 ? len : 100);
-		c = strchr(addr, ' ');
-		*c = '\0';
-		return strtoul(addr, &c, 16);
-	}
-	fclose(fp);
-	printf("Could not find pix_max. Exiting..\n");
-	exit(EXIT_FAILURE);
-	return -1;
+	return syscall(__NR_ptrace, request, pid, addr, data);
 }
 
-static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len)
+static long ptrace_traceme(void)
 {
-	memset(attr, 0, sizeof(struct perf_event_attr));
-	attr->type           = PERF_TYPE_BREAKPOINT;
-	attr->size           = sizeof(struct perf_event_attr);
-	attr->bp_type        = HW_BREAKPOINT_R;
-	attr->bp_addr        = addr;
-	attr->bp_len         = len;
-	attr->exclude_kernel = 1;
-	attr->exclude_hv     = 1;
+	return sys_ptrace(PTRACE_TRACEME, 0, 0, 0);
 }
 
-static void perf_kernel_event_attr_set(struct perf_event_attr *attr)
+static long ptrace_getregs(pid_t pid, struct pt_regs *result)
 {
-	memset(attr, 0, sizeof(struct perf_event_attr));
-	attr->type           = PERF_TYPE_BREAKPOINT;
-	attr->size           = sizeof(struct perf_event_attr);
-	attr->bp_type        = HW_BREAKPOINT_R;
-	attr->bp_addr        = pid_max_addr();
-	attr->bp_len         = sizeof(unsigned long);
-	attr->exclude_user   = 1;
-	attr->exclude_hv     = 1;
+	return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result);
 }
 
-static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len)
+static long ptrace_setregs(pid_t pid, struct pt_regs *result)
 {
-	struct perf_event_attr attr;
-
-	perf_user_event_attr_set(&attr, addr, len);
-	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+	return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result);
 }
 
-static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len)
+static long ptrace_cont(pid_t pid, long signal)
 {
-	struct perf_event_attr attr;
-
-	perf_user_event_attr_set(&attr, addr, len);
-	return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+	return sys_ptrace(PTRACE_CONT, pid, 0, signal);
 }
 
-static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len)
+static long ptrace_singlestep(pid_t pid, long signal)
 {
-	struct perf_event_attr attr;
-
-	perf_user_event_attr_set(&attr, addr, len);
-	return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0);
+	return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal);
 }
 
-static int perf_thread_kernel_event_open(pid_t child_pid)
+static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo)
 {
-	struct perf_event_attr attr;
-
-	perf_kernel_event_attr_set(&attr);
-	return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+	return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo);
 }
 
-static int perf_cpu_kernel_event_open(int cpu)
+static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info)
 {
-	struct perf_event_attr attr;
-
-	perf_kernel_event_attr_set(&attr);
-	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+	return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info);
 }
 
-static int child(void)
+static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id)
 {
-	int ret;
+	return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id);
+}
 
-	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
-	if (ret) {
-		printf("Error: PTRACE_TRACEME failed\n");
-		return 0;
-	}
-	kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */
+static long ptrace_getreg_pc(pid_t pid, void **pc)
+{
+	struct pt_regs regs;
+	long err;
+
+	err = ptrace_getregs(pid, &regs);
+	if (err)
+		return err;
+
+	*pc = (void *)regs.nip;
 
 	return 0;
 }
 
-static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
-				     __u64 addr, int len)
+static long ptrace_setreg_pc(pid_t pid, void *pc)
+{
+	struct pt_regs regs;
+	long err;
+
+	err = ptrace_getregs(pid, &regs);
+	if (err)
+		return err;
+
+	regs.nip = (unsigned long)pc;
+
+	err = ptrace_setregs(pid, &regs);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+			   int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len)
+{
+	memset(attr, 0, sizeof(struct perf_event_attr));
+
+	attr->type		= PERF_TYPE_BREAKPOINT;
+	attr->size		= sizeof(struct perf_event_attr);
+	attr->bp_type		= HW_BREAKPOINT_R;
+	attr->bp_addr		= (u64)addr;
+	attr->bp_len		= len;
+	attr->exclude_kernel	= 1;
+	attr->exclude_hv	= 1;
+}
+
+static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len)
+{
+	struct perf_event_attr attr;
+
+	perf_user_event_attr_set(&attr, addr, len);
+	return perf_event_open(&attr, child_pid, -1, -1, 0);
+}
+
+static int perf_read_counter(int perf_fd, u64 *count)
+{
+	/*
+	 * A perf counter is retrieved by the read() syscall. It contains
+	 * the current count as 8 bytes that are interpreted as a u64
+	 */
+	ssize_t len = read(perf_fd, count, sizeof(*count));
+
+	if (len != sizeof(*count))
+		return -1;
+
+	return 0;
+}
+
+static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info,
+				       int type, void *addr, int len)
 {
 	info->version = 1;
 	info->trigger_type = type;
 	info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
-	info->addr = addr;
-	info->addr2 = addr + len;
+	info->addr = (u64)addr;
+	info->addr2 = (u64)addr + len;
 	info->condition_value = 0;
 	if (!len)
 		info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
@@ -144,516 +169,277 @@ static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
 		info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
 }
 
-static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len)
+/*
+ * Checks if we can place at least 2 watchpoints on the child process
+ */
+static int check_watchpoints(pid_t pid)
 {
-	struct ppc_hw_breakpoint info;
-
-	ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
-	return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
-}
-
-static int test1(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by ptrace)
-	 *	if (existing cpu event by perf)
-	 *		if (addr range overlaps)
-	 *			fail;
-	 */
-
-	perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
-	if (perf_fd < 0)
-		return -1;
-
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd > 0 || errno != ENOSPC)
-		ret = -1;
-
-	close(perf_fd);
-	return ret;
-}
-
-static int test2(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by ptrace)
-	 *	if (existing cpu event by perf)
-	 *		if (addr range does not overlaps)
-	 *			allow;
-	 */
-
-	perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
-	if (perf_fd < 0)
-		return -1;
-
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
-	if (ptrace_fd < 0) {
-		ret = -1;
-		goto perf_close;
-	}
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-
-perf_close:
-	close(perf_fd);
-	return ret;
-}
-
-static int test3(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by ptrace)
-	 *	if (existing thread event by perf on the same thread)
-	 *		if (addr range overlaps)
-	 *			fail;
-	 */
-	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
-					 sizeof(*perf_data1));
-	if (perf_fd < 0)
-		return -1;
-
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd > 0 || errno != ENOSPC)
-		ret = -1;
-
-	close(perf_fd);
-	return ret;
-}
-
-static int test4(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by ptrace)
-	 *	if (existing thread event by perf on the same thread)
-	 *		if (addr range does not overlaps)
-	 *			fail;
-	 */
-	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
-					 sizeof(*perf_data2));
-	if (perf_fd < 0)
-		return -1;
-
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
-	if (ptrace_fd < 0) {
-		ret = -1;
-		goto perf_close;
-	}
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-
-perf_close:
-	close(perf_fd);
-	return ret;
-}
-
-static int test5(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int cpid;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by ptrace)
-	 *	if (existing thread event by perf on the different thread)
-	 *		allow;
-	 */
-	cpid = fork();
-	if (!cpid) {
-		/* Temporary Child */
-		pause();
-		exit(EXIT_SUCCESS);
-	}
-
-	perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto kill_child;
-	}
-
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0) {
-		ret = -1;
-		goto perf_close;
-	}
-
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-perf_close:
-	close(perf_fd);
-kill_child:
-	kill(cpid, SIGINT);
-	return ret;
-}
-
-static int test6(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread kernel event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		allow;
-	 * -- OR --
-	 * if (new per cpu kernel event by perf)
-	 *	if (existing thread event by ptrace)
-	 *		allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_thread_kernel_event_open(child_pid);
-	if (perf_fd < 0) {
-		ret = -1;
-		goto ptrace_close;
-	}
-	close(perf_fd);
-
-	perf_fd = perf_cpu_kernel_event_open(0);
-	if (perf_fd < 0) {
-		ret = -1;
-		goto ptrace_close;
-	}
-	close(perf_fd);
-
-ptrace_close:
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test7(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range overlaps)
-	 *			fail;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
-					 sizeof(*perf_data1));
-	if (perf_fd > 0 || errno != ENOSPC)
-		ret = -1;
-
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test8(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range does not overlaps)
-	 *			allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
-					 sizeof(*perf_data2));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto ptrace_close;
-	}
-	close(perf_fd);
-
-ptrace_close:
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test9(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int cpid;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread event by perf)
-	 *	if (existing thread event by ptrace on the other thread)
-	 *		allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	cpid = fork();
-	if (!cpid) {
-		/* Temporary Child */
-		pause();
-		exit(EXIT_SUCCESS);
-	}
-
-	perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto kill_child;
-	}
-	close(perf_fd);
-
-kill_child:
-	kill(cpid, SIGINT);
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test10(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per cpu event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range overlaps)
-	 *			fail;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
-	if (perf_fd > 0 || errno != ENOSPC)
-		ret = -1;
-
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test11(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per cpu event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range does not overlap)
-	 *			allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto ptrace_close;
-	}
-	close(perf_fd);
-
-ptrace_close:
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test12(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread and per cpu event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range overlaps)
-	 *			fail;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1));
-	if (perf_fd > 0 || errno != ENOSPC)
-		ret = -1;
-
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test13(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread and per cpu event by perf)
-	 *	if (existing thread event by ptrace on the same thread)
-	 *		if (addr range does not overlap)
-	 *			allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
-	if (ptrace_fd < 0)
-		return -1;
-
-	perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto ptrace_close;
-	}
-	close(perf_fd);
-
-ptrace_close:
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int test14(pid_t child_pid)
-{
-	int perf_fd;
-	int ptrace_fd;
-	int cpid;
-	int ret = 0;
-
-	/* Test:
-	 * if (new per thread and per cpu event by perf)
-	 *	if (existing thread event by ptrace on the other thread)
-	 *		allow;
-	 */
-	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
-	if (ptrace_fd < 0)
-		return -1;
-
-	cpid = fork();
-	if (!cpid) {
-		/* Temporary Child */
-		pause();
-		exit(EXIT_SUCCESS);
-	}
-
-	perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1,
-					     sizeof(*perf_data1));
-	if (perf_fd < 0) {
-		ret = -1;
-		goto kill_child;
-	}
-	close(perf_fd);
-
-kill_child:
-	kill(cpid, SIGINT);
-	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-	return ret;
-}
-
-static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg)
-{
-	int ret;
-
-	ret = fun(arg);
-	if (ret)
-		printf("%s: Error\n", msg);
-	else
-		printf("%s: Ok\n", msg);
-	return ret;
-}
-
-char *desc[14] = {
-	"perf cpu event -> ptrace thread event (Overlapping)",
-	"perf cpu event -> ptrace thread event (Non-overlapping)",
-	"perf thread event -> ptrace same thread event (Overlapping)",
-	"perf thread event -> ptrace same thread event (Non-overlapping)",
-	"perf thread event -> ptrace other thread event",
-	"ptrace thread event -> perf kernel event",
-	"ptrace thread event -> perf same thread event (Overlapping)",
-	"ptrace thread event -> perf same thread event (Non-overlapping)",
-	"ptrace thread event -> perf other thread event",
-	"ptrace thread event -> perf cpu event (Overlapping)",
-	"ptrace thread event -> perf cpu event (Non-overlapping)",
-	"ptrace thread event -> perf same thread & cpu event (Overlapping)",
-	"ptrace thread event -> perf same thread & cpu event (Non-overlapping)",
-	"ptrace thread event -> perf other thread & cpu event",
-};
-
-static int test(pid_t child_pid)
-{
-	int ret = TEST_PASS;
-
-	ret |= do_test(desc[0], test1, child_pid);
-	ret |= do_test(desc[1], test2, child_pid);
-	ret |= do_test(desc[2], test3, child_pid);
-	ret |= do_test(desc[3], test4, child_pid);
-	ret |= do_test(desc[4], test5, child_pid);
-	ret |= do_test(desc[5], test6, child_pid);
-	ret |= do_test(desc[6], test7, child_pid);
-	ret |= do_test(desc[7], test8, child_pid);
-	ret |= do_test(desc[8], test9, child_pid);
-	ret |= do_test(desc[9], test10, child_pid);
-	ret |= do_test(desc[10], test11, child_pid);
-	ret |= do_test(desc[11], test12, child_pid);
-	ret |= do_test(desc[12], test13, child_pid);
-	ret |= do_test(desc[13], test14, child_pid);
-
-	return ret;
-}
-
-static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
-{
-	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
-		perror("Can't get breakpoint info");
-		exit(-1);
-	}
-}
-
-static int ptrace_perf_hwbreak(void)
-{
-	int ret;
-	pid_t child_pid;
 	struct ppc_debug_info dbginfo;
 
-	child_pid = fork();
-	if (!child_pid)
-		return child();
+	FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed");
+	SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)");
 
-	/* parent */
-	wait(NULL); /* <-- child (SIGUSR1) */
+	return 0;
+}
 
-	get_dbginfo(child_pid, &dbginfo);
-	SKIP_IF(dbginfo.num_data_bps <= 1);
+/*
+ * Wrapper around a plain fork() call that sets up the child for
+ * ptrace-ing. Both the parent and child return from this, though
+ * the child is stopped until ptrace_cont(pid) is run by the parent.
+ */
+static int ptrace_fork_child(pid_t *pid)
+{
+	int status;
 
-	ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
-	SKIP_IF(ret < 0);
-	close(ret);
+	*pid = fork();
 
-	ret = test(child_pid);
+	if (*pid < 0)
+		FAIL_IF_MSG(1, "Failed to fork child");
 
-	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	return ret;
+	if (!*pid) {
+		FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed");
+		FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP");
+	} else {
+		/* Synchronise on child SIGSTOP */
+		FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child");
+		FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	}
+
+	return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf watching the same data.
+ *
+ * We expect ptrace to take 'priority', as it is has before-execute
+ * semantics.
+ *
+ * The perf counter should not be incremented yet because perf has after-execute
+ * semantics. E.g., if ptrace changes the child PC, we don't even execute the
+ * instruction at all.
+ *
+ * When the child is stopped for ptrace, we test both continue and single step.
+ * Both should increment the perf counter. We also test changing the PC somewhere
+ * different and stepping, which should not increment the perf counter.
+ */
+int same_watch_addr_test(void)
+{
+	struct ppc_hw_breakpoint bp_info;	/* ptrace breakpoint info */
+	int bp_id;	/* Breakpoint handle of ptrace watchpoint */
+	int perf_fd;	/* File descriptor of perf performance counter */
+	u64 perf_count;	/* Most recently fetched perf performance counter value */
+	pid_t pid;	/* PID of child process */
+	void *pc;	/* Most recently fetched child PC value */
+	int status;	/* Stop status of child after waitpid */
+	unsigned long value;	/* Dummy value to be read/written to by child */
+	int err;
+
+	err = ptrace_fork_child(&pid);
+	if (err)
+		return err;
+
+	if (!pid) {
+		same_watch_addr_child(&value);
+		exit(1);
+	}
+
+	err = check_watchpoints(pid);
+	if (err)
+		return err;
+
+	/* Place a perf watchpoint counter on value */
+	perf_fd = perf_watchpoint_open(pid, &value, sizeof(value));
+	FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+	/* Place a ptrace watchpoint on value */
+	ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value));
+	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+	/* Let the child run. It should stop on the ptrace watchpoint */
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+
+	/*
+	 * We stopped before executing the load, so perf should not have
+	 * recorded any events yet
+	 */
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event");
+
+	/* Single stepping over the load should increment the perf counter */
+	FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+	/*
+	 * Set up a ptrace watchpoint on the value again and trigger it.
+	 * The perf counter should not have incremented because we do not
+	 * execute the load yet.
+	 */
+	FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed");
+
+	/* Continuing over the load should increment the perf counter */
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 2, "perf counter did not increment");
+
+	/*
+	 * If we set the child PC back to the load instruction, then continue,
+	 * we should reach the end trap (because ptrace is one-shot) and have
+	 * another perf event.
+	 */
+	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 3, "perf counter did not increment");
+
+	/*
+	 * If we set the child PC back to the load instruction, set a ptrace
+	 * watchpoint on the load, then continue, we should immediately get
+	 * the ptrace trap without incrementing the perf counter
+	 */
+	FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+	/*
+	 * If we change the PC while stopped on the load instruction, we should
+	 * not increment the perf counter (because ptrace is before-execute,
+	 * perf is after-execute).
+	 */
+	FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC");
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+	/* Clean up child */
+	FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+	return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf when:
+ * 1. perf watches a value
+ * 2. ptrace watches a different value
+ * 3. The perf value is read, then the ptrace value is read immediately after
+ *
+ * A breakpoint implementation may accidentally misattribute/skip one of
+ * the ptrace or perf handlers, as interrupt based work is done after perf
+ * and before ptrace.
+ *
+ * We expect the perf counter to increment before the ptrace watchpoint
+ * triggers.
+ */
+int perf_then_ptrace_test(void)
+{
+	struct ppc_hw_breakpoint bp_info;	/* ptrace breakpoint info */
+	int bp_id;	/* Breakpoint handle of ptrace watchpoint */
+	int perf_fd;	/* File descriptor of perf performance counter */
+	u64 perf_count;	/* Most recently fetched perf performance counter value */
+	pid_t pid;	/* PID of child process */
+	void *pc;	/* Most recently fetched child PC value */
+	int status;	/* Stop status of child after waitpid */
+	unsigned long perf_value;	/* Dummy value to be watched by perf */
+	unsigned long ptrace_value;	/* Dummy value to be watched by ptrace */
+	int err;
+
+	err = ptrace_fork_child(&pid);
+	if (err)
+		return err;
+
+	/*
+	 * If we are the child, run a subroutine that reads the perf value,
+	 * then reads the ptrace value with consecutive load instructions
+	 */
+	if (!pid) {
+		perf_then_ptrace_child(&perf_value, &ptrace_value);
+		exit(0);
+	}
+
+	err = check_watchpoints(pid);
+	if (err)
+		return err;
+
+	/* Place a perf watchpoint counter */
+	perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value));
+	FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+	/* Place a ptrace watchpoint */
+	ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ,
+				   &ptrace_value, sizeof(ptrace_value));
+	bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+	FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+	/* Let the child run. It should stop on the ptrace watchpoint */
+	FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+	FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+	FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+	FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load");
+
+	/* perf should have recorded the first load */
+	FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+	FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+	/* Clean up child */
+	FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+	return 0;
 }
 
 int main(int argc, char *argv[])
 {
-	return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak");
+	int err = 0;
+
+	err |= test_harness(same_watch_addr_test, "same_watch_addr");
+	err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace");
+
+	return err;
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bc454f8..d894743 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -192,7 +192,7 @@ static int parent(struct shared_info *info, pid_t pid)
 	 * to the child.
 	 */
 	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
-	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
 	info->amr1 = info->amr2 = regs[0];
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 4436ca9..14726c7 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -79,7 +79,7 @@ int ptrace_tar(void)
 	int ret, status;
 
 	// TAR was added in v2.07
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+	SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware");
 
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 5dc152b..7c70d62 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -112,8 +112,8 @@ int ptrace_tm_gpr(void)
 	pid_t pid;
 	int ret, status;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 458cc1a..6c17ed0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -118,8 +118,8 @@ int ptrace_tm_spd_gpr(void)
 	pid_t pid;
 	int ret, status;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index e112a34..afd8dc2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -128,8 +128,8 @@ int ptrace_tm_spd_tar(void)
 	pid_t pid;
 	int ret, status;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 40133d4..14d2fac 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -128,8 +128,8 @@ int ptrace_tm_spd_vsx(void)
 	pid_t pid;
 	int ret, status, i;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 
 	for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 880ba6a..e64cdb0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -113,8 +113,8 @@ int ptrace_tm_spr(void)
 	pid_t pid;
 	int ret, status;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
 	shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
 	pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index d0db6df..3963d4b0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -116,8 +116,8 @@ int ptrace_tm_tar(void)
 	pid_t pid;
 	int ret, status;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 4f05ce4..8c925d7 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -112,8 +112,8 @@ int ptrace_tm_vsx(void)
 	pid_t pid;
 	int ret, status, i;
 
-	SKIP_IF(!have_htm());
-	SKIP_IF(htm_is_synthetic());
+	SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+	SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
 	for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index cb9875f..11bc624 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,7 +61,7 @@ int ptrace_vsx(void)
 	pid_t pid;
 	int ret, status, i;
 
-	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+	SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX");
 
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h
similarity index 100%
rename from tools/testing/selftests/powerpc/stringloops/asm/export.h
rename to tools/testing/selftests/powerpc/stringloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
deleted file mode 120000
index 942b1d0..0000000
--- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../arch/powerpc/include/asm/lppaca.h
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
new file mode 120000
index 0000000..3a0b2a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/vphn.h
\ No newline at end of file
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 598135d..7e8c937 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -18,7 +18,7 @@
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
-			corrupt_xstate_header amx lam
+			corrupt_xstate_header amx lam test_shadow_stack
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
new file mode 100644
index 0000000..2188968
--- /dev/null
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -0,0 +1,884 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program test's basic kernel shadow stack support. It enables shadow
+ * stack manual via the arch_prctl(), instead of relying on glibc. It's
+ * Makefile doesn't compile with shadow stack support, so it doesn't rely on
+ * any particular glibc. As a result it can't do any operations that require
+ * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just
+ * stick to the basics and hope the compiler doesn't do anything strange.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/syscall.h>
+#include <asm/mman.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <sys/ptrace.h>
+#include <sys/signal.h>
+#include <linux/elf.h>
+
+/*
+ * Define the ABI defines if needed, so people can run the tests
+ * without building the headers.
+ */
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack	452
+
+#define SHADOW_STACK_SET_TOKEN	(1ULL << 0)
+
+#define ARCH_SHSTK_ENABLE	0x5001
+#define ARCH_SHSTK_DISABLE	0x5002
+#define ARCH_SHSTK_LOCK		0x5003
+#define ARCH_SHSTK_UNLOCK	0x5004
+#define ARCH_SHSTK_STATUS	0x5005
+
+#define ARCH_SHSTK_SHSTK	(1ULL <<  0)
+#define ARCH_SHSTK_WRSS		(1ULL <<  1)
+
+#define NT_X86_SHSTK	0x204
+#endif
+
+#define SS_SIZE 0x200000
+#define PAGE_SIZE 0x1000
+
+#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5)
+int main(int argc, char *argv[])
+{
+	printf("[SKIP]\tCompiler does not support CET.\n");
+	return 0;
+}
+#else
+void write_shstk(unsigned long *addr, unsigned long val)
+{
+	asm volatile("wrssq %[val], (%[addr])\n"
+		     : "=m" (addr)
+		     : [addr] "r" (addr), [val] "r" (val));
+}
+
+static inline unsigned long __attribute__((always_inline)) get_ssp(void)
+{
+	unsigned long ret = 0;
+
+	asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret));
+	return ret;
+}
+
+/*
+ * For use in inline enablement of shadow stack.
+ *
+ * The program can't return from the point where shadow stack gets enabled
+ * because there will be no address on the shadow stack. So it can't use
+ * syscall() for enablement, since it is a function.
+ *
+ * Based on code from nolibc.h. Keep a copy here because this can't pull in all
+ * of nolibc.h.
+ */
+#define ARCH_PRCTL(arg1, arg2)					\
+({								\
+	long _ret;						\
+	register long _num  asm("eax") = __NR_arch_prctl;	\
+	register long _arg1 asm("rdi") = (long)(arg1);		\
+	register long _arg2 asm("rsi") = (long)(arg2);		\
+								\
+	asm volatile (						\
+		"syscall\n"					\
+		: "=a"(_ret)					\
+		: "r"(_arg1), "r"(_arg2),			\
+		  "0"(_num)					\
+		: "rcx", "r11", "memory", "cc"			\
+	);							\
+	_ret;							\
+})
+
+void *create_shstk(void *addr)
+{
+	return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN);
+}
+
+void *create_normal_mem(void *addr)
+{
+	return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+}
+
+void free_shstk(void *shstk)
+{
+	munmap(shstk, SS_SIZE);
+}
+
+int reset_shstk(void *shstk)
+{
+	return madvise(shstk, SS_SIZE, MADV_DONTNEED);
+}
+
+void try_shstk(unsigned long new_ssp)
+{
+	unsigned long ssp;
+
+	printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n",
+	       new_ssp, *((unsigned long *)new_ssp));
+
+	ssp = get_ssp();
+	printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp);
+
+	asm volatile("rstorssp (%0)\n":: "r" (new_ssp));
+	asm volatile("saveprevssp");
+	printf("[INFO]\tssp is now %lx\n", get_ssp());
+
+	/* Switch back to original shadow stack */
+	ssp -= 8;
+	asm volatile("rstorssp (%0)\n":: "r" (ssp));
+	asm volatile("saveprevssp");
+}
+
+int test_shstk_pivot(void)
+{
+	void *shstk = create_shstk(0);
+
+	if (shstk == MAP_FAILED) {
+		printf("[FAIL]\tError creating shadow stack: %d\n", errno);
+		return 1;
+	}
+	try_shstk((unsigned long)shstk + SS_SIZE - 8);
+	free_shstk(shstk);
+
+	printf("[OK]\tShadow stack pivot\n");
+	return 0;
+}
+
+int test_shstk_faults(void)
+{
+	unsigned long *shstk = create_shstk(0);
+
+	/* Read shadow stack, test if it's zero to not get read optimized out */
+	if (*shstk != 0)
+		goto err;
+
+	/* Wrss memory that was already read. */
+	write_shstk(shstk, 1);
+	if (*shstk != 1)
+		goto err;
+
+	/* Page out memory, so we can wrss it again. */
+	if (reset_shstk((void *)shstk))
+		goto err;
+
+	write_shstk(shstk, 1);
+	if (*shstk != 1)
+		goto err;
+
+	printf("[OK]\tShadow stack faults\n");
+	return 0;
+
+err:
+	return 1;
+}
+
+unsigned long saved_ssp;
+unsigned long saved_ssp_val;
+volatile bool segv_triggered;
+
+void __attribute__((noinline)) violate_ss(void)
+{
+	saved_ssp = get_ssp();
+	saved_ssp_val = *(unsigned long *)saved_ssp;
+
+	/* Corrupt shadow stack */
+	printf("[INFO]\tCorrupting shadow stack\n");
+	write_shstk((void *)saved_ssp, 0);
+}
+
+void segv_handler(int signum, siginfo_t *si, void *uc)
+{
+	printf("[INFO]\tGenerated shadow stack violation successfully\n");
+
+	segv_triggered = true;
+
+	/* Fix shadow stack */
+	write_shstk((void *)saved_ssp, saved_ssp_val);
+}
+
+int test_shstk_violation(void)
+{
+	struct sigaction sa = {};
+
+	sa.sa_sigaction = segv_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	segv_triggered = false;
+
+	/* Make sure segv_triggered is set before violate_ss() */
+	asm volatile("" : : : "memory");
+
+	violate_ss();
+
+	signal(SIGSEGV, SIG_DFL);
+
+	printf("[OK]\tShadow stack violation test\n");
+
+	return !segv_triggered;
+}
+
+/* Gup test state */
+#define MAGIC_VAL 0x12345678
+bool is_shstk_access;
+void *shstk_ptr;
+int fd;
+
+void reset_test_shstk(void *addr)
+{
+	if (shstk_ptr)
+		free_shstk(shstk_ptr);
+	shstk_ptr = create_shstk(addr);
+}
+
+void test_access_fix_handler(int signum, siginfo_t *si, void *uc)
+{
+	printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write");
+
+	segv_triggered = true;
+
+	/* Fix shadow stack */
+	if (is_shstk_access) {
+		reset_test_shstk(shstk_ptr);
+		return;
+	}
+
+	free_shstk(shstk_ptr);
+	create_normal_mem(shstk_ptr);
+}
+
+bool test_shstk_access(void *ptr)
+{
+	is_shstk_access = true;
+	segv_triggered = false;
+	write_shstk(ptr, MAGIC_VAL);
+
+	asm volatile("" : : : "memory");
+
+	return segv_triggered;
+}
+
+bool test_write_access(void *ptr)
+{
+	is_shstk_access = false;
+	segv_triggered = false;
+	*(unsigned long *)ptr = MAGIC_VAL;
+
+	asm volatile("" : : : "memory");
+
+	return segv_triggered;
+}
+
+bool gup_write(void *ptr)
+{
+	unsigned long val;
+
+	lseek(fd, (unsigned long)ptr, SEEK_SET);
+	if (write(fd, &val, sizeof(val)) < 0)
+		return 1;
+
+	return 0;
+}
+
+bool gup_read(void *ptr)
+{
+	unsigned long val;
+
+	lseek(fd, (unsigned long)ptr, SEEK_SET);
+	if (read(fd, &val, sizeof(val)) < 0)
+		return 1;
+
+	return 0;
+}
+
+int test_gup(void)
+{
+	struct sigaction sa = {};
+	int status;
+	pid_t pid;
+
+	sa.sa_sigaction = test_access_fix_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	segv_triggered = false;
+
+	fd = open("/proc/self/mem", O_RDWR);
+	if (fd == -1)
+		return 1;
+
+	reset_test_shstk(0);
+	if (gup_read(shstk_ptr))
+		return 1;
+	if (test_shstk_access(shstk_ptr))
+		return 1;
+	printf("[INFO]\tGup read -> shstk access success\n");
+
+	reset_test_shstk(0);
+	if (gup_write(shstk_ptr))
+		return 1;
+	if (test_shstk_access(shstk_ptr))
+		return 1;
+	printf("[INFO]\tGup write -> shstk access success\n");
+
+	reset_test_shstk(0);
+	if (gup_read(shstk_ptr))
+		return 1;
+	if (!test_write_access(shstk_ptr))
+		return 1;
+	printf("[INFO]\tGup read -> write access success\n");
+
+	reset_test_shstk(0);
+	if (gup_write(shstk_ptr))
+		return 1;
+	if (!test_write_access(shstk_ptr))
+		return 1;
+	printf("[INFO]\tGup write -> write access success\n");
+
+	close(fd);
+
+	/* COW/gup test */
+	reset_test_shstk(0);
+	pid = fork();
+	if (!pid) {
+		fd = open("/proc/self/mem", O_RDWR);
+		if (fd == -1)
+			exit(1);
+
+		if (gup_write(shstk_ptr)) {
+			close(fd);
+			exit(1);
+		}
+		close(fd);
+		exit(0);
+	}
+	waitpid(pid, &status, 0);
+	if (WEXITSTATUS(status)) {
+		printf("[FAIL]\tWrite in child failed\n");
+		return 1;
+	}
+	if (*(unsigned long *)shstk_ptr == MAGIC_VAL) {
+		printf("[FAIL]\tWrite in child wrote through to shared memory\n");
+		return 1;
+	}
+
+	printf("[INFO]\tCow gup write -> write access success\n");
+
+	free_shstk(shstk_ptr);
+
+	signal(SIGSEGV, SIG_DFL);
+
+	printf("[OK]\tShadow gup test\n");
+
+	return 0;
+}
+
+int test_mprotect(void)
+{
+	struct sigaction sa = {};
+
+	sa.sa_sigaction = test_access_fix_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	segv_triggered = false;
+
+	/* mprotect a shadow stack as read only */
+	reset_test_shstk(0);
+	if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+		printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+		return 1;
+	}
+
+	/* try to wrss it and fail */
+	if (!test_shstk_access(shstk_ptr)) {
+		printf("[FAIL]\tShadow stack access to read-only memory succeeded\n");
+		return 1;
+	}
+
+	/*
+	 * The shadow stack was reset above to resolve the fault, make the new one
+	 * read-only.
+	 */
+	if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+		printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+		return 1;
+	}
+
+	/* then back to writable */
+	if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) {
+		printf("[FAIL]\tmprotect(PROT_WRITE) failed\n");
+		return 1;
+	}
+
+	/* then wrss to it and succeed */
+	if (test_shstk_access(shstk_ptr)) {
+		printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n");
+		return 1;
+	}
+
+	free_shstk(shstk_ptr);
+
+	signal(SIGSEGV, SIG_DFL);
+
+	printf("[OK]\tmprotect() test\n");
+
+	return 0;
+}
+
+char zero[4096];
+
+static void *uffd_thread(void *arg)
+{
+	struct uffdio_copy req;
+	int uffd = *(int *)arg;
+	struct uffd_msg msg;
+	int ret;
+
+	while (1) {
+		ret = read(uffd, &msg, sizeof(msg));
+		if (ret > 0)
+			break;
+		else if (errno == EAGAIN)
+			continue;
+		return (void *)1;
+	}
+
+	req.dst = msg.arg.pagefault.address;
+	req.src = (__u64)zero;
+	req.len = 4096;
+	req.mode = 0;
+
+	if (ioctl(uffd, UFFDIO_COPY, &req))
+		return (void *)1;
+
+	return (void *)0;
+}
+
+int test_userfaultfd(void)
+{
+	struct uffdio_register uffdio_register;
+	struct uffdio_api uffdio_api;
+	struct sigaction sa = {};
+	pthread_t thread;
+	void *res;
+	int uffd;
+
+	sa.sa_sigaction = test_access_fix_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd < 0) {
+		printf("[SKIP]\tUserfaultfd unavailable.\n");
+		return 0;
+	}
+
+	reset_test_shstk(0);
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+		goto err;
+
+	uffdio_register.range.start = (__u64)shstk_ptr;
+	uffdio_register.range.len = 4096;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		goto err;
+
+	if (pthread_create(&thread, NULL, &uffd_thread, &uffd))
+		goto err;
+
+	reset_shstk(shstk_ptr);
+	test_shstk_access(shstk_ptr);
+
+	if (pthread_join(thread, &res))
+		goto err;
+
+	if (test_shstk_access(shstk_ptr))
+		goto err;
+
+	free_shstk(shstk_ptr);
+
+	signal(SIGSEGV, SIG_DFL);
+
+	if (!res)
+		printf("[OK]\tUserfaultfd test\n");
+	return !!res;
+err:
+	free_shstk(shstk_ptr);
+	close(uffd);
+	signal(SIGSEGV, SIG_DFL);
+	return 1;
+}
+
+/* Simple linked list for keeping track of mappings in test_guard_gap() */
+struct node {
+	struct node *next;
+	void *mapping;
+};
+
+/*
+ * This tests whether mmap will place other mappings in a shadow stack's guard
+ * gap. The steps are:
+ *   1. Finds an empty place by mapping and unmapping something.
+ *   2. Map a shadow stack in the middle of the known empty area.
+ *   3. Map a bunch of PAGE_SIZE mappings. These will use the search down
+ *      direction, filling any gaps until it encounters the shadow stack's
+ *      guard gap.
+ *   4. When a mapping lands below the shadow stack from step 2, then all
+ *      of the above gaps are filled. The search down algorithm will have
+ *      looked at the shadow stack gaps.
+ *   5. See if it landed in the gap.
+ */
+int test_guard_gap(void)
+{
+	void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+	struct node *head = NULL, *cur;
+
+	free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	munmap(free_area, SS_SIZE * 3);
+
+	shstk = create_shstk(free_area + SS_SIZE);
+	if (shstk == MAP_FAILED)
+		return 1;
+
+	while (test_map > shstk) {
+		test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (test_map == MAP_FAILED)
+			return 1;
+		cur = malloc(sizeof(*cur));
+		cur->mapping = test_map;
+
+		cur->next = head;
+		head = cur;
+	}
+
+	while (head) {
+		cur = head;
+		head = cur->next;
+		munmap(cur->mapping, PAGE_SIZE);
+		free(cur);
+	}
+
+	free_shstk(shstk);
+
+	if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
+		return 1;
+
+	printf("[OK]\tGuard gap test\n");
+
+	return 0;
+}
+
+/*
+ * Too complicated to pull it out of the 32 bit header, but also get the
+ * 64 bit one needed above. Just define a copy here.
+ */
+#define __NR_compat_sigaction 67
+
+/*
+ * Call 32 bit signal handler to get 32 bit signals ABI. Make sure
+ * to push the registers that will get clobbered.
+ */
+int sigaction32(int signum, const struct sigaction *restrict act,
+		struct sigaction *restrict oldact)
+{
+	register long syscall_reg asm("eax") = __NR_compat_sigaction;
+	register long signum_reg asm("ebx") = signum;
+	register long act_reg asm("ecx") = (long)act;
+	register long oldact_reg asm("edx") = (long)oldact;
+	int ret = 0;
+
+	asm volatile ("int $0x80;"
+		      : "=a"(ret), "=m"(oldact)
+		      : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg),
+			"r"(oldact_reg)
+		      : "r8", "r9", "r10", "r11"
+		     );
+
+	return ret;
+}
+
+sigjmp_buf jmp_buffer;
+
+void segv_gp_handler(int signum, siginfo_t *si, void *uc)
+{
+	segv_triggered = true;
+
+	/*
+	 * To work with old glibc, this can't rely on siglongjmp working with
+	 * shadow stack enabled, so disable shadow stack before siglongjmp().
+	 */
+	ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+	siglongjmp(jmp_buffer, -1);
+}
+
+/*
+ * Transition to 32 bit mode and check that a #GP triggers a segfault.
+ */
+int test_32bit(void)
+{
+	struct sigaction sa = {};
+	struct sigaction *sa32;
+
+	/* Create sigaction in 32 bit address range */
+	sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE,
+		    MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+	sa32->sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = segv_gp_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+
+	segv_triggered = false;
+
+	/* Make sure segv_triggered is set before triggering the #GP */
+	asm volatile("" : : : "memory");
+
+	/*
+	 * Set handler to somewhere in 32 bit address space
+	 */
+	sa32->sa_handler = (void *)sa32;
+	if (sigaction32(SIGUSR1, sa32, NULL))
+		return 1;
+
+	if (!sigsetjmp(jmp_buffer, 1))
+		raise(SIGUSR1);
+
+	if (segv_triggered)
+		printf("[OK]\t32 bit test\n");
+
+	return !segv_triggered;
+}
+
+void segv_handler_ptrace(int signum, siginfo_t *si, void *uc)
+{
+	/* The SSP adjustment caused a segfault. */
+	exit(0);
+}
+
+int test_ptrace(void)
+{
+	unsigned long saved_ssp, ssp = 0;
+	struct sigaction sa= {};
+	struct iovec iov;
+	int status;
+	int pid;
+
+	iov.iov_base = &ssp;
+	iov.iov_len = sizeof(ssp);
+
+	pid = fork();
+	if (!pid) {
+		ssp = get_ssp();
+
+		sa.sa_sigaction = segv_handler_ptrace;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGSEGV, &sa, NULL))
+			return 1;
+
+		ptrace(PTRACE_TRACEME, NULL, NULL, NULL);
+		/*
+		 * The parent will tweak the SSP and return from this function
+		 * will #CP.
+		 */
+		raise(SIGTRAP);
+
+		exit(1);
+	}
+
+	while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP);
+
+	if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tFailed to PTRACE_GETREGS\n");
+		goto out_kill;
+	}
+
+	if (!ssp) {
+		printf("[INFO]\tPtrace child SSP was 0\n");
+		goto out_kill;
+	}
+
+	saved_ssp = ssp;
+
+	iov.iov_len = 0;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	iov.iov_len = sizeof(ssp) + 1;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	ssp += 1;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	ssp = 0xFFFFFFFFFFFF0000;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	/*
+	 * Tweak the SSP so the child with #CP when it resumes and returns
+	 * from raise()
+	 */
+	ssp = saved_ssp + 8;
+	iov.iov_len = sizeof(ssp);
+	if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tFailed to PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) {
+		printf("[INFO]\tFailed to PTRACE_DETACH\n");
+		goto out_kill;
+	}
+
+	waitpid(pid, &status, 0);
+	if (WEXITSTATUS(status))
+		return 1;
+
+	printf("[OK]\tPtrace test\n");
+	return 0;
+
+out_kill:
+	kill(pid, SIGKILL);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+		printf("[SKIP]\tCould not enable Shadow stack\n");
+		return 1;
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+		ret = 1;
+		printf("[FAIL]\tDisabling shadow stack failed\n");
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+		printf("[SKIP]\tCould not re-enable Shadow stack\n");
+		return 1;
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) {
+		printf("[SKIP]\tCould not enable WRSS\n");
+		ret = 1;
+		goto out;
+	}
+
+	/* Should have succeeded if here, but this is a test, so double check. */
+	if (!get_ssp()) {
+		printf("[FAIL]\tShadow stack disabled\n");
+		return 1;
+	}
+
+	if (test_shstk_pivot()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack pivot\n");
+		goto out;
+	}
+
+	if (test_shstk_faults()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack fault test\n");
+		goto out;
+	}
+
+	if (test_shstk_violation()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack violation test\n");
+		goto out;
+	}
+
+	if (test_gup()) {
+		ret = 1;
+		printf("[FAIL]\tShadow shadow stack gup\n");
+		goto out;
+	}
+
+	if (test_mprotect()) {
+		ret = 1;
+		printf("[FAIL]\tShadow shadow mprotect test\n");
+		goto out;
+	}
+
+	if (test_userfaultfd()) {
+		ret = 1;
+		printf("[FAIL]\tUserfaultfd test\n");
+		goto out;
+	}
+
+	if (test_guard_gap()) {
+		ret = 1;
+		printf("[FAIL]\tGuard gap test\n");
+		goto out;
+	}
+
+	if (test_ptrace()) {
+		ret = 1;
+		printf("[FAIL]\tptrace test\n");
+	}
+
+	if (test_32bit()) {
+		ret = 1;
+		printf("[FAIL]\t32 bit test\n");
+		goto out;
+	}
+
+	return ret;
+
+out:
+	/*
+	 * Disable shadow stack before the function returns, or there will be a
+	 * shadow stack violation.
+	 */
+	if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+		ret = 1;
+		printf("[FAIL]\tDisabling shadow stack failed\n");
+	}
+
+	return ret;
+}
+#endif