Merge tag 'v6.6-vfs.super.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull more superblock follow-on fixes from Christian Brauner:
"This contains two more small follow-up fixes for the super work this
cycle. I went through all filesystems once more and detected two minor
issues that still needed fixing:
- Some filesystems support mtd devices (e.g., mount -t jffs2 mtd2
/mnt). The mtd infrastructure uses the sb->s_mtd pointer to find an
existing superblock. When the mtd device is put and sb->s_mtd is
cleared, the superblock can still be found on fs_supers, so this
risks a use-after-free.
Add a small patch that aligns mtd with what we did for regular
block devices and switch keying to rely on sb->s_dev.
(This was tested with mtd devices and jffs2 as xfstests doesn't
support mtd devices.)
- Switch nfs back to rely on kill_anon_super() so the superblock is
removed from the list of active supers before sb->s_fs_info is
freed"
* tag 'v6.6-vfs.super.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
NFS: switch back to using kill_anon_super
mtd: key superblock by device number
fs: export sget_dev()
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 12e2bf9..40f7cd2 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -80,3 +80,163 @@
Description: read only
This sysfs file exposes the cpumask which is designated to make
HCALLs to retrieve hv-gpci pmu event counter data.
+
+What: /sys/devices/hv_gpci/interface/processor_bus_topology
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ PROCESSOR_BUS_TOPOLOGY(0xD0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information;
+ the "Enable Performance Information Collection" option must be set.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/processor_config
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ PROCESSOR_CONFIG(0x90).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information;
+ the "Enable Performance Information Collection" option must be set.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information;
+ the "Enable Performance Information Collection" option must be set.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_domain
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information;
+ the "Enable Performance Information Collection" option must be set.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_partition
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information;
+ the "Enable Performance Information Collection" option must be set.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0b739a3..fcf79ac 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3753,7 +3753,7 @@
nohibernate [HIBERNATION] Disable hibernation and resume.
- nohlt [ARM,ARM64,MICROBLAZE,MIPS,SH] Forces the kernel to
+ nohlt [ARM,ARM64,MICROBLAZE,MIPS,PPC,SH] Forces the kernel to
busy wait in do_idle() and not use the arch_cpu_idle()
implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
to be effective. This is useful on platforms where the
@@ -3889,10 +3889,10 @@
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
- nosmt [KNL,MIPS,S390] Disable symmetric multithreading (SMT).
+ nosmt [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
- [KNL,X86] Disable symmetric multithreading (SMT).
+ [KNL,X86,PPC] Disable symmetric multithreading (SMT).
nosmt=force: Force disable SMT, cannot be undone
via the sysfs control file.
diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
index c73d133..8ac64d7 100644
--- a/Documentation/arch/x86/index.rst
+++ b/Documentation/arch/x86/index.rst
@@ -22,6 +22,7 @@
mtrr
pat
intel-hfi
+ shstk
iommu
intel_txt
amd-memory-encryption
diff --git a/Documentation/arch/x86/shstk.rst b/Documentation/arch/x86/shstk.rst
new file mode 100644
index 0000000..60260e8
--- /dev/null
+++ b/Documentation/arch/x86/shstk.rst
@@ -0,0 +1,179 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Control-flow Enforcement Technology (CET) Shadow Stack
+======================================================
+
+CET Background
+==============
+
+Control-flow Enforcement Technology (CET) covers several related x86 processor
+features that provide protection against control flow hijacking attacks. CET
+can protect both applications and the kernel.
+
+CET introduces shadow stack and indirect branch tracking (IBT). A shadow stack
+is a secondary stack allocated from memory which cannot be directly modified by
+applications. When executing a CALL instruction, the processor pushes the
+return address to both the normal stack and the shadow stack. Upon
+function return, the processor pops the shadow stack copy and compares it
+to the normal stack copy. If the two differ, the processor raises a
+control-protection fault. IBT verifies indirect CALL/JMP targets are intended
+as marked by the compiler with 'ENDBR' opcodes. Not all CPUs have both Shadow
+Stack and Indirect Branch Tracking. Today in the 64-bit kernel, only userspace
+shadow stack and kernel IBT are supported.
+
+Requirements to use Shadow Stack
+================================
+
+To use userspace shadow stack you need HW that supports it, a kernel
+configured with it and userspace libraries compiled with it.
+
+The kernel Kconfig option is X86_USER_SHADOW_STACK. When compiled in, shadow
+stacks can be disabled at runtime with the kernel parameter: nousershstk.
+
+To build a user shadow stack enabled kernel, Binutils v2.29 or LLVM v6 or later
+are required.
+
+At run time, /proc/cpuinfo shows CET features if the processor supports
+CET. "user_shstk" means that userspace shadow stack is supported on the current
+kernel and HW.
+
+Application Enabling
+====================
+
+An application's CET capability is marked in its ELF note and can be verified
+from readelf/llvm-readelf output::
+
+ readelf -n <application> | grep -a SHSTK
+ properties: x86 feature: SHSTK
+
+The kernel does not process these application markers directly. Applications
+or loaders must enable CET features using the interface described in section 4.
+Typically this would be done in dynamic loader or static runtime objects, as is
+the case in GLIBC.
+
+Enabling arch_prctl()'s
+=======================
+
+ELF features should be enabled by the loader using the below arch_prctl's. They
+are only supported in 64 bit user applications. These operate on the features
+on a per-thread basis. The enablement status is inherited on clone, so if the
+feature is enabled on the first thread, it will propagate to all the threads
+in an app.
+
+arch_prctl(ARCH_SHSTK_ENABLE, unsigned long feature)
+ Enable a single feature specified in 'feature'. Can only operate on
+ one feature at a time.
+
+arch_prctl(ARCH_SHSTK_DISABLE, unsigned long feature)
+ Disable a single feature specified in 'feature'. Can only operate on
+ one feature at a time.
+
+arch_prctl(ARCH_SHSTK_LOCK, unsigned long features)
+ Lock in features at their current enabled or disabled status. 'features'
+ is a mask of all features to lock. All bits set are processed, unset bits
+ are ignored. The mask is ORed with the existing value. So any feature bits
+ set here cannot be enabled or disabled afterwards.
+
+arch_prctl(ARCH_SHSTK_UNLOCK, unsigned long features)
+ Unlock features. 'features' is a mask of all features to unlock. All
+ bits set are processed, unset bits are ignored. Only works via ptrace.
+
+arch_prctl(ARCH_SHSTK_STATUS, unsigned long addr)
+ Copy the currently enabled features to the address passed in addr. The
+ features are described using the bits passed into the others in
+ 'features'.
+
+The return values are as follows. On success, return 0. On error, errno can
+be::
+
+ -EPERM if any of the passed features are locked.
+ -ENOTSUPP if the feature is not supported by the hardware or
+ kernel.
+ -EINVAL if the arguments are invalid (non-existent feature, etc.)
+ -EFAULT if could not copy information back to userspace
+
+The feature's bits supported are::
+
+ ARCH_SHSTK_SHSTK - Shadow stack
+ ARCH_SHSTK_WRSS - WRSS
+
+Currently shadow stack and WRSS are supported via this interface. WRSS
+can only be enabled with shadow stack, and is automatically disabled
+if shadow stack is disabled.
+
+Proc Status
+===========
+To check if an application is actually running with shadow stack, the
+user can read the /proc/$PID/status. It will report "wrss" or "shstk"
+depending on what is enabled. The lines look like this::
+
+ x86_Thread_features: shstk wrss
+ x86_Thread_features_locked: shstk wrss
+
+Implementation of the Shadow Stack
+==================================
+
+Shadow Stack Size
+-----------------
+
+A task's shadow stack is allocated from memory to a fixed size of
+MIN(RLIMIT_STACK, 4 GB). In other words, the shadow stack is allocated to
+the maximum size of the normal stack, but capped to 4 GB. In the case
+of the clone3 syscall, there is a stack size passed in and shadow stack
+uses this instead of the rlimit.
+
+Signal
+------
+
+The main program and its signal handlers use the same shadow stack. Because
+the shadow stack stores only return addresses, a large shadow stack covers
+the condition that both the program stack and the signal alternate stack run
+out.
+
+When a signal happens, the old pre-signal state is pushed on the stack. When
+shadow stack is enabled, the shadow stack specific state is pushed onto the
+shadow stack. Today this is only the old SSP (shadow stack pointer), pushed
+in a special format with bit 63 set. On sigreturn this old SSP token is
+verified and restored by the kernel. The kernel will also push the normal
+restorer address to the shadow stack to help userspace avoid a shadow stack
+violation on the sigreturn path that goes through the restorer.
+
+So the shadow stack signal frame format is as follows::
+
+ |1...old SSP| - Pointer to old pre-signal ssp in sigframe token format
+ (bit 63 set to 1)
+ | ...| - Other state may be added in the future
+
+
+32 bit ABI signals are not supported in shadow stack processes. Linux prevents
+32 bit execution while shadow stack is enabled by allocating shadow stacks
+outside of the 32 bit address space. When execution enters 32 bit mode, either
+via far call or returning to userspace, a #GP is generated by the hardware
+which will be delivered to the process as a segfault. When transitioning to
+userspace the register state will be as if the userspace ip being returned to
+caused the segfault.
+
+Fork
+----
+
+The shadow stack's vma has VM_SHADOW_STACK flag set; its PTEs are required
+to be read-only and dirty. When a shadow stack PTE is not RO and dirty, a
+shadow stack access triggers a page fault with the shadow stack access bit set
+in the page fault error code.
+
+When a task forks a child, its shadow stack PTEs are copied and both the
+parent's and the child's shadow stack PTEs are cleared of the dirty bit.
+Upon the next shadow stack access, the resulting shadow stack page fault
+is handled by page copy/re-use.
+
+When a pthread child is created, the kernel allocates a new shadow stack
+for the new thread. New shadow stack creation behaves like mmap() with respect
+to ASLR behavior. Similarly, on thread exit the thread's shadow stack is
+disabled.
+
+Exec
+----
+
+On exec, shadow stack features are disabled by the kernel, at which point
+userspace can choose to re-enable or lock them.
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index d6109c7..1f96155 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -566,6 +566,7 @@
mt arm64 MTE allocation tags are enabled
um userfaultfd missing tracking
uw userfaultfd wr-protect tracking
+ ss shadow stack page
== =======================================
Note that there is no guarantee that every flag and associated mnemonic will
diff --git a/Documentation/mm/arch_pgtable_helpers.rst b/Documentation/mm/arch_pgtable_helpers.rst
index af3891f..c82e3ee 100644
--- a/Documentation/mm/arch_pgtable_helpers.rst
+++ b/Documentation/mm/arch_pgtable_helpers.rst
@@ -46,7 +46,11 @@
+---------------------------+--------------------------------------------------+
| pte_mkclean | Creates a clean PTE |
+---------------------------+--------------------------------------------------+
-| pte_mkwrite | Creates a writable PTE |
+| pte_mkwrite | Creates a writable PTE of the type specified by |
+| | the VMA. |
++---------------------------+--------------------------------------------------+
+| pte_mkwrite_novma | Creates a writable PTE, of the conventional type |
+| | of writable. |
+---------------------------+--------------------------------------------------+
| pte_wrprotect | Creates a write protected PTE |
+---------------------------+--------------------------------------------------+
@@ -118,7 +122,11 @@
+---------------------------+--------------------------------------------------+
| pmd_mkclean | Creates a clean PMD |
+---------------------------+--------------------------------------------------+
-| pmd_mkwrite | Creates a writable PMD |
+| pmd_mkwrite | Creates a writable PMD of the type specified by |
+| | the VMA. |
++---------------------------+--------------------------------------------------+
+| pmd_mkwrite_novma | Creates a writable PMD, of the conventional type |
+| | of writable. |
+---------------------------+--------------------------------------------------+
| pmd_wrprotect | Creates a write protected PMD |
+---------------------------+--------------------------------------------------+
diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst
index 77725d6..5629edf 100644
--- a/Documentation/powerpc/ptrace.rst
+++ b/Documentation/powerpc/ptrace.rst
@@ -15,7 +15,7 @@
that GDB doesn't need to special-case each of them. We added the
following 3 new ptrace requests.
-1. PTRACE_PPC_GETHWDEBUGINFO
+1. PPC_PTRACE_GETHWDBGINFO
============================
Query for GDB to discover the hardware debug features. The main info to
@@ -48,7 +48,7 @@
#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20
-2. PTRACE_SETHWDEBUG
+2. PPC_PTRACE_SETHWDEBUG
Sets a hardware breakpoint or watchpoint, according to the provided structure::
@@ -88,7 +88,7 @@
are not contemplated, but that is out of the scope of this work.
ptrace will return an integer (handle) uniquely identifying the breakpoint or
-watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG
request to ask for its removal. Return -ENOSPC if the requested breakpoint
can't be allocated on the registers.
@@ -150,7 +150,7 @@
p.addr2 = (uint64_t) end_range;
p.condition_value = 0;
-3. PTRACE_DELHWDEBUG
+3. PPC_PTRACE_DELHWDEBUG
Takes an integer which identifies an existing breakpoint or watchpoint
(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
diff --git a/arch/Kconfig b/arch/Kconfig
index ec49c01..12d5149 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -931,6 +931,14 @@
config ARCH_WANT_HUGE_PMD_SHARE
bool
+# Archs that want to use pmd_mkwrite on kernel memory need it defined even
+# if there are no userspace memory management features that use it
+config ARCH_WANT_KERNEL_PMD_MKWRITE
+ bool
+
+config ARCH_WANT_PMD_MKWRITE
+ def_bool TRANSPARENT_HUGEPAGE || ARCH_WANT_KERNEL_PMD_MKWRITE
+
config HAVE_ARCH_SOFT_DIRTY
bool
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 747b5f7..635f0a5 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -256,7 +256,7 @@ extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;
extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; }
extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; }
extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); return pte; }
-extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; }
+extern inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_FOW; return pte; }
extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; }
extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; }
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 5001b79..ef8d416 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -21,7 +21,7 @@ static inline pmd_t pte_pmd(pte_t pte)
}
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index ee78ab3..f3eea3f 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -87,7 +87,7 @@
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite_novma, |= (_PAGE_WRITE));
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 309b747..70480dd 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -350,19 +350,6 @@ static int locomo_resume(struct platform_device *dev)
}
#endif
-
-/**
- * locomo_probe - probe for a single LoCoMo chip.
- * @phys_addr: physical address of device.
- *
- * Probe for a LoCoMo chip. This must be called
- * before any other locomo-specific code.
- *
- * Returns:
- * %-ENODEV device not found.
- * %-EBUSY physical address already marked in-use.
- * %0 successful.
- */
static int
__locomo_probe(struct device *me, struct resource *mem, int irq)
{
@@ -479,6 +466,21 @@ static void __locomo_remove(struct locomo *lchip)
kfree(lchip);
}
+/**
+ * locomo_probe - probe for a single LoCoMo chip.
+ * @dev: platform device
+ *
+ * Probe for a LoCoMo chip. This must be called
+ * before any other locomo-specific code.
+ *
+ * Returns:
+ * * %-EINVAL - device's IORESOURCE_MEM not found
+ * * %-ENXIO - could not allocate an IRQ for the device
+ * * %-ENODEV - device not found.
+ * * %-EBUSY - physical address already marked in-use.
+ * * %-ENOMEM - could not allocate or iomap memory.
+ * * %0 - successful.
+ */
static int locomo_probe(struct platform_device *dev)
{
struct resource *mem;
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 1060497..71c3add 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -202,7 +202,7 @@ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkwrite_novma, &= ~L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkclean, &= ~L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index ba573f2..16b02f4 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -228,7 +228,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
return set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(L_PTE_RDONLY));
}
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7f092cb..943ffcf 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -40,6 +40,7 @@ struct task_struct;
DECLARE_PER_CPU(struct task_struct *, __entry_task);
#include <asm/types.h>
+#include <asm/traps.h>
struct cpu_context_save {
__u32 r4;
@@ -66,7 +67,6 @@ struct thread_info {
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */
__u32 abi_syscall; /* ABI type and syscall nr */
- __u8 used_cp[16]; /* thread used copro */
unsigned long tp_value[2]; /* TLS registers */
union fp_state fpstate __attribute__((aligned(8)));
union vfp_state vfpstate;
@@ -105,6 +105,21 @@ extern void iwmmxt_task_restore(struct thread_info *, void *);
extern void iwmmxt_task_release(struct thread_info *);
extern void iwmmxt_task_switch(struct thread_info *);
+extern int iwmmxt_undef_handler(struct pt_regs *, u32);
+
+static inline void register_iwmmxt_undef_handler(void)
+{
+ static struct undef_hook iwmmxt_undef_hook = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000000,
+ .cpsr_mask = MODE_MASK | PSR_T_BIT,
+ .cpsr_val = USR_MODE,
+ .fn = iwmmxt_undef_handler,
+ };
+
+ register_undef_hook(&iwmmxt_undef_hook);
+}
+
extern void vfp_sync_hwstate(struct thread_info *);
extern void vfp_flush_hwstate(struct thread_info *);
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index 5b57b87..157ea34 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -102,7 +102,6 @@
#ifndef __ASSEMBLY__
void vfp_disable(void);
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs);
#endif
#endif /* __ASM_VFP_H */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 6a80d4b..219cbc7 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -47,7 +47,6 @@ int main(void)
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));
DEFINE(TI_ABI_SYSCALL, offsetof(struct thread_info, abi_syscall));
- DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp));
DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value));
DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate));
#ifdef CONFIG_VFP
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 76e8125..6150a71 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -446,258 +446,26 @@
__und_usr:
usr_entry uaccess=0
- mov r2, r4
- mov r3, r5
-
- @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
- @ faulting instruction depending on Thumb mode.
- @ r3 = regs->ARM_cpsr
- @
- @ The emulation code returns using r9 if it has emulated the
- @ instruction, or the more conventional lr if we are to treat
- @ this as a real undefined instruction
- @
- badr r9, ret_from_exception
-
@ IRQs must be enabled before attempting to read the instruction from
@ user space since that could cause a page/translation fault if the
@ page table was modified by another CPU.
enable_irq
- tst r3, #PSR_T_BIT @ Thumb mode?
- bne __und_usr_thumb
- sub r4, r2, #4 @ ARM instr at LR - 4
-1: ldrt r0, [r4]
- ARM_BE8(rev r0, r0) @ little endian instruction
-
+ tst r5, #PSR_T_BIT @ Thumb mode?
+ mov r1, #2 @ set insn size to 2 for Thumb
+ bne 0f @ handle as Thumb undef exception
+#ifdef CONFIG_FPE_NWFPE
+ adr r9, ret_from_exception
+ bl call_fpe @ returns via R9 on success
+#endif
+ mov r1, #4 @ set insn size to 4 for ARM
+0: mov r0, sp
uaccess_disable ip
-
- @ r0 = 32-bit ARM instruction which caused the exception
- @ r2 = PC value for the following instruction (:= regs->ARM_pc)
- @ r4 = PC value for the faulting instruction
- @ lr = 32-bit undefined instruction function
- badr lr, __und_usr_fault_32
- b call_fpe
-
-__und_usr_thumb:
- @ Thumb instruction
- sub r4, r2, #2 @ First half of thumb instr at LR - 2
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
-/*
- * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms
- * can never be supported in a single kernel, this code is not applicable at
- * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be
- * made about .arch directives.
- */
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
- ldr_va r5, cpu_architecture
- cmp r5, #CPU_ARCH_ARMv7
- blo __und_usr_fault_16 @ 16bit undefined instruction
-/*
- * The following code won't get run unless the running CPU really is v7, so
- * coding round the lack of ldrht on older arches is pointless. Temporarily
- * override the assembler target arch with the minimum required instead:
- */
- .arch armv6t2
-#endif
-2: ldrht r5, [r4]
-ARM_BE8(rev16 r5, r5) @ little endian instruction
- cmp r5, #0xe800 @ 32bit instruction if xx != 0
- blo __und_usr_fault_16_pan @ 16bit undefined instruction
-3: ldrht r0, [r2]
-ARM_BE8(rev16 r0, r0) @ little endian instruction
- uaccess_disable ip
- add r2, r2, #2 @ r2 is PC + 2, make it PC + 4
- str r2, [sp, #S_PC] @ it's a 2x16bit instr, update
- orr r0, r0, r5, lsl #16
- badr lr, __und_usr_fault_32
- @ r0 = the two 16-bit Thumb instructions which caused the exception
- @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
- @ r4 = PC value for the first 16-bit Thumb instruction
- @ lr = 32bit undefined instruction function
-
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target arch was overridden, change it back: */
-#ifdef CONFIG_CPU_32v6K
- .arch armv6k
-#else
- .arch armv6
-#endif
-#endif /* __LINUX_ARM_ARCH__ < 7 */
-#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
- b __und_usr_fault_16
-#endif
+ bl __und_fault
+ b ret_from_exception
UNWIND(.fnend)
ENDPROC(__und_usr)
-/*
- * The out of line fixup for the ldrt instructions above.
- */
- .pushsection .text.fixup, "ax"
- .align 2
-4: str r4, [sp, #S_PC] @ retry current instruction
- ret r9
- .popsection
- .pushsection __ex_table,"a"
- .long 1b, 4b
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
- .long 2b, 4b
- .long 3b, 4b
-#endif
- .popsection
-
-/*
- * Check whether the instruction is a co-processor instruction.
- * If yes, we need to call the relevant co-processor handler.
- *
- * Note that we don't do a full check here for the co-processor
- * instructions; all instructions with bit 27 set are well
- * defined. The only instructions that should fault are the
- * co-processor instructions. However, we have to watch out
- * for the ARM6/ARM7 SWI bug.
- *
- * NEON is a special case that has to be handled here. Not all
- * NEON instructions are co-processor instructions, so we have
- * to make a special case of checking for them. Plus, there's
- * five groups of them, so we have a table of mask/opcode pairs
- * to check against, and if any match then we branch off into the
- * NEON handler code.
- *
- * Emulators may wish to make use of the following registers:
- * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
- * r2 = PC value to resume execution after successful emulation
- * r9 = normal "successful" return address
- * r10 = this threads thread_info structure
- * lr = unrecognised instruction return address
- * IRQs enabled, FIQs enabled.
- */
- @
- @ Fall-through from Thumb-2 __und_usr
- @
-#ifdef CONFIG_NEON
- get_thread_info r10 @ get current thread
- adr r6, .LCneon_thumb_opcodes
- b 2f
-#endif
-call_fpe:
- get_thread_info r10 @ get current thread
-#ifdef CONFIG_NEON
- adr r6, .LCneon_arm_opcodes
-2: ldr r5, [r6], #4 @ mask value
- ldr r7, [r6], #4 @ opcode bits matching in mask
- cmp r5, #0 @ end mask?
- beq 1f
- and r8, r0, r5
- cmp r8, r7 @ NEON instruction?
- bne 2b
- mov r7, #1
- strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used
- strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used
- b do_vfp @ let VFP handler handle this
-1:
-#endif
- tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
- tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
- reteq lr
- and r8, r0, #0x00000f00 @ mask out CP number
- mov r7, #1
- add r6, r10, r8, lsr #8 @ add used_cp[] array offset first
- strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[]
-#ifdef CONFIG_IWMMXT
- @ Test if we need to give access to iWMMXt coprocessors
- ldr r5, [r10, #TI_FLAGS]
- rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only
- movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1)
- bcs iwmmxt_task_enable
-#endif
- ARM( add pc, pc, r8, lsr #6 )
- THUMB( lsr r8, r8, #6 )
- THUMB( add pc, r8 )
- nop
-
- ret.w lr @ CP#0
- W(b) do_fpe @ CP#1 (FPE)
- W(b) do_fpe @ CP#2 (FPE)
- ret.w lr @ CP#3
- ret.w lr @ CP#4
- ret.w lr @ CP#5
- ret.w lr @ CP#6
- ret.w lr @ CP#7
- ret.w lr @ CP#8
- ret.w lr @ CP#9
-#ifdef CONFIG_VFP
- W(b) do_vfp @ CP#10 (VFP)
- W(b) do_vfp @ CP#11 (VFP)
-#else
- ret.w lr @ CP#10 (VFP)
- ret.w lr @ CP#11 (VFP)
-#endif
- ret.w lr @ CP#12
- ret.w lr @ CP#13
- ret.w lr @ CP#14 (Debug)
- ret.w lr @ CP#15 (Control)
-
-#ifdef CONFIG_NEON
- .align 6
-
-.LCneon_arm_opcodes:
- .word 0xfe000000 @ mask
- .word 0xf2000000 @ opcode
-
- .word 0xff100000 @ mask
- .word 0xf4000000 @ opcode
-
- .word 0x00000000 @ mask
- .word 0x00000000 @ opcode
-
-.LCneon_thumb_opcodes:
- .word 0xef000000 @ mask
- .word 0xef000000 @ opcode
-
- .word 0xff100000 @ mask
- .word 0xf9000000 @ opcode
-
- .word 0x00000000 @ mask
- .word 0x00000000 @ opcode
-#endif
-
-do_fpe:
- add r10, r10, #TI_FPSTATE @ r10 = workspace
- ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
-
-/*
- * The FP module is called with these registers set:
- * r0 = instruction
- * r2 = PC+4
- * r9 = normal "successful" return address
- * r10 = FP workspace
- * lr = unrecognised FP instruction return address
- */
-
- .pushsection .data
- .align 2
-ENTRY(fp_enter)
- .word no_fp
- .popsection
-
-ENTRY(no_fp)
- ret lr
-ENDPROC(no_fp)
-
-__und_usr_fault_32:
- mov r1, #4
- b 1f
-__und_usr_fault_16_pan:
- uaccess_disable ip
-__und_usr_fault_16:
- mov r1, #2
-1: mov r0, sp
- badr lr, ret_from_exception
- b __und_fault
-ENDPROC(__und_usr_fault_32)
-ENDPROC(__und_usr_fault_16)
-
.align 5
__pabt_usr:
usr_entry
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index d2b4ac0..a0218c4 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -58,9 +58,19 @@
.text
.arm
+ENTRY(iwmmxt_undef_handler)
+ push {r9, r10, lr}
+ get_thread_info r10
+ mov r9, pc
+ b iwmmxt_task_enable
+ mov r0, #0
+ pop {r9, r10, pc}
+ENDPROC(iwmmxt_undef_handler)
+
/*
* Lazy switching of Concan coprocessor context
*
+ * r0 = struct pt_regs pointer
* r10 = struct thread_info pointer
* r9 = ret_from_exception
* lr = undefined instr exit
@@ -84,12 +94,12 @@
PJ4(mcr p15, 0, r2, c1, c0, 2)
ldr r3, =concan_owner
- add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area
- ldr r2, [sp, #60] @ current task pc value
+ ldr r2, [r0, #S_PC] @ current task pc value
ldr r1, [r3] @ get current Concan owner
- str r0, [r3] @ this task now owns Concan regs
sub r2, r2, #4 @ adjust pc back
- str r2, [sp, #60]
+ str r2, [r0, #S_PC]
+ add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area
+ str r0, [r3] @ this task now owns Concan regs
mrc p15, 0, r2, c2, c0, 0
mov r2, r2 @ cpwait
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 46364b6..5d07cf9 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -94,16 +94,28 @@ static void machine_crash_nonpanic_core(void *unused)
}
}
+static DEFINE_PER_CPU(call_single_data_t, cpu_stop_csd) =
+ CSD_INIT(machine_crash_nonpanic_core, NULL);
+
void crash_smp_send_stop(void)
{
static int cpus_stopped;
unsigned long msecs;
+ call_single_data_t *csd;
+ int cpu, this_cpu = raw_smp_processor_id();
if (cpus_stopped)
return;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- smp_call_function(machine_crash_nonpanic_core, NULL, false);
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+
+ csd = &per_cpu(cpu_stop_csd, cpu);
+ smp_call_function_single_async(cpu, csd);
+ }
+
msecs = 1000; /* Wait at most a second for the other cpus to stop */
while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
mdelay(1);
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
index 1d1fb22..4bca809 100644
--- a/arch/arm/kernel/pj4-cp0.c
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -126,6 +126,7 @@ static int __init pj4_cp0_init(void)
pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers);
elf_hwcap |= HWCAP_IWMMXT;
thread_register_notifier(&iwmmxt_notifier_block);
+ register_iwmmxt_undef_handler();
#endif
return 0;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0e8ff85..e16ed10 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -222,7 +222,6 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
- memset(thread->used_cp, 0, sizeof(thread->used_cp));
memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
memset(&thread->fpstate, 0, sizeof(union fp_state));
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index fef32d7..c421a89 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -584,8 +584,6 @@ static int fpa_set(struct task_struct *target,
{
struct thread_info *thread = task_thread_info(target);
- thread->used_cp[1] = thread->used_cp[2] = 1;
-
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&thread->fpstate,
0, sizeof(struct user_fp));
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 8d0afa11..79a6730 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -682,7 +682,7 @@ asmlinkage void do_rseq_syscall(struct pt_regs *regs)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index ed4f6e7..00d00d3 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -166,6 +166,7 @@ static int __init xscale_cp0_init(void)
pr_info("XScale iWMMXt coprocessor detected.\n");
elf_hwcap |= HWCAP_IWMMXT;
thread_register_notifier(&iwmmxt_notifier_block);
+ register_iwmmxt_undef_handler();
#endif
} else {
pr_info("XScale DSP coprocessor detected.\n");
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 61ce82a..072ff9b 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -56,6 +56,10 @@
movne r2, r2, lsr #2 @ turned into # of sets
sub r2, r2, #(1 << 5)
stmia r1, {r2, r3}
+#ifdef CONFIG_VFP
+ mov r1, #1 @ disable quirky VFP
+ str_l r1, VFP_arch_feroceon, r2
+#endif
ret lr
/*
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d8f9915..354d297 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -7,6 +7,7 @@
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
*/
+#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/opcodes.h>
@@ -104,6 +105,7 @@
@ plain LDR instruction. Weird, but it seems harmless.
.pushsection .text.fixup,"ax"
.align 2
+.Lrep: str r4, [sp, #S_PC] @ retry current instruction
.Lfix: ret r9 @ let the user eat segfaults
.popsection
@@ -111,3 +113,78 @@
.align 3
.long .Lx1, .Lfix
.popsection
+
+ @
+ @ Check whether the instruction is a co-processor instruction.
+ @ If yes, we need to call the relevant co-processor handler.
+ @ Only FPE instructions are dispatched here, everything else
+ @ is handled by undef hooks.
+ @
+ @ Emulators may wish to make use of the following registers:
+ @ r4 = PC value to resume execution after successful emulation
+ @ r9 = normal "successful" return address
+ @ lr = unrecognised instruction return address
+ @ IRQs enabled, FIQs enabled.
+ @
+ENTRY(call_fpe)
+ mov r2, r4
+ sub r4, r4, #4 @ ARM instruction at user PC - 4
+USERL( .Lrep, ldrt r0, [r4]) @ load opcode from user space
+ARM_BE8(rev r0, r0) @ little endian instruction
+
+ uaccess_disable ip
+
+ get_thread_info r10 @ get current thread
+ tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
+ reteq lr
+ and r8, r0, #0x00000f00 @ mask out CP number
+#ifdef CONFIG_IWMMXT
+ @ Test if we need to give access to iWMMXt coprocessors
+ ldr r5, [r10, #TI_FLAGS]
+ rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only
+ movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1)
+ movcs r0, sp @ pass struct pt_regs
+ bcs iwmmxt_task_enable
+#endif
+ add pc, pc, r8, lsr #6
+ nop
+
+ ret lr @ CP#0
+ b do_fpe @ CP#1 (FPE)
+ b do_fpe @ CP#2 (FPE)
+ ret lr @ CP#3
+ ret lr @ CP#4
+ ret lr @ CP#5
+ ret lr @ CP#6
+ ret lr @ CP#7
+ ret lr @ CP#8
+ ret lr @ CP#9
+ ret lr @ CP#10 (VFP)
+ ret lr @ CP#11 (VFP)
+ ret lr @ CP#12
+ ret lr @ CP#13
+ ret lr @ CP#14 (Debug)
+ ret lr @ CP#15 (Control)
+
+do_fpe:
+ add r10, r10, #TI_FPSTATE @ r10 = workspace
+ ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
+
+ @
+ @ The FP module is called with these registers set:
+ @ r0 = instruction
+ @ r2 = PC+4
+ @ r9 = normal "successful" return address
+ @ r10 = FP workspace
+ @ lr = unrecognised FP instruction return address
+ @
+
+ .pushsection .data
+ .align 2
+ENTRY(fp_enter)
+ .word no_fp
+ .popsection
+
+no_fp:
+ ret lr
+ENDPROC(no_fp)
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile
index 749901a..dfd64bc 100644
--- a/arch/arm/vfp/Makefile
+++ b/arch/arm/vfp/Makefile
@@ -8,4 +8,4 @@
# ccflags-y := -DDEBUG
# asflags-y := -DDEBUG
-obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o
+obj-y += vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
deleted file mode 100644
index 62206ef..0000000
--- a/arch/arm/vfp/entry.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/vfp/entry.S
- *
- * Copyright (C) 2004 ARM Limited.
- * Written by Deep Blue Solutions Limited.
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/vfpmacros.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-
-@ VFP entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r2 = PC value to resume execution after successful emulation
-@ r9 = normal "successful" return address
-@ r10 = this threads thread_info structure
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-@
-ENTRY(do_vfp)
- mov r1, r10
- str lr, [sp, #-8]!
- add r3, sp, #4
- str r9, [r3]
- bl vfp_entry
- ldr pc, [sp], #8
-ENDPROC(do_vfp)
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 5cd6d50..e43a630 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -375,3 +375,4 @@ struct op {
};
asmlinkage void vfp_save_state(void *location, u32 fpexc);
+asmlinkage u32 vfp_load_state(const void *location);
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a4610d0..d5a03f3 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -4,12 +4,6 @@
*
* Copyright (C) 2004 ARM Limited.
* Written by Deep Blue Solutions Limited.
- *
- * This code is called from the kernel's undefined instruction trap.
- * r1 holds the thread_info pointer
- * r3 holds the return address for successful handling.
- * lr holds the return address for unrecognised instructions.
- * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
*/
#include <linux/init.h>
#include <linux/linkage.h>
@@ -19,20 +13,6 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
- .macro DBGSTR, str
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- ldr r0, =1f
- bl _printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
.macro DBGSTR1, str, arg
#ifdef DEBUG
stmfd sp!, {r0-r3, ip, lr}
@@ -48,181 +28,25 @@
#endif
.endm
- .macro DBGSTR3, str, arg1, arg2, arg3
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- mov r3, \arg3
- mov r2, \arg2
- mov r1, \arg1
- ldr r0, =1f
- bl _printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
-
-@ VFP hardware support entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r1 = thread_info pointer
-@ r2 = PC value to resume execution after successful emulation
-@ r3 = normal "successful" return address
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-ENTRY(vfp_support_entry)
- ldr r11, [r1, #TI_CPU] @ CPU number
- add r10, r1, #TI_VFPSTATE @ r10 = workspace
-
- DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10
-
- .fpu vfpv2
- VFPFMRX r1, FPEXC @ Is the VFP enabled?
- DBGSTR1 "fpexc %08x", r1
- tst r1, #FPEXC_EN
- bne look_for_VFP_exceptions @ VFP is already enabled
-
- DBGSTR1 "enable %x", r10
- ldr r9, vfp_current_hw_state_address
- orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set
- ldr r4, [r9, r11, lsl #2] @ vfp_current_hw_state pointer
- bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled
- cmp r4, r10 @ this thread owns the hw context?
-#ifndef CONFIG_SMP
- @ For UP, checking that this thread owns the hw context is
- @ sufficient to determine that the hardware state is valid.
- beq vfp_hw_state_valid
-
- @ On UP, we lazily save the VFP context. As a different
- @ thread wants ownership of the VFP hardware, save the old
- @ state if there was a previous (valid) owner.
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-
- DBGSTR1 "save old state %p", r4
- cmp r4, #0 @ if the vfp_current_hw_state is NULL
- beq vfp_reload_hw @ then the hw state needs reloading
- VFPFSTMIA r4, r5 @ save the working registers
- VFPFMRX r5, FPSCR @ current status
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to save?
- beq 1f
- VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to read?
- beq 1f
- VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present)
-1:
-#endif
- stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2
-vfp_reload_hw:
-
-#else
- @ For SMP, if this thread does not own the hw context, then we
- @ need to reload it. No need to save the old state as on SMP,
- @ we always save the state when we switch away from a thread.
- bne vfp_reload_hw
-
- @ This thread has ownership of the current hardware context.
- @ However, it may have been migrated to another CPU, in which
- @ case the saved state is newer than the hardware context.
- @ Check this by looking at the CPU number which the state was
- @ last loaded onto.
- ldr ip, [r10, #VFP_CPU]
- teq ip, r11
- beq vfp_hw_state_valid
-
-vfp_reload_hw:
- @ We're loading this threads state into the VFP hardware. Update
- @ the CPU number which contains the most up to date VFP context.
- str r11, [r10, #VFP_CPU]
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-#endif
-
- DBGSTR1 "load state %p", r10
- str r10, [r9, r11, lsl #2] @ update the vfp_current_hw_state pointer
+ENTRY(vfp_load_state)
+ @ Load the current VFP state
+ @ r0 - load location
+ @ returns FPEXC
+ DBGSTR1 "load VFP state %p", r0
@ Load the saved state back into the VFP
- VFPFLDMIA r10, r5 @ reload the working registers while
+ VFPFLDMIA r0, r1 @ reload the working registers while
@ FPEXC is in a safe state
- ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to restore?
+ ldmia r0, {r0-r3} @ load FPEXC, FPSCR, FPINST, FPINST2
+ tst r0, #FPEXC_EX @ is there additional state to restore?
beq 1f
- VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to write?
+ VFPFMXR FPINST, r2 @ restore FPINST (only if FPEXC.EX is set)
+ tst r0, #FPEXC_FP2V @ is there an FPINST2 to write?
beq 1f
- VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present)
+ VFPFMXR FPINST2, r3 @ FPINST2 if needed (and present)
1:
-#endif
- VFPFMXR FPSCR, r5 @ restore status
-
-@ The context stored in the VFP hardware is up to date with this thread
-vfp_hw_state_valid:
- tst r1, #FPEXC_EX
- bne process_exception @ might as well handle the pending
- @ exception before retrying branch
- @ out before setting an FPEXC that
- @ stops us reading stuff
- VFPFMXR FPEXC, r1 @ Restore FPEXC last
- mov sp, r3 @ we think we have handled things
- pop {lr}
- sub r2, r2, #4 @ Retry current instruction - if Thumb
- str r2, [sp, #S_PC] @ mode it's two 16-bit instructions,
- @ else it's one 32-bit instruction, so
- @ always subtract 4 from the following
- @ instruction address.
-
-local_bh_enable_and_ret:
- adr r0, .
- mov r1, #SOFTIRQ_DISABLE_OFFSET
- b __local_bh_enable_ip @ tail call
-
-look_for_VFP_exceptions:
- @ Check for synchronous or asynchronous exception
- tst r1, #FPEXC_EX | FPEXC_DEX
- bne process_exception
- @ On some implementations of the VFP subarch 1, setting FPSCR.IXE
- @ causes all the CDP instructions to be bounced synchronously without
- @ setting the FPEXC.EX bit
- VFPFMRX r5, FPSCR
- tst r5, #FPSCR_IXE
- bne process_exception
-
- tst r5, #FPSCR_LENGTH_MASK
- beq skip
- orr r1, r1, #FPEXC_DEX
- b process_exception
-skip:
-
- @ Fall into hand on to next handler - appropriate coproc instr
- @ not recognised by VFP
-
- DBGSTR "not VFP"
- b local_bh_enable_and_ret
-
-process_exception:
- DBGSTR "bounce"
- mov sp, r3 @ setup for a return to the user code.
- pop {lr}
- mov r2, sp @ nothing stacked - regdump is at TOS
-
- @ Now call the C code to package up the bounce to the support code
- @ r0 holds the trigger instruction
- @ r1 holds the FPEXC value
- @ r2 pointer to register dump
- b VFP_bounce @ we have handled this - the support
- @ code will raise an exception if
- @ required. If not, the user code will
- @ retry the faulted instruction
-ENDPROC(vfp_support_entry)
+ VFPFMXR FPSCR, r1 @ restore status
+ ret lr
+ENDPROC(vfp_load_state)
ENTRY(vfp_save_state)
@ Save the current VFP state
@@ -242,10 +66,6 @@
ret lr
ENDPROC(vfp_save_state)
- .align
-vfp_current_hw_state_address:
- .word vfp_current_hw_state
-
.macro tbl_branch, base, tmp, shift
#ifdef CONFIG_THUMB2_KERNEL
adr \tmp, 1f
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 1ba5078..7e8773a 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/user.h>
#include <linux/export.h>
+#include <linux/perf_event.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
@@ -30,11 +31,6 @@
#include "vfpinstr.h"
#include "vfp.h"
-/*
- * Our undef handlers (in entry.S)
- */
-asmlinkage void vfp_support_entry(u32, void *, u32, u32);
-
static bool have_vfp __ro_after_init;
/*
@@ -42,7 +38,11 @@ static bool have_vfp __ro_after_init;
* Used in startup: set to non-zero if VFP checks fail
* After startup, holds VFP architecture
*/
-static unsigned int __initdata VFP_arch;
+static unsigned int VFP_arch;
+
+#ifdef CONFIG_CPU_FEROCEON
+extern unsigned int VFP_arch_feroceon __alias(VFP_arch);
+#endif
/*
* The pointer to the vfpstate structure of the thread which currently
@@ -314,13 +314,14 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
* emulate it.
*/
}
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
return exceptions & ~VFP_NAN_FLAG;
}
/*
* Package up a bounce condition.
*/
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
u32 fpscr, orig_fpscr, fpsid, exceptions;
@@ -356,14 +357,12 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
}
if (fpexc & FPEXC_EX) {
-#ifndef CONFIG_CPU_FEROCEON
/*
* Asynchronous exception. The instruction is read from FPINST
* and the interrupted instruction has to be restarted.
*/
trigger = fmrx(FPINST);
regs->ARM_pc -= 4;
-#endif
} else if (!(fpexc & FPEXC_DEX)) {
/*
* Illegal combination of bits. It can be caused by an
@@ -371,7 +370,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
* on VFP subarch 1.
*/
vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
- goto exit;
+ return;
}
/*
@@ -402,7 +401,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
* the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
*/
if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
- goto exit;
+ return;
/*
* The barrier() here prevents fpinst2 being read
@@ -415,8 +414,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
if (exceptions)
vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
- exit:
- local_bh_enable();
}
static void vfp_enable(void *unused)
@@ -645,27 +642,6 @@ static int vfp_starting_cpu(unsigned int unused)
return 0;
}
-/*
- * Entered with:
- *
- * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
- * r1 = thread_info pointer
- * r2 = PC value to resume execution after successful emulation
- * r3 = normal "successful" return address
- * lr = unrecognised instruction return address
- */
-asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc,
- u32 resume_return_address)
-{
- if (unlikely(!have_vfp))
- return;
-
- local_bh_disable();
- vfp_support_entry(trigger, ti, resume_pc, resume_return_address);
-}
-
-#ifdef CONFIG_KERNEL_MODE_NEON
-
static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
{
/*
@@ -688,47 +664,151 @@ static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
return 1;
}
-static struct undef_hook vfp_kmode_exception_hook[] = {{
+/*
+ * vfp_support_entry - Handle VFP exception
+ *
+ * @regs: pt_regs structure holding the register state at exception entry
+ * @trigger: The opcode of the instruction that triggered the exception
+ *
+ * Returns 0 if the exception was handled, or an error code otherwise.
+ */
+static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
+{
+ struct thread_info *ti = current_thread_info();
+ u32 fpexc;
+
+ if (unlikely(!have_vfp))
+ return -ENODEV;
+
+ if (!user_mode(regs))
+ return vfp_kmode_exception(regs, trigger);
+
+ local_bh_disable();
+ fpexc = fmrx(FPEXC);
+
+ /*
+ * If the VFP unit was not enabled yet, we have to check whether the
+ * VFP state in the CPU's registers is the most recent VFP state
+ * associated with the process. On UP systems, we don't save the VFP
+ * state eagerly on a context switch, so we may need to save the
+ * VFP state to memory first, as it may belong to another process.
+ */
+ if (!(fpexc & FPEXC_EN)) {
+ /*
+ * Enable the VFP unit but mask the FP exception flag for the
+ * time being, so we can access all the registers.
+ */
+ fpexc |= FPEXC_EN;
+ fmxr(FPEXC, fpexc & ~FPEXC_EX);
+
+ /*
+ * Check whether or not the VFP state in the CPU's registers is
+ * the most recent VFP state associated with this task. On SMP,
+ * migration may result in multiple CPUs holding VFP states
+ * that belong to the same task, but only the most recent one
+ * is valid.
+ */
+ if (!vfp_state_in_hw(ti->cpu, ti)) {
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ vfp_current_hw_state[ti->cpu] != NULL) {
+ /*
+ * This CPU is currently holding the most
+ * recent VFP state associated with another
+ * task, and we must save that to memory first.
+ */
+ vfp_save_state(vfp_current_hw_state[ti->cpu],
+ fpexc);
+ }
+
+ /*
+ * We can now proceed with loading the task's VFP state
+ * from memory into the CPU registers.
+ */
+ fpexc = vfp_load_state(&ti->vfpstate);
+ vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
+#ifdef CONFIG_SMP
+ /*
+ * Record that this CPU is now the one holding the most
+ * recent VFP state of the task.
+ */
+ ti->vfpstate.hard.cpu = ti->cpu;
+#endif
+ }
+
+ if (fpexc & FPEXC_EX)
+ /*
+ * Might as well handle the pending exception before
+ * retrying branch out before setting an FPEXC that
+ * stops us reading stuff.
+ */
+ goto bounce;
+
+ /*
+ * No FP exception is pending: just enable the VFP and
+ * replay the instruction that trapped.
+ */
+ fmxr(FPEXC, fpexc);
+ } else {
+ /* Check for synchronous or asynchronous exceptions */
+ if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
+ u32 fpscr = fmrx(FPSCR);
+
+ /*
+ * On some implementations of the VFP subarch 1,
+ * setting FPSCR.IXE causes all the CDP instructions to
+ * be bounced synchronously without setting the
+ * FPEXC.EX bit
+ */
+ if (!(fpscr & FPSCR_IXE)) {
+ if (!(fpscr & FPSCR_LENGTH_MASK)) {
+ pr_debug("not VFP\n");
+ local_bh_enable();
+ return -ENOEXEC;
+ }
+ fpexc |= FPEXC_DEX;
+ }
+ }
+bounce: regs->ARM_pc += 4;
+ VFP_bounce(trigger, fpexc, regs);
+ }
+
+ local_bh_enable();
+ return 0;
+}
+
+static struct undef_hook neon_support_hook[] = {{
.instr_mask = 0xfe000000,
.instr_val = 0xf2000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xff100000,
.instr_val = 0xf4000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xef000000,
.instr_val = 0xef000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE | PSR_T_BIT,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xff100000,
.instr_val = 0xf9000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE | PSR_T_BIT,
- .fn = vfp_kmode_exception,
-}, {
- .instr_mask = 0x0c000e00,
- .instr_val = 0x0c000a00,
- .cpsr_mask = MODE_MASK,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
}};
-static int __init vfp_kmode_exception_hook_init(void)
-{
- int i;
+static struct undef_hook vfp_support_hook = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000a00,
+ .fn = vfp_support_entry,
+};
- for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++)
- register_undef_hook(&vfp_kmode_exception_hook[i]);
- return 0;
-}
-subsys_initcall(vfp_kmode_exception_hook_init);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Kernel-side NEON support functions
@@ -833,8 +913,11 @@ static int __init vfp_init(void)
* for NEON if the hardware has the MVFR registers.
*/
if (IS_ENABLED(CONFIG_NEON) &&
- (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+ (fmrx(MVFR1) & 0x000fff00) == 0x00011100) {
elf_hwcap |= HWCAP_NEON;
+ for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++)
+ register_undef_hook(&neon_support_hook[i]);
+ }
if (IS_ENABLED(CONFIG_VFPv3)) {
u32 mvfr0 = fmrx(MVFR0);
@@ -903,6 +986,7 @@ static int __init vfp_init(void)
have_vfp = true;
+ register_undef_hook(&vfp_support_hook);
thread_register_notifier(&vfp_notifier_block);
vfp_pm_init();
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 432932a..7f7d9b1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -181,7 +181,7 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
return pmd;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -487,7 +487,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7ebe74..0e8beb3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -1344,7 +1344,7 @@ void __init minsigstksz_setup(void)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 4700f85..bbd5427 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -460,7 +460,7 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index e9ad391..7b14df3 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -41,7 +41,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_ptep, pte_mkwrite(pte));
+ set_pte(dst_ptep, pte_mkwrite_novma(pte));
} else if ((debug_pagealloc_enabled() ||
is_kfence_address((void *)addr)) && !pte_none(pte)) {
/*
@@ -55,7 +55,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
+ set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
}
}
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 4240503..a397e17 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -176,7 +176,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
if (pte_val(pte) & _PAGE_MODIFIED)
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index dd05dd7..8c5b7a1 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -300,7 +300,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
}
/* pte_mkwrite - mark page as writable */
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
return pte;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 4e5dd80..9be2d2b 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -269,7 +269,7 @@ ia64_phys_addr_valid (unsigned long addr)
* access rights:
*/
#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW))
-#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
+#define pte_mkwrite_novma(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A))
#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index e7cf25e..06963a1 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -384,7 +384,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
if (pte_val(pte) & _PAGE_MODIFIED)
@@ -493,7 +493,7 @@ static inline int pmd_write(pmd_t pmd)
return !!(pmd_val(pmd) & _PAGE_WRITE);
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
pmd_val(pmd) |= _PAGE_WRITE;
if (pmd_val(pmd) & _PAGE_MODIFIED)
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 772b7e7..48f87a8 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -210,7 +210,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= CF_PAGE_WRITABLE;
return pte;
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 38d5e5e..9866c7a 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -156,7 +156,7 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; }
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_RONLY; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_RONLY; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mknocache(pte_t pte)
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 0cc39a8..30081ae 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -144,7 +144,7 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESS
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; }
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; }
static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= SUN3_PAGE_MODIFIED; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= SUN3_PAGE_ACCESSED; return pte; }
static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE; return pte; }
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 6f9b990..3042eb98 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -266,7 +266,7 @@ static inline pte_t pte_mkread(pte_t pte) \
{ pte_val(pte) |= _PAGE_USER; return pte; }
static inline pte_t pte_mkexec(pte_t pte) \
{ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) \
+static inline pte_t pte_mkwrite_novma(pte_t pte) \
{ pte_val(pte) |= _PAGE_RW; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) \
{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index cbb93a8..430b208 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -319,7 +319,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte.pte_low |= _PAGE_WRITE;
if (pte.pte_low & _PAGE_MODIFIED) {
@@ -374,7 +374,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
if (pte_val(pte) & _PAGE_MODIFIED)
@@ -646,7 +646,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
return pmd;
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
pmd_val(pmd) |= _PAGE_WRITE;
if (pmd_val(pmd) & _PAGE_MODIFIED)
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index be6bf3e0..5144506 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -129,7 +129,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
return pte;
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 7bdf1bb..60c6ce7 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -250,7 +250,7 @@ static inline pte_t pte_mkold(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte_val(pte) |= _PAGE_WRITE;
return pte;
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index ce38bb3..974acca 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -322,7 +322,7 @@ static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; retu
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkwrite_novma(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; }
/*
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 21edd66..54b9387 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -188,6 +188,7 @@
select DYNAMIC_FTRACE if FUNCTION_TRACER
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
+ select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
select GENERIC_ATOMIC64 if PPC32
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CMOS_UPDATE
@@ -195,6 +196,7 @@
select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY
+ select GENERIC_IDLE_POLL_SETUP
select GENERIC_IOREMAP
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
@@ -229,8 +231,8 @@
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32
- select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32
+ select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
@@ -258,7 +260,7 @@
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
select HAVE_OPTPROBES
- select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL
+ select HAVE_OBJTOOL if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
@@ -275,6 +277,8 @@
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_VIRT_CPU_ACCOUNTING_GEN
+ select HOTPLUG_SMT if HOTPLUG_CPU
+ select SMT_NUM_THREADS_DYNAMIC
select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
@@ -554,6 +558,13 @@
def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN
def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN
+config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2)
+ depends on $(cc-option,-fpatchable-function-entry=2)
+ def_bool y if PPC32
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
+
config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
depends on SMP && (PPC_PSERIES || \
@@ -1126,12 +1137,6 @@
help
Freescale General-purpose Timers support
-config PCI_8260
- bool
- depends on PCI && 8260
- select PPC_INDIRECT_PCI
- default y
-
config FSL_RIO
bool "Freescale Embedded SRIO Controller support"
depends on RAPIDIO = y && HAVE_RAPIDIO
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index dac7ca1..f19dbaa 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -143,18 +143,21 @@
CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
ifdef CONFIG_FUNCTION_TRACER
+ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+else
CC_FLAGS_FTRACE := -pg
ifdef CONFIG_MPROFILE_KERNEL
CC_FLAGS_FTRACE += -mprofile-kernel
endif
endif
+endif
CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
-CFLAGS-$(CONFIG_POWERPC64_CPU) += $(call cc-option,-mtune=power10, \
- $(call cc-option,-mtune=power9, \
- $(call cc-option,-mtune=power8)))
+CFLAGS-y += $(CONFIG_TUNE_CPU)
asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
index bec0fc3..f208fb8 100644
--- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -124,10 +124,10 @@ crypto@80000 {
reg = <0x80000 0x20000>;
ranges = <0x0 0x80000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <45 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <57 2 0 0>;
};
};
@@ -140,10 +140,10 @@ crypto@a0000 {
reg = <0xa0000 0x20000>;
ranges = <0x0 0xa0000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <49 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <50 2 0 0>;
};
};
@@ -156,10 +156,10 @@ crypto@c0000 {
reg = <0xc0000 0x20000>;
ranges = <0x0 0xc0000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <55 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <56 2 0 0>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p1022rdk.dts b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
index 29e8af1..4261c2f 100644
--- a/arch/powerpc/boot/dts/fsl/p1022rdk.dts
+++ b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
@@ -60,23 +60,23 @@ rtc@68 {
compatible = "st,m41t62";
reg = <0x68>;
};
- adt7461@4c{
+ adt7461@4c {
compatible = "adi,adt7461";
reg = <0x4c>;
};
- zl6100@21{
+ zl6100@21 {
compatible = "isil,zl6100";
reg = <0x21>;
};
- zl6100@24{
+ zl6100@24 {
compatible = "isil,zl6100";
reg = <0x24>;
};
- zl6100@26{
+ zl6100@26 {
compatible = "isil,zl6100";
reg = <0x26>;
};
- zl6100@29{
+ zl6100@29 {
compatible = "isil,zl6100";
reg = <0x29>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
index 5f51b7b..093e4e3 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -238,7 +238,7 @@ global-utilities@e0000 {
fsl,has-rstcr;
};
- power@e0070{
+ power@e0070 {
compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
reg = <0xe0070 0x20>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
index 6f5f728..ca0e027 100644
--- a/arch/powerpc/boot/dts/fsl/p3041ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -41,7 +41,7 @@ / {
#size-cells = <2>;
interrupt-parent = <&mpic>;
- aliases{
+ aliases {
phy_rgmii_0 = &phy_rgmii_0;
phy_rgmii_1 = &phy_rgmii_1;
phy_sgmii_1c = &phy_sgmii_1c;
@@ -165,7 +165,7 @@ adt7461@4c {
};
};
- fman@400000{
+ fman@400000 {
ethernet@e0000 {
phy-handle = <&phy_sgmii_1c>;
phy-connection-type = "sgmii";
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
index 30850b3..5cfc689e 100644
--- a/arch/powerpc/boot/dts/fsl/p5040ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -41,7 +41,7 @@ / {
#size-cells = <2>;
interrupt-parent = <&mpic>;
- aliases{
+ aliases {
phy_sgmii_slot2_1c = &phy_sgmii_slot2_1c;
phy_sgmii_slot2_1d = &phy_sgmii_slot2_1d;
phy_sgmii_slot2_1e = &phy_sgmii_slot2_1e;
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
index c0913ac..128b5798 100644
--- a/arch/powerpc/boot/dts/fsl/t4240qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -41,7 +41,7 @@ / {
#size-cells = <2>;
interrupt-parent = <&mpic>;
- aliases{
+ aliases {
phy_rgmii1 = &phyrgmii1;
phy_rgmii2 = &phyrgmii2;
phy_sgmii3 = &phy3;
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 3f66b91..d3fc806 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -140,7 +140,7 @@ clks: clock@f00 {
};
/* Power Management Controller */
- pmc@1000{
+ pmc@1000 {
compatible = "fsl,mpc5121-pmc";
reg = <0x1000 0x100>;
interrupts = <83 0x8>;
diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index 0bd2acc..ee09070 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -104,7 +104,7 @@ clks: clock@f00 { // Clock control
clock-names = "osc";
};
- pmc@1000{ // Power Management Controller
+ pmc@1000 { // Power Management Controller
compatible = "fsl,mpc5121-pmc";
reg = <0x1000 0x100>;
interrupts = <83 0x2>;
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 05ed585..a205da9 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -176,8 +176,9 @@
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIAL_8250=m
-CONFIG_SERIAL_PMACZILOG=m
+CONFIG_SERIAL_PMACZILOG=y
CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
CONFIG_NVRAM=y
CONFIG_I2C_CHARDEV=m
CONFIG_APM_POWER=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c0f4bbc..6e7b9e8 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -390,8 +390,11 @@
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
+CONFIG_CRYPTO_VPMSUM_TESTER=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_SHA1_PPC=m
+CONFIG_CRYPTO_AES_GCM_P10=m
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
CONFIG_CRYPTO_DEV_VMX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 1034aea..eaf3273 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -183,7 +183,6 @@
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 71cfb990a..8d3eacb 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -289,7 +289,6 @@
# CONFIG_XZ_DEC_SPARC is not set
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_SLUB_DEBUG_ON=y
CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_PANIC_ON_OOPS=y
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index f25024a..803da4a 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -100,7 +100,7 @@
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
select CRYPTO_AEAD
- default m
+ select CRYPTO_SKCIPHER
help
AEAD cipher: AES cipher algorithms (FIPS-197)
GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
index bdf0563..f9cac46 100644
--- a/arch/powerpc/include/asm/8xx_immap.h
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -560,5 +560,7 @@ typedef struct immap {
cpm8xx_t im_cpm; /* Communication processor */
} immap_t;
+extern immap_t __iomem *mpc8xx_immr;
+
#endif /* __IMMAP_8XX__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 419319c..61a8d55 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -3,7 +3,6 @@
generated-y += syscall_table_64.h
generated-y += syscall_table_spu.h
generic-y += agp.h
-generic-y += export.h
generic-y += kvm_types.h
generic-y += mcs_spinlock.h
generic-y += qrwlock.h
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 678f9c9..4e14a54 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -9,79 +9,53 @@
#ifndef __ASSEMBLY__
-#include <linux/jump_label.h>
-
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuep_is_disabled(void)
-{
- return !IS_ENABLED(CONFIG_PPC_KUEP);
-}
-
#ifdef CONFIG_PPC_KUAP
#include <linux/sched.h>
#define KUAP_NONE (~0UL)
-#define KUAP_ALL (~1UL)
-static __always_inline bool kuap_is_disabled(void)
-{
- return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void kuap_lock_one(unsigned long addr)
+static __always_inline void kuap_lock_one(unsigned long addr)
{
mtsr(mfsr(addr) | SR_KS, addr);
isync(); /* Context sync required after mtsr() */
}
-static inline void kuap_unlock_one(unsigned long addr)
+static __always_inline void kuap_unlock_one(unsigned long addr)
{
mtsr(mfsr(addr) & ~SR_KS, addr);
isync(); /* Context sync required after mtsr() */
}
-static inline void kuap_lock_all(void)
+static __always_inline void uaccess_begin_32s(unsigned long addr)
{
- update_user_segments(mfsr(0) | SR_KS);
- isync(); /* Context sync required after mtsr() */
+ unsigned long tmp;
+
+ asm volatile(ASM_MMU_FTR_IFSET(
+ "mfsrin %0, %1;"
+ "rlwinm %0, %0, 0, %2;"
+ "mtsrin %0, %1;"
+ "isync", "", %3)
+ : "=&r"(tmp)
+ : "r"(addr), "i"(~SR_KS), "i"(MMU_FTR_KUAP)
+ : "memory");
}
-static inline void kuap_unlock_all(void)
+static __always_inline void uaccess_end_32s(unsigned long addr)
{
- update_user_segments(mfsr(0) & ~SR_KS);
- isync(); /* Context sync required after mtsr() */
+ unsigned long tmp;
+
+ asm volatile(ASM_MMU_FTR_IFSET(
+ "mfsrin %0, %1;"
+ "oris %0, %0, %2;"
+ "mtsrin %0, %1;"
+ "isync", "", %3)
+ : "=&r"(tmp)
+ : "r"(addr), "i"(SR_KS >> 16), "i"(MMU_FTR_KUAP)
+ : "memory");
}
-void kuap_lock_all_ool(void);
-void kuap_unlock_all_ool(void);
-
-static inline void kuap_lock_addr(unsigned long addr, bool ool)
-{
- if (likely(addr != KUAP_ALL))
- kuap_lock_one(addr);
- else if (!ool)
- kuap_lock_all();
- else
- kuap_lock_all_ool();
-}
-
-static inline void kuap_unlock(unsigned long addr, bool ool)
-{
- if (likely(addr != KUAP_ALL))
- kuap_unlock_one(addr);
- else if (!ool)
- kuap_unlock_all();
- else
- kuap_unlock_all_ool();
-}
-
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
{
unsigned long kuap = current->thread.kuap;
@@ -90,18 +64,19 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs)
return;
current->thread.kuap = KUAP_NONE;
- kuap_lock_addr(kuap, false);
+ kuap_lock_one(kuap);
}
+#define __kuap_save_and_lock __kuap_save_and_lock
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
{
}
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
if (unlikely(kuap != KUAP_NONE)) {
current->thread.kuap = KUAP_NONE;
- kuap_lock_addr(kuap, false);
+ kuap_lock_one(kuap);
}
if (likely(regs->kuap == KUAP_NONE))
@@ -109,10 +84,10 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua
current->thread.kuap = regs->kuap;
- kuap_unlock(regs->kuap, false);
+ kuap_unlock_one(regs->kuap);
}
-static inline unsigned long __kuap_get_and_assert_locked(void)
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
{
unsigned long kuap = current->thread.kuap;
@@ -120,9 +95,10 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
return kuap;
}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
-static __always_inline void __allow_user_access(void __user *to, const void __user *from,
- u32 size, unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
BUILD_BUG_ON(!__builtin_constant_p(dir));
@@ -130,10 +106,10 @@ static __always_inline void __allow_user_access(void __user *to, const void __us
return;
current->thread.kuap = (__force u32)to;
- kuap_unlock_one((__force u32)to);
+ uaccess_begin_32s((__force u32)to);
}
-static __always_inline void __prevent_user_access(unsigned long dir)
+static __always_inline void prevent_user_access(unsigned long dir)
{
u32 kuap = current->thread.kuap;
@@ -143,42 +119,51 @@ static __always_inline void __prevent_user_access(unsigned long dir)
return;
current->thread.kuap = KUAP_NONE;
- kuap_lock_addr(kuap, true);
+ uaccess_end_32s(kuap);
}
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline unsigned long prevent_user_access_return(void)
{
unsigned long flags = current->thread.kuap;
if (flags != KUAP_NONE) {
current->thread.kuap = KUAP_NONE;
- kuap_lock_addr(flags, true);
+ uaccess_end_32s(flags);
}
return flags;
}
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
{
if (flags != KUAP_NONE) {
current->thread.kuap = flags;
- kuap_unlock(flags, true);
+ uaccess_begin_32s(flags);
}
}
-static inline bool
+static __always_inline bool
__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
unsigned long kuap = regs->kuap;
- if (!is_write || kuap == KUAP_ALL)
+ if (!is_write)
return false;
if (kuap == KUAP_NONE)
return true;
- /* If faulting address doesn't match unlocked segment, unlock all */
- if ((kuap ^ address) & 0xf0000000)
- regs->kuap = KUAP_ALL;
+ /*
+ * If faulting address doesn't match unlocked segment, change segment.
+ * In case of unaligned store crossing two segments, emulate store.
+ */
+ if ((kuap ^ address) & 0xf0000000) {
+ if (!(kuap & 0x0fffffff) && address > kuap - 4 && fix_alignment(regs)) {
+ regs_add_return_ip(regs, 4);
+ emulate_single_step(regs);
+ } else {
+ regs->kuap = address;
+ }
+ }
return false;
}
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 5f12b93..9b13eb1 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -493,7 +493,7 @@ static inline pte_t pte_mkpte(pte_t pte)
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_RW);
}
@@ -536,58 +536,43 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
+ * Setting the PTE depends on the MMU type and other factors.
+ *
+ * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve
+ * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+ * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+ * and so we need to keep track that this PTE needs invalidating.
+ *
+ * Second case is 32-bit with 64-bit PTE. In this case, we
+ * can just store as long as we do the two halves in the right order
+ * with a barrier in between. This is possible because we take care,
+ * in the hash code, to pre-invalidate if the PTE was already hashed,
+ * which synchronizes us with any concurrent invalidation.
+ * In the percpu case, we fall back to the simple update preserving
+ * the hash bits (ie, same as the non-SMP case).
+ *
+ * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the
+ * helper pte_update() which does an atomic update. We need to do that
+ * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+ * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving
+ * the hash bits instead.
*/
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
{
-#if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
- /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
- * helper pte_update() which does an atomic update. We need to do that
- * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
- * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
- * the hash bits instead (ie, same as the non-SMP case)
- */
- if (percpu)
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
- else
+ if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) {
+ *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
+ (pte_val(pte) & ~_PAGE_HASHPTE));
+ } else if (IS_ENABLED(CONFIG_PTE_64BIT)) {
+ if (pte_val(*ptep) & _PAGE_HASHPTE)
+ flush_hash_entry(mm, ptep, addr);
+
+ asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" :
+ "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) :
+ "r" (pte) : "memory");
+ } else {
pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
-
-#elif defined(CONFIG_PTE_64BIT)
- /* Second case is 32-bit with 64-bit PTE. In this case, we
- * can just store as long as we do the two halves in the right order
- * with a barrier in between. This is possible because we take care,
- * in the hash code, to pre-invalidate if the PTE was already hashed,
- * which synchronizes us with any concurrent invalidation.
- * In the percpu case, we also fallback to the simple update preserving
- * the hash bits
- */
- if (percpu) {
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
- return;
}
- if (pte_val(*ptep) & _PAGE_HASHPTE)
- flush_hash_entry(mm, ptep, addr);
- __asm__ __volatile__("\
- stw%X0 %2,%0\n\
- eieio\n\
- stw%X1 %L2,%1"
- : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
- : "r" (pte) : "memory");
-
-#else
- /* Third case is 32-bit hash table in UP mode, we need to preserve
- * the _PAGE_HASHPTE bit since we may not have invalidated the previous
- * translation in the hash yet (done in a subsequent flush_tlb_xxx())
- * and see we need to keep track that this PTE needs invalidating
- */
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
-#endif
}
/*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
index f1e60d5..6c5564c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
@@ -24,7 +24,7 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL));
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) ||
+ if (mmu_has_feature(MMU_FTR_KUAP) ||
mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY))
return HASH_DEFAULT_KERNEL_KEY;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 84c09e5..497a7bd 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -31,7 +31,7 @@
mfspr \gpr2, SPRN_AMR
cmpd \gpr1, \gpr2
beq 99f
- END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68)
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_KUAP, 68)
isync
mtspr SPRN_AMR, \gpr1
@@ -78,7 +78,7 @@
* No need to restore IAMR when returning to kernel space.
*/
100:
- END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
#endif
.endm
@@ -91,7 +91,7 @@
LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
999: tdne \gpr1, \gpr2
EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
- END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
#endif
.endm
#endif
@@ -130,7 +130,7 @@
*/
BEGIN_MMU_FTR_SECTION_NESTED(68)
b 100f // skip_save_amr
- END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68)
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_KUAP, 68)
/*
* if pkey is disabled and we are entering from userspace
@@ -166,7 +166,7 @@
mtspr SPRN_AMR, \gpr2
isync
102:
- END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 69)
/*
* if entering from kernel we don't need save IAMR
@@ -213,14 +213,14 @@ extern u64 __ro_after_init default_iamr;
* access restrictions. Because of this ignore AMR value when accessing
* userspace via kernel thread.
*/
-static inline u64 current_thread_amr(void)
+static __always_inline u64 current_thread_amr(void)
{
if (current->thread.regs)
return current->thread.regs->amr;
return default_amr;
}
-static inline u64 current_thread_iamr(void)
+static __always_inline u64 current_thread_iamr(void)
{
if (current->thread.regs)
return current->thread.regs->iamr;
@@ -230,12 +230,7 @@ static inline u64 current_thread_iamr(void)
#ifdef CONFIG_PPC_KUAP
-static __always_inline bool kuap_is_disabled(void)
-{
- return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP);
-}
-
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
{
bool restore_amr = false, restore_iamr = false;
unsigned long amr, iamr;
@@ -243,7 +238,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
if (!mmu_has_feature(MMU_FTR_PKEY))
return;
- if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+ if (!mmu_has_feature(MMU_FTR_KUAP)) {
amr = mfspr(SPRN_AMR);
if (amr != regs->amr)
restore_amr = true;
@@ -274,7 +269,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
*/
}
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
{
if (likely(regs->amr == amr))
return;
@@ -290,7 +285,7 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr
*/
}
-static inline unsigned long __kuap_get_and_assert_locked(void)
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
{
unsigned long amr = mfspr(SPRN_AMR);
@@ -298,22 +293,16 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED);
return amr;
}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
-/* Do nothing, book3s/64 does that in ASM */
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
-{
-}
+/* __kuap_lock() not required, book3s/64 does that in ASM */
/*
* We support individually allowing read or write, but we don't support nesting
* because that would require an expensive read/modify write of the AMR.
*/
-static inline unsigned long get_kuap(void)
+static __always_inline unsigned long get_kuap(void)
{
/*
* We return AMR_KUAP_BLOCKED when we don't support KUAP because
@@ -323,7 +312,7 @@ static inline unsigned long get_kuap(void)
* This has no effect in terms of actually blocking things on hash,
* so it doesn't break anything.
*/
- if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+ if (!mmu_has_feature(MMU_FTR_KUAP))
return AMR_KUAP_BLOCKED;
return mfspr(SPRN_AMR);
@@ -331,7 +320,7 @@ static inline unsigned long get_kuap(void)
static __always_inline void set_kuap(unsigned long value)
{
- if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+ if (!mmu_has_feature(MMU_FTR_KUAP))
return;
/*
@@ -343,7 +332,8 @@ static __always_inline void set_kuap(unsigned long value)
isync();
}
-static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
/*
* For radix this will be a storage protection fault (DSISR_PROTFAULT).
@@ -386,12 +376,12 @@ static __always_inline void allow_user_access(void __user *to, const void __user
#else /* CONFIG_PPC_KUAP */
-static inline unsigned long get_kuap(void)
+static __always_inline unsigned long get_kuap(void)
{
return AMR_KUAP_BLOCKED;
}
-static inline void set_kuap(unsigned long value) { }
+static __always_inline void set_kuap(unsigned long value) { }
static __always_inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
@@ -406,7 +396,7 @@ static __always_inline void prevent_user_access(unsigned long dir)
do_uaccess_flush();
}
-static inline unsigned long prevent_user_access_return(void)
+static __always_inline unsigned long prevent_user_access_return(void)
{
unsigned long flags = get_kuap();
@@ -417,7 +407,7 @@ static inline unsigned long prevent_user_access_return(void)
return flags;
}
-static inline void restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
{
set_kuap(flags);
if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 570a496..fedbc5d 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -71,10 +71,7 @@ extern unsigned int mmu_pid_bits;
/* Base PID to allocate from */
extern unsigned int mmu_base_pid;
-/*
- * memory block size used with radix translation.
- */
-extern unsigned long __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init memory_block_size;
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
@@ -261,7 +258,7 @@ static inline void radix_init_pseries(void) { }
#define arch_clear_mm_cpumask_cpu(cpu, mm) \
do { \
if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \
- atomic_dec(&(mm)->context.active_cpus); \
+ dec_mm_active_cpus(mm); \
cpumask_clear_cpu(cpu, mm_cpumask(mm)); \
} \
} while (0)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8269b23..5c497c8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -596,7 +596,7 @@ static inline pte_t pte_mkexec(pte_t pte)
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
/*
* write implies read, hence set both
@@ -931,7 +931,7 @@ static inline pte_t *pudp_ptep(pud_t *pud)
#define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud)))
#define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud)))
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
-#define pud_mkwrite(pud) pte_pud(pte_mkwrite(pud_pte(pud)))
+#define pud_mkwrite(pud) pte_pud(pte_mkwrite_novma(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -1088,7 +1088,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 00c6b0b..1db485a 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -120,6 +120,7 @@
struct pt_regs;
void hash__do_page_fault(struct pt_regs *);
void bad_page_fault(struct pt_regs *, int);
+void emulate_single_step(struct pt_regs *regs);
extern void _exception(int, struct pt_regs *, int, unsigned long);
extern void _exception_pkey(struct pt_regs *, unsigned long, int);
extern void die(const char *, struct pt_regs *, long);
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index 9ee192a..249d43c 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -1080,6 +1080,9 @@ typedef struct im_idma {
#define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1)
#define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2)
+/* Pipeline Maximum Depth */
+#define MPC82XX_BCR_PLDP 0x00800000
+
/* Clocks and GRG's */
enum cpm_clk_dir {
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 443a9d48..8765d51 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -252,7 +252,7 @@ static inline void cpu_feature_keys_init(void) { }
* This is also required by 52xx family.
*/
#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
- || defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \
+ || defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_82xx) \
|| defined(CONFIG_PPC_MPC52xx)
#define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT
#else
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
index 4bcb9f9..d6f43d1 100644
--- a/arch/powerpc/include/asm/dtl.h
+++ b/arch/powerpc/include/asm/dtl.h
@@ -39,6 +39,5 @@ extern rwlock_t dtl_access_lock;
extern void register_dtl_buffer(int cpu);
extern void alloc_dtl_buffers(unsigned long *time_limit);
-extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
#endif /* _ASM_POWERPC_DTL_H */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index ac605fc..77824bd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -292,6 +292,7 @@ extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
void apply_feature_fixups(void);
+void update_mmu_feature_fixups(unsigned long mask);
void setup_feature_keys(void);
#endif
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
index 8def56e..d530f68 100644
--- a/arch/powerpc/include/asm/fs_pd.h
+++ b/arch/powerpc/include/asm/fs_pd.h
@@ -14,28 +14,6 @@
#include <sysdev/fsl_soc.h>
#include <asm/time.h>
-#ifdef CONFIG_CPM2
-#include <asm/cpm2.h>
-
-#if defined(CONFIG_8260)
-#include <asm/mpc8260.h>
-#endif
-
-#define cpm2_map(member) (&cpm2_immr->member)
-#define cpm2_map_size(member, size) (&cpm2_immr->member)
-#define cpm2_unmap(addr) do {} while(0)
-#endif
-
-#ifdef CONFIG_PPC_8xx
-#include <asm/8xx_immap.h>
-
-extern immap_t __iomem *mpc8xx_immr;
-
-#define immr_map(member) (&mpc8xx_immr->member)
-#define immr_map_size(member, size) (&mpc8xx_immr->member)
-#define immr_unmap(addr) do {} while (0)
-#endif
-
static inline int uart_baudrate(void)
{
return get_baudrate();
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 91c049d..9e5a39b 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -11,8 +11,8 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
/* Ignore unused weak functions which will have larger offsets */
-#ifdef CONFIG_MPROFILE_KERNEL
-#define FTRACE_MCOUNT_MAX_OFFSET 12
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+#define FTRACE_MCOUNT_MAX_OFFSET 16
#elif defined(CONFIG_PPC32)
#define FTRACE_MCOUNT_MAX_OFFSET 8
#endif
@@ -22,18 +22,26 @@ extern void _mcount(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
- /* relocation of mcount call site is the same as the address */
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
+ addr += MCOUNT_INSN_SIZE;
+
return addr;
}
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
unsigned long sp);
+struct module;
+struct dyn_ftrace;
struct dyn_arch_ftrace {
struct module *mod;
};
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define ftrace_need_init_nop() (true)
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
struct ftrace_regs {
struct pt_regs regs;
};
@@ -124,15 +132,19 @@ static inline u8 this_cpu_get_ftrace_enabled(void)
{
return get_paca()->ftrace_enabled;
}
-
-void ftrace_free_init_tramp(void);
#else /* CONFIG_PPC64 */
static inline void this_cpu_disable_ftrace(void) { }
static inline void this_cpu_enable_ftrace(void) { }
static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
-static inline void ftrace_free_init_tramp(void) { }
#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_FUNCTION_TRACER
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+void ftrace_free_init_tramp(void);
+#else
+static inline void ftrace_free_init_tramp(void) { }
+#endif
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_FTRACE */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 84d39fd..66db014 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@ struct arch_hw_breakpoint {
u16 len; /* length of the target data symbol */
u16 hw_len; /* length programmed in hw */
u8 flags;
+ bool perf_single_step; /* temporarily uninstalled for a perf single step */
};
/* Note: Don't change the first 6 bits below as they are in the same order
diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h
index 088f95b..6f33253 100644
--- a/arch/powerpc/include/asm/ibmebus.h
+++ b/arch/powerpc/include/asm/ibmebus.h
@@ -46,6 +46,8 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
+struct platform_driver;
+
extern struct bus_type ibmebus_bus_type;
int ibmebus_register_driver(struct platform_driver *drv);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 34e14df..0266959 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -28,6 +28,9 @@
#define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1))
#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr))
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
+
/* Boot time flags */
extern int iommu_is_off;
extern int iommu_force_on;
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 6fd2b4d..424ceef 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -23,7 +23,7 @@ static inline bool arch_kfence_init_pool(void)
#ifdef CONFIG_PPC64
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
- struct page *page = virt_to_page(addr);
+ struct page *page = virt_to_page((void *)addr);
__kernel_map_pages(page, 1, !protect);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index d751ddd..ad7e8c5a 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -6,6 +6,12 @@
#define KUAP_WRITE 2
#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE)
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+static __always_inline bool kuap_is_disabled(void);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/kup.h>
#endif
@@ -41,26 +47,24 @@ void setup_kuep(bool disabled);
#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled);
+
+static __always_inline bool kuap_is_disabled(void)
+{
+ return !mmu_has_feature(MMU_FTR_KUAP);
+}
#else
static inline void setup_kuap(bool disabled) { }
static __always_inline bool kuap_is_disabled(void) { return true; }
-static inline bool
+static __always_inline bool
__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
-static inline void __kuap_lock(void) { }
-static inline void __kuap_save_and_lock(struct pt_regs *regs) { }
-static inline void kuap_user_restore(struct pt_regs *regs) { }
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
-
-static inline unsigned long __kuap_get_and_assert_locked(void)
-{
- return 0;
-}
+static __always_inline void kuap_user_restore(struct pt_regs *regs) { }
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
/*
* book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
@@ -68,11 +72,11 @@ static inline unsigned long __kuap_get_and_assert_locked(void)
* platforms.
*/
#ifndef CONFIG_PPC_BOOK3S_64
-static inline void __allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir) { }
-static inline void __prevent_user_access(unsigned long dir) { }
-static inline unsigned long __prevent_user_access_return(void) { return 0UL; }
-static inline void __restore_user_access(unsigned long flags) { }
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir) { }
+static __always_inline void prevent_user_access(unsigned long dir) { }
+static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static __always_inline void restore_user_access(unsigned long flags) { }
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* CONFIG_PPC_KUAP */
@@ -85,29 +89,24 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
return __bad_kuap_fault(regs, address, is_write);
}
-static __always_inline void kuap_assert_locked(void)
-{
- if (kuap_is_disabled())
- return;
-
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
- __kuap_get_and_assert_locked();
-}
-
static __always_inline void kuap_lock(void)
{
+#ifdef __kuap_lock
if (kuap_is_disabled())
return;
__kuap_lock();
+#endif
}
static __always_inline void kuap_save_and_lock(struct pt_regs *regs)
{
+#ifdef __kuap_save_and_lock
if (kuap_is_disabled())
return;
__kuap_save_and_lock(regs);
+#endif
}
static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
@@ -120,47 +119,19 @@ static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned l
static __always_inline unsigned long kuap_get_and_assert_locked(void)
{
- if (kuap_is_disabled())
- return 0;
-
- return __kuap_get_and_assert_locked();
+#ifdef __kuap_get_and_assert_locked
+ if (!kuap_is_disabled())
+ return __kuap_get_and_assert_locked();
+#endif
+ return 0;
}
-#ifndef CONFIG_PPC_BOOK3S_64
-static __always_inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static __always_inline void kuap_assert_locked(void)
{
- if (kuap_is_disabled())
- return;
-
- __allow_user_access(to, from, size, dir);
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ kuap_get_and_assert_locked();
}
-static __always_inline void prevent_user_access(unsigned long dir)
-{
- if (kuap_is_disabled())
- return;
-
- __prevent_user_access(dir);
-}
-
-static __always_inline unsigned long prevent_user_access_return(void)
-{
- if (kuap_is_disabled())
- return 0;
-
- return __prevent_user_access_return();
-}
-
-static __always_inline void restore_user_access(unsigned long flags)
-{
- if (kuap_is_disabled())
- return;
-
- __restore_user_access(flags);
-}
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
{
barrier_nospec();
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 34d44cb..61ec244 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -6,28 +6,6 @@
#ifndef _ASM_POWERPC_LPPACA_H
#define _ASM_POWERPC_LPPACA_H
-/*
- * The below VPHN macros are outside the __KERNEL__ check since these are
- * used for compiling the vphn selftest in userspace
- */
-
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
-#define VPHN_REGISTER_COUNT 6
-
-/*
- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
- * form the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
-
-/*
- * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
- * 1 for retrieving associativity information for a guest cpu
- * 2 for retrieving associativity information for a host/hypervisor cpu
- */
-#define VPHN_FLAG_VCPU 1
-#define VPHN_FLAG_PCPU 2
-
#ifdef __KERNEL__
/*
@@ -45,6 +23,7 @@
#include <asm/types.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
+#include <asm/paca.h>
/*
* The lppaca is the "virtual processor area" registered with the hypervisor,
@@ -127,13 +106,23 @@ struct lppaca {
*/
#define LPPACA_OLD_SHARED_PROC 2
-static inline bool lppaca_shared_proc(struct lppaca *l)
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * All CPUs should have the same shared proc value, so directly access the PACA
+ * to avoid false positives from DEBUG_PREEMPT.
+ */
+static inline bool lppaca_shared_proc(void)
{
+ struct lppaca *l = local_paca->lppaca_ptr;
+
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return false;
return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
}
+#define get_lppaca() (get_paca()->lppaca_ptr)
+#endif
+
/*
* SLB shadow buffer structure as defined in the PAPR. The save_area
* contains adjacent ESID and VSID pairs for each shadowed SLB. The
@@ -149,8 +138,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
} ____cacheline_aligned;
-extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
-
#endif /* CONFIG_PPC_BOOK3S */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
index ff5fd82..3a07c62 100644
--- a/arch/powerpc/include/asm/macio.h
+++ b/arch/powerpc/include/asm/macio.h
@@ -3,7 +3,8 @@
#define __MACIO_ASIC_H__
#ifdef __KERNEL__
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
extern struct bus_type macio_bus_type;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 94b9811..52cc258 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -33,7 +33,7 @@
* key 0 controlling userspace addresses on radix
* Key 3 on hash
*/
-#define MMU_FTR_BOOK3S_KUAP ASM_CONST(0x00000200)
+#define MMU_FTR_KUAP ASM_CONST(0x00000200)
/*
* Supports KUEP feature
@@ -144,11 +144,6 @@
typedef pte_t *pgtable_t;
-#ifdef CONFIG_PPC_E500
-#include <asm/percpu.h>
-DECLARE_PER_CPU(int, next_tlbcam_idx);
-#endif
-
enum {
MMU_FTRS_POSSIBLE =
#if defined(CONFIG_PPC_BOOK3S_604)
@@ -188,7 +183,7 @@ enum {
#endif /* CONFIG_PPC_RADIX_MMU */
#endif
#ifdef CONFIG_PPC_KUAP
- MMU_FTR_BOOK3S_KUAP |
+ MMU_FTR_KUAP |
#endif /* CONFIG_PPC_KUAP */
#ifdef CONFIG_PPC_MEM_KEYS
MMU_FTR_PKEY |
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 57f5017..37bffa0 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -127,6 +127,7 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm)
static inline void dec_mm_active_cpus(struct mm_struct *mm)
{
+ VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0);
atomic_dec(&mm->context.active_cpus);
}
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index ac53606..a8e2e833 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -75,10 +75,6 @@ struct mod_arch_specific {
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
-# ifdef MODULE
- asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
-# endif /* MODULE */
-
int module_trampoline_target(struct module *mod, unsigned long trampoline,
unsigned long *target);
int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs);
diff --git a/arch/powerpc/include/asm/mpc8260.h b/arch/powerpc/include/asm/mpc8260.h
deleted file mode 100644
index 155114b..0000000
--- a/arch/powerpc/include/asm/mpc8260.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Since there are many different boards and no standard configuration,
- * we have a unique include file for each. Rather than change every
- * file that has to include MPC8260 configuration, they all include
- * this one and the configuration switching is done here.
- */
-#ifdef __KERNEL__
-#ifndef __ASM_POWERPC_MPC8260_H__
-#define __ASM_POWERPC_MPC8260_H__
-
-#define MPC82XX_BCR_PLDP 0x00800000 /* Pipeline Maximum Depth */
-
-#ifdef CONFIG_8260
-
-#ifdef CONFIG_PCI_8260
-#include <platforms/82xx/m82xx_pci.h>
-#endif
-
-#endif /* CONFIG_8260 */
-#endif /* !__ASM_POWERPC_MPC8260_H__ */
-#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index c44d977..46bc592 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -9,76 +9,74 @@
#ifndef __ASSEMBLY__
-#include <linux/jump_label.h>
-
#include <asm/reg.h>
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuap_is_disabled(void)
-{
- return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void __kuap_lock(void)
-{
-}
-
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
{
regs->kuap = mfspr(SPRN_MD_AP);
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
+#define __kuap_save_and_lock __kuap_save_and_lock
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
{
}
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
mtspr(SPRN_MD_AP, regs->kuap);
}
-static inline unsigned long __kuap_get_and_assert_locked(void)
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
{
- unsigned long kuap;
+ WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16);
- kuap = mfspr(SPRN_MD_AP);
+ return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
- WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
-
- return kuap;
+static __always_inline void uaccess_begin_8xx(unsigned long val)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+ "i"(SPRN_MD_AP), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
}
-static inline void __allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static __always_inline void uaccess_end_8xx(void)
{
- mtspr(SPRN_MD_AP, MD_APG_INIT);
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+ "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory");
}
-static inline void __prevent_user_access(unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
{
- mtspr(SPRN_MD_AP, MD_APG_KUAP);
+ uaccess_begin_8xx(MD_APG_INIT);
}
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ uaccess_end_8xx();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
{
unsigned long flags;
flags = mfspr(SPRN_MD_AP);
- mtspr(SPRN_MD_AP, MD_APG_KUAP);
+ uaccess_end_8xx();
return flags;
}
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
{
- mtspr(SPRN_MD_AP, flags);
+ uaccess_begin_8xx(flags);
}
-static inline bool
+static __always_inline bool
__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index fec56d9..f99c53a 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -170,8 +170,8 @@ void unmap_kernel_page(unsigned long va);
#define pte_clear(mm, addr, ptep) \
do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0)
-#ifndef pte_mkwrite
-static inline pte_t pte_mkwrite(pte_t pte)
+#ifndef pte_mkwrite_novma
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_RW);
}
@@ -355,7 +355,7 @@ static inline int pte_young(pte_t pte)
#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#else
#define pmd_page_vaddr(pmd) \
- ((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+ ((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 1a89ebd..21f681e 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -101,12 +101,12 @@ static inline int pte_write(pte_t pte)
#define pte_write pte_write
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_RO);
}
-#define pte_mkwrite pte_mkwrite
+#define pte_mkwrite_novma pte_mkwrite_novma
static inline bool pte_user(pte_t pte)
{
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 287e258..5cd9acf 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -85,7 +85,7 @@
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_RW);
}
@@ -127,7 +127,7 @@ static inline pte_t pmd_pte(pmd_t pmd)
#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
|| (pmd_val(pmd) & PMD_BAD_BITS))
#define pmd_present(pmd) (!pmd_none(pmd))
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_page_vaddr(pmd) ((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS))
extern struct page *pmd_page(pmd_t pmd);
#define pmd_pfn(pmd) (page_to_pfn(pmd_page(pmd)))
diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h
index 49bb41e..0c7c325 100644
--- a/arch/powerpc/include/asm/nohash/kup-booke.h
+++ b/arch/powerpc/include/asm/nohash/kup-booke.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_KUP_BOOKE_H_
#include <asm/bug.h>
+#include <asm/mmu.h>
#ifdef CONFIG_PPC_KUAP
@@ -13,32 +14,26 @@
#else
-#include <linux/jump_label.h>
#include <linux/sched.h>
#include <asm/reg.h>
-extern struct static_key_false disable_kuap_key;
-
-static __always_inline bool kuap_is_disabled(void)
-{
- return static_branch_unlikely(&disable_kuap_key);
-}
-
-static inline void __kuap_lock(void)
+static __always_inline void __kuap_lock(void)
{
mtspr(SPRN_PID, 0);
isync();
}
+#define __kuap_lock __kuap_lock
-static inline void __kuap_save_and_lock(struct pt_regs *regs)
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
{
regs->kuap = mfspr(SPRN_PID);
mtspr(SPRN_PID, 0);
isync();
}
+#define __kuap_save_and_lock __kuap_save_and_lock
-static inline void kuap_user_restore(struct pt_regs *regs)
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
{
if (kuap_is_disabled())
return;
@@ -48,7 +43,7 @@ static inline void kuap_user_restore(struct pt_regs *regs)
/* Context synchronisation is performed by rfi */
}
-static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
{
if (regs->kuap)
mtspr(SPRN_PID, current->thread.pid);
@@ -56,48 +51,55 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua
/* Context synchronisation is performed by rfi */
}
-static inline unsigned long __kuap_get_and_assert_locked(void)
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
{
- unsigned long kuap = mfspr(SPRN_PID);
+ WARN_ON_ONCE(mfspr(SPRN_PID));
- if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
- WARN_ON_ONCE(kuap);
+ return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
- return kuap;
+static __always_inline void uaccess_begin_booke(unsigned long val)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+ "i"(SPRN_PID), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
}
-static inline void __allow_user_access(void __user *to, const void __user *from,
- unsigned long size, unsigned long dir)
+static __always_inline void uaccess_end_booke(void)
{
- mtspr(SPRN_PID, current->thread.pid);
- isync();
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+ "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory");
}
-static inline void __prevent_user_access(unsigned long dir)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
{
- mtspr(SPRN_PID, 0);
- isync();
+ uaccess_begin_booke(current->thread.pid);
}
-static inline unsigned long __prevent_user_access_return(void)
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ uaccess_end_booke();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
{
unsigned long flags = mfspr(SPRN_PID);
- mtspr(SPRN_PID, 0);
- isync();
+ uaccess_end_booke();
return flags;
}
-static inline void __restore_user_access(unsigned long flags)
+static __always_inline void restore_user_access(unsigned long flags)
{
- if (flags) {
- mtspr(SPRN_PID, current->thread.pid);
- isync();
- }
+ if (flags)
+ uaccess_begin_booke(current->thread.pid);
}
-static inline bool
+static __always_inline bool
__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return !regs->kuap;
diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
index e43a418..6ddced0 100644
--- a/arch/powerpc/include/asm/nohash/mmu-e500.h
+++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
@@ -319,6 +319,9 @@ extern int book3e_htw_mode;
#endif
+#include <asm/percpu.h>
+DECLARE_PER_CPU(int, next_tlbcam_idx);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index cb32593..e667d45 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -15,7 +15,6 @@
#include <linux/cache.h>
#include <linux/string.h>
#include <asm/types.h>
-#include <asm/lppaca.h>
#include <asm/mmu.h>
#include <asm/page.h>
#ifdef CONFIG_PPC_BOOK3E_64
@@ -47,14 +46,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
#define get_paca() local_paca
#endif
-#ifdef CONFIG_PPC_PSERIES
-#define get_lppaca() (get_paca()->lppaca_ptr)
-#endif
-
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
struct rtas_args;
+struct lppaca;
/*
* Defines the layout of the paca.
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f2b6bf5..e5fcc79 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -9,6 +9,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/bug.h>
#else
#include <asm/types.h>
#endif
@@ -119,16 +120,6 @@ extern long long virt_phys_offset;
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
#endif
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-
-#define virt_addr_valid(vaddr) ({ \
- unsigned long _addr = (unsigned long)vaddr; \
- _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \
- pfn_valid(virt_to_pfn(_addr)); \
-})
-
/*
* On Book-E parts we need __va to parse the device tree and we can't
* determine MEMORY_START until then. However we can determine PHYSICAL_START
@@ -233,6 +224,25 @@ extern long long virt_phys_offset;
#endif
#endif
+#ifndef __ASSEMBLY__
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline const void *pfn_to_kaddr(unsigned long pfn)
+{
+ return __va(pfn << PAGE_SHIFT);
+}
+#endif
+
+#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({ \
+ unsigned long _addr = (unsigned long)vaddr; \
+ _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \
+ pfn_valid(virt_to_pfn((void *)_addr)); \
+})
+
/*
* Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
* and needs to be executable. This means the whole heap ends
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index f5ba1a3..e08513d 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -6,6 +6,7 @@
#include <asm/smp.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
+#include <asm/lppaca.h>
#include <asm/hvcall.h>
#endif
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 289f1ec..f5078a7 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -82,7 +82,8 @@ extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
struct vm_area_struct *vma,
enum pci_mmap_state mmap_state);
-
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+ enum pci_mmap_state mmap_type);
#define HAVE_PCI_LEGACY 1
extern void pcibios_claim_one_bus(struct pci_bus *b);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b2e9bc4..d0ee46d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -72,9 +72,9 @@ static inline pgprot_t pte_pgprot(pte_t pte)
}
#ifndef pmd_page_vaddr
-static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+static inline const void *pmd_page_vaddr(pmd_t pmd)
{
- return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS));
+ return __va(pmd_val(pmd) & ~PMD_MASKED_BITS);
}
#define pmd_page_vaddr pmd_page_vaddr
#endif
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 8239c0a..fe3d0ea 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -9,6 +9,7 @@
#include <asm/hvcall.h>
#include <asm/paca.h>
+#include <asm/lppaca.h>
#include <asm/page.h>
static inline long poll_pending(void)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ef6972aa..0056012 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -397,6 +397,7 @@
#define PPC_RAW_RFCI (0x4c000066)
#define PPC_RAW_RFDI (0x4c00004e)
#define PPC_RAW_RFMCI (0x4c00004c)
+#define PPC_RAW_TLBILX_LPID (0x7c000024)
#define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
#define PPC_RAW_WAIT_v203 (0x7c00007c)
#define PPC_RAW_WAIT(w, p) (0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
@@ -616,6 +617,7 @@
#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b))
#define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b)
#define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b)
+#define PPC_TLBILX_LPID stringify_in_c(.long PPC_RAW_TLBILX_LPID)
#define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b)
#define PPC_WAIT_v203 stringify_in_c(.long PPC_RAW_WAIT_v203)
#define PPC_WAIT(w, p) stringify_in_c(.long PPC_RAW_WAIT(w, p))
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a6c7069..b2c51d3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -172,11 +172,6 @@ struct thread_struct {
unsigned int align_ctl; /* alignment handling control */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct perf_event *ptrace_bps[HBP_NUM_MAX];
- /*
- * Helps identify source of single-step exception and subsequent
- * hw-breakpoint enablement
- */
- struct perf_event *last_hit_ubp[HBP_NUM_MAX];
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
unsigned long trap_nr; /* last trap # on this thread */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bb01212..4ae4ab9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1414,11 +1414,9 @@ static inline void mtmsr_isync(unsigned long val)
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
: "=r" (rval)); rval;})
-#ifndef mtspr
#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \
: "r" ((unsigned long)(v)) \
: "memory")
-#endif
#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
static inline void wrtee(unsigned long val)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3abe15ac..c697c3c 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -202,7 +202,9 @@ typedef struct {
#define RTAS_USER_REGION_SIZE (64 * 1024)
/* RTAS return status codes */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware Error */
#define RTAS_BUSY -2 /* RTAS Busy */
+#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */
#define RTAS_EXTENDED_DELAY_MIN 9900
#define RTAS_EXTENDED_DELAY_MAX 9905
@@ -425,6 +427,7 @@ extern int rtas_set_indicator(int indicator, int index, int new_value);
extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
extern void rtas_progress(char *s, unsigned short hex);
int rtas_ibm_suspend_me(int *fw_status);
+int rtas_error_rc(int rtas_rc);
struct rtc_time;
extern time64_t rtas_get_boot_time(void);
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 4e1f548..ea26665 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -74,6 +74,8 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
(unsigned long)_stext < end;
}
+#else
+static inline unsigned long kernel_toc_addr(void) { BUILD_BUG(); return -1UL; }
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index e29e83f..eed74c1 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -8,7 +8,6 @@
extern void ppc_printk_progress(char *s, unsigned short hex);
extern unsigned long long memory_limit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
struct device_node;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8a4d4f4..f4e6f2d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -143,5 +143,20 @@ static inline int cpu_to_coregroup_id(int cpu)
#endif
#endif
+#ifdef CONFIG_HOTPLUG_SMT
+#include <linux/cpu_smt.h>
+#include <asm/cputhreads.h>
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return cpu == cpu_first_thread_sibling(cpu);
+}
+
+static inline bool topology_smt_thread_allowed(unsigned int cpu)
+{
+ return cpu_thread_in_core(cpu) < cpu_smt_num_threads;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TOPOLOGY_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a2d255aa..fb725ec 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -386,7 +386,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n)
extern long __copy_from_user_flushcache(void *dst, const void __user *src,
unsigned size);
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr, len)))
return false;
@@ -401,7 +401,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_save prevent_user_access_return
#define user_access_restore restore_user_access
-static __must_check inline bool
+static __must_check __always_inline bool
user_read_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr, len)))
@@ -415,7 +415,7 @@ user_read_access_begin(const void __user *ptr, size_t len)
#define user_read_access_begin user_read_access_begin
#define user_read_access_end prevent_current_read_from_user
-static __must_check inline bool
+static __must_check __always_inline bool
user_write_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr, len)))
diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h
index b054a85..6f250fe 100644
--- a/arch/powerpc/include/asm/vermagic.h
+++ b/arch/powerpc/include/asm/vermagic.h
@@ -2,7 +2,9 @@
#ifndef _ASM_VERMAGIC_H
#define _ASM_VERMAGIC_H
-#ifdef CONFIG_MPROFILE_KERNEL
+#ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+#define MODULE_ARCH_VERMAGIC_FTRACE "patchable-function-entry "
+#elif defined(CONFIG_MPROFILE_KERNEL)
#define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel "
#else
#define MODULE_ARCH_VERMAGIC_FTRACE ""
diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h
new file mode 100644
index 0000000..8c2f795
--- /dev/null
+++ b/arch/powerpc/include/asm/vphn.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_VPHN_H
+#define _ASM_POWERPC_VPHN_H
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU 1
+#define VPHN_FLAG_PCPU 2
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif // _ASM_POWERPC_VPHN_H
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index 1bcfca5f..92298d6 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -4,6 +4,8 @@
#include <linux/audit.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
@@ -41,7 +43,6 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_PPC64
- extern int ppc32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_PPC)
return ppc32_classify_syscall(syscall);
#endif
diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h
new file mode 100644
index 0000000..c6c79c3
--- /dev/null
+++ b/arch/powerpc/kernel/audit_32.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __AUDIT_32_H__
+#define __AUDIT_32_H__
+
+extern int ppc32_classify_syscall(unsigned);
+
+#endif
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index d92ffe4..57b38c5 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -3,6 +3,8 @@
#include <linux/audit_arch.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
unsigned ppc32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 8a32bff..e97a0fd 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -75,6 +75,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
+ /* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+ if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+ t->mmu_features |= MMU_FTR_KUAP;
+
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
/*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index fe27d41..9692acb 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -29,7 +29,6 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
#include <asm/kup.h>
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 033116e..1a9b5ae 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -3,6 +3,7 @@
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*/
+#include <linux/export.h>
#include <linux/threads.h>
#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
@@ -12,7 +13,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ea0a073..3ff2da7 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -654,6 +654,7 @@ int __init fadump_reserve_mem(void)
return ret;
error_out:
fw_dump.fadump_enabled = 0;
+ fw_dump.reserve_dump_area_size = 0;
return 0;
}
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index f71f2bb..6a9acfb 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -9,6 +9,7 @@
* Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
*/
+#include <linux/export.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -18,7 +19,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 3f68a16..b32e7b2 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -38,7 +38,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include "head_32.h"
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 63a85c1..a3197c9 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -35,7 +35,6 @@
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/synch.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include "head_booke.h"
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 6440b1b..4690c21 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -40,7 +40,6 @@
#include <asm/hw_irq.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index fdbee10..97e9ea0 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -40,7 +40,6 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include "head_booke.h"
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a79751e..647b0b4 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -29,7 +29,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/interrupt.h>
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index c51f28b..6764b98 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -31,7 +31,6 @@
#include <asm/ptrace.h>
#include <asm/bug.h>
#include <asm/kvm_book3s_asm.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include <asm/interrupt.h>
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index e1b4e70..b8513dc 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -43,16 +43,6 @@ int hw_breakpoint_slots(int type)
return 0; /* no instruction breakpoints available */
}
-static bool single_step_pending(void)
-{
- int i;
-
- for (i = 0; i < nr_wp_slots(); i++) {
- if (current->thread.last_hit_ubp[i])
- return true;
- }
- return false;
-}
/*
* Install a perf counter breakpoint.
@@ -84,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
* Do not install DABR values if the instruction must be single-stepped.
* If so, DABR will be populated in single_step_dabr_instruction().
*/
- if (!single_step_pending())
+ if (!info->perf_single_step)
__set_breakpoint(i, info);
return 0;
@@ -124,275 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp)
return bp->overflow_handler == ptrace_triggered;
}
-struct breakpoint {
- struct list_head list;
- struct perf_event *bp;
- bool ptrace_bp;
-};
-
-/*
- * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
- * rely on it safely synchronizing internals here; however, we can rely on it
- * not requesting more breakpoints than available.
- */
-static DEFINE_SPINLOCK(cpu_bps_lock);
-static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
-static DEFINE_SPINLOCK(task_bps_lock);
-static LIST_HEAD(task_bps);
-
-static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
-{
- struct breakpoint *tmp;
-
- tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
- tmp->bp = bp;
- tmp->ptrace_bp = is_ptrace_bp(bp);
- return tmp;
-}
-
-static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
-{
- __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
-
- bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
- bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
- bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
- bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
-
- return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
-}
-
-static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
-{
- return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
-}
-
-static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
-{
- return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
-}
-
-static int task_bps_add(struct perf_event *bp)
-{
- struct breakpoint *tmp;
-
- tmp = alloc_breakpoint(bp);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
-
- spin_lock(&task_bps_lock);
- list_add(&tmp->list, &task_bps);
- spin_unlock(&task_bps_lock);
- return 0;
-}
-
-static void task_bps_remove(struct perf_event *bp)
-{
- struct list_head *pos, *q;
-
- spin_lock(&task_bps_lock);
- list_for_each_safe(pos, q, &task_bps) {
- struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
-
- if (tmp->bp == bp) {
- list_del(&tmp->list);
- kfree(tmp);
- break;
- }
- }
- spin_unlock(&task_bps_lock);
-}
-
-/*
- * If any task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool all_task_bps_check(struct perf_event *bp)
-{
- struct breakpoint *tmp;
- bool ret = false;
-
- spin_lock(&task_bps_lock);
- list_for_each_entry(tmp, &task_bps, list) {
- if (!can_co_exist(tmp, bp)) {
- ret = true;
- break;
- }
- }
- spin_unlock(&task_bps_lock);
- return ret;
-}
-
-/*
- * If same task has breakpoint from alternate infrastructure,
- * return true. Otherwise return false.
- */
-static bool same_task_bps_check(struct perf_event *bp)
-{
- struct breakpoint *tmp;
- bool ret = false;
-
- spin_lock(&task_bps_lock);
- list_for_each_entry(tmp, &task_bps, list) {
- if (tmp->bp->hw.target == bp->hw.target &&
- !can_co_exist(tmp, bp)) {
- ret = true;
- break;
- }
- }
- spin_unlock(&task_bps_lock);
- return ret;
-}
-
-static int cpu_bps_add(struct perf_event *bp)
-{
- struct breakpoint **cpu_bp;
- struct breakpoint *tmp;
- int i = 0;
-
- tmp = alloc_breakpoint(bp);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
-
- spin_lock(&cpu_bps_lock);
- cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
- for (i = 0; i < nr_wp_slots(); i++) {
- if (!cpu_bp[i]) {
- cpu_bp[i] = tmp;
- break;
- }
- }
- spin_unlock(&cpu_bps_lock);
- return 0;
-}
-
-static void cpu_bps_remove(struct perf_event *bp)
-{
- struct breakpoint **cpu_bp;
- int i = 0;
-
- spin_lock(&cpu_bps_lock);
- cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
- for (i = 0; i < nr_wp_slots(); i++) {
- if (!cpu_bp[i])
- continue;
-
- if (cpu_bp[i]->bp == bp) {
- kfree(cpu_bp[i]);
- cpu_bp[i] = NULL;
- break;
- }
- }
- spin_unlock(&cpu_bps_lock);
-}
-
-static bool cpu_bps_check(int cpu, struct perf_event *bp)
-{
- struct breakpoint **cpu_bp;
- bool ret = false;
- int i;
-
- spin_lock(&cpu_bps_lock);
- cpu_bp = per_cpu_ptr(cpu_bps, cpu);
- for (i = 0; i < nr_wp_slots(); i++) {
- if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
- ret = true;
- break;
- }
- }
- spin_unlock(&cpu_bps_lock);
- return ret;
-}
-
-static bool all_cpu_bps_check(struct perf_event *bp)
-{
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (cpu_bps_check(cpu, bp))
- return true;
- }
- return false;
-}
-
-int arch_reserve_bp_slot(struct perf_event *bp)
-{
- int ret;
-
- /* ptrace breakpoint */
- if (is_ptrace_bp(bp)) {
- if (all_cpu_bps_check(bp))
- return -ENOSPC;
-
- if (same_task_bps_check(bp))
- return -ENOSPC;
-
- return task_bps_add(bp);
- }
-
- /* perf breakpoint */
- if (is_kernel_addr(bp->attr.bp_addr))
- return 0;
-
- if (bp->hw.target && bp->cpu == -1) {
- if (same_task_bps_check(bp))
- return -ENOSPC;
-
- return task_bps_add(bp);
- } else if (!bp->hw.target && bp->cpu != -1) {
- if (all_task_bps_check(bp))
- return -ENOSPC;
-
- return cpu_bps_add(bp);
- }
-
- if (same_task_bps_check(bp))
- return -ENOSPC;
-
- ret = cpu_bps_add(bp);
- if (ret)
- return ret;
- ret = task_bps_add(bp);
- if (ret)
- cpu_bps_remove(bp);
-
- return ret;
-}
-
-void arch_release_bp_slot(struct perf_event *bp)
-{
- if (!is_kernel_addr(bp->attr.bp_addr)) {
- if (bp->hw.target)
- task_bps_remove(bp);
- if (bp->cpu != -1)
- cpu_bps_remove(bp);
- }
-}
-
-/*
- * Perform cleanup of arch-specific counters during unregistration
- * of the perf-event
- */
-void arch_unregister_hw_breakpoint(struct perf_event *bp)
-{
- /*
- * If the breakpoint is unregistered between a hw_breakpoint_handler()
- * and the single_step_dabr_instruction(), then cleanup the breakpoint
- * restoration variables to prevent dangling pointers.
- * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
- */
- if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) {
- int i;
-
- for (i = 0; i < nr_wp_slots(); i++) {
- if (bp->ctx->task->thread.last_hit_ubp[i] == bp)
- bp->ctx->task->thread.last_hit_ubp[i] = NULL;
- }
- }
-}
-
/*
* Check for virtual address in kernel space.
*/
@@ -499,6 +220,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
* Restores the breakpoint on the debug registers.
* Invoke this function if it is known that the execution context is
* about to change to cause loss of MSR_SE settings.
+ *
+ * The perf watchpoint will simply re-trigger once the thread is started again,
+ * and the watchpoint handler will set up MSR_SE and perf_single_step as
+ * needed.
*/
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
@@ -506,7 +231,9 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
int i;
for (i = 0; i < nr_wp_slots(); i++) {
- if (unlikely(tsk->thread.last_hit_ubp[i]))
+ struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
+
+ if (unlikely(bp && counter_arch_bp(bp)->perf_single_step))
goto reset;
}
return;
@@ -516,7 +243,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
for (i = 0; i < nr_wp_slots(); i++) {
info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
__set_breakpoint(i, info);
- tsk->thread.last_hit_ubp[i] = NULL;
+ info->perf_single_step = false;
}
}
@@ -534,23 +261,22 @@ static bool is_octword_vsx_instr(int type, int size)
* We've failed in reliably handling the hw-breakpoint. Unregister
* it and throw a warning message to let the user know about it.
*/
-static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info)
+static void handler_error(struct perf_event *bp)
{
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
- info->address);
+ counter_arch_bp(bp)->address);
perf_event_disable_inatomic(bp);
}
-static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info)
+static void larx_stcx_err(struct perf_event *bp)
{
printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
- info->address);
+ counter_arch_bp(bp)->address);
perf_event_disable_inatomic(bp);
}
static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
- struct arch_hw_breakpoint **info, int *hit,
- ppc_inst_t instr)
+ int *hit, ppc_inst_t instr)
{
int i;
int stepped;
@@ -560,8 +286,9 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
for (i = 0; i < nr_wp_slots(); i++) {
if (!hit[i])
continue;
- current->thread.last_hit_ubp[i] = bp[i];
- info[i] = NULL;
+
+ counter_arch_bp(bp[i])->perf_single_step = true;
+ bp[i] = NULL;
}
regs_set_return_msr(regs, regs->msr | MSR_SE);
return false;
@@ -572,15 +299,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
for (i = 0; i < nr_wp_slots(); i++) {
if (!hit[i])
continue;
- handler_error(bp[i], info[i]);
- info[i] = NULL;
+ handler_error(bp[i]);
+ bp[i] = NULL;
}
return false;
}
return true;
}
-static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
+static void handle_p10dd1_spurious_exception(struct perf_event **bp,
int *hit, unsigned long ea)
{
int i;
@@ -592,10 +319,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
* spurious exception.
*/
for (i = 0; i < nr_wp_slots(); i++) {
- if (!info[i])
+ struct arch_hw_breakpoint *info;
+
+ if (!bp[i])
continue;
- hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE);
+ info = counter_arch_bp(bp[i]);
+
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
/*
* Ending address of DAWR range is less than starting
@@ -625,9 +356,9 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
return;
for (i = 0; i < nr_wp_slots(); i++) {
- if (info[i]) {
+ if (bp[i]) {
hit[i] = 1;
- info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
}
}
}
@@ -638,7 +369,6 @@ int hw_breakpoint_handler(struct die_args *args)
int rc = NOTIFY_STOP;
struct perf_event *bp[HBP_NUM_MAX] = { NULL };
struct pt_regs *regs = args->regs;
- struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL };
int i;
int hit[HBP_NUM_MAX] = {0};
int nr_hit = 0;
@@ -663,18 +393,20 @@ int hw_breakpoint_handler(struct die_args *args)
wp_get_instr_detail(regs, &instr, &type, &size, &ea);
for (i = 0; i < nr_wp_slots(); i++) {
+ struct arch_hw_breakpoint *info;
+
bp[i] = __this_cpu_read(bp_per_reg[i]);
if (!bp[i])
continue;
- info[i] = counter_arch_bp(bp[i]);
- info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ info = counter_arch_bp(bp[i]);
+ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (wp_check_constraints(regs, instr, ea, type, size, info[i])) {
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
if (!IS_ENABLED(CONFIG_PPC_8xx) &&
ppc_inst_equal(instr, ppc_inst(0))) {
- handler_error(bp[i], info[i]);
- info[i] = NULL;
+ handler_error(bp[i]);
+ bp[i] = NULL;
err = 1;
continue;
}
@@ -693,7 +425,7 @@ int hw_breakpoint_handler(struct die_args *args)
/* Workaround for Power10 DD1 */
if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
is_octword_vsx_instr(type, size)) {
- handle_p10dd1_spurious_exception(info, hit, ea);
+ handle_p10dd1_spurious_exception(bp, hit, ea);
} else {
rc = NOTIFY_DONE;
goto out;
@@ -708,10 +440,10 @@ int hw_breakpoint_handler(struct die_args *args)
*/
if (ptrace_bp) {
for (i = 0; i < nr_wp_slots(); i++) {
- if (!hit[i])
+ if (!hit[i] || !is_ptrace_bp(bp[i]))
continue;
perf_bp_event(bp[i], regs);
- info[i] = NULL;
+ bp[i] = NULL;
}
rc = NOTIFY_DONE;
goto reset;
@@ -722,13 +454,13 @@ int hw_breakpoint_handler(struct die_args *args)
for (i = 0; i < nr_wp_slots(); i++) {
if (!hit[i])
continue;
- larx_stcx_err(bp[i], info[i]);
- info[i] = NULL;
+ larx_stcx_err(bp[i]);
+ bp[i] = NULL;
}
goto reset;
}
- if (!stepping_handler(regs, bp, info, hit, instr))
+ if (!stepping_handler(regs, bp, hit, instr))
goto reset;
}
@@ -739,15 +471,15 @@ int hw_breakpoint_handler(struct die_args *args)
for (i = 0; i < nr_wp_slots(); i++) {
if (!hit[i])
continue;
- if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp[i], regs);
}
reset:
for (i = 0; i < nr_wp_slots(); i++) {
- if (!info[i])
+ if (!bp[i])
continue;
- __set_breakpoint(i, info[i]);
+ __set_breakpoint(i, counter_arch_bp(bp[i]));
}
out:
@@ -762,24 +494,28 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler);
static int single_step_dabr_instruction(struct die_args *args)
{
struct pt_regs *regs = args->regs;
- struct perf_event *bp = NULL;
- struct arch_hw_breakpoint *info;
- int i;
bool found = false;
/*
* Check if we are single-stepping as a result of a
* previous HW Breakpoint exception
*/
- for (i = 0; i < nr_wp_slots(); i++) {
- bp = current->thread.last_hit_ubp[i];
+ for (int i = 0; i < nr_wp_slots(); i++) {
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
+
+ bp = __this_cpu_read(bp_per_reg[i]);
if (!bp)
continue;
- found = true;
info = counter_arch_bp(bp);
+ if (!info->perf_single_step)
+ continue;
+
+ found = true;
+
/*
* We shall invoke the user-defined callback function in the
* single stepping handler to confirm to 'trigger-after-execute'
@@ -787,26 +523,16 @@ static int single_step_dabr_instruction(struct die_args *args)
*/
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
- current->thread.last_hit_ubp[i] = NULL;
- }
- if (!found)
- return NOTIFY_DONE;
-
- for (i = 0; i < nr_wp_slots(); i++) {
- bp = __this_cpu_read(bp_per_reg[i]);
- if (!bp)
- continue;
-
- info = counter_arch_bp(bp);
- __set_breakpoint(i, info);
+ info->perf_single_step = false;
+ __set_breakpoint(i, counter_arch_bp(bp));
}
/*
* If the process was being single-stepped by ptrace, let the
* other single-step actions occur (e.g. generate SIGTRAP).
*/
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (!found || test_thread_flag(TIF_SINGLESTEP))
return NOTIFY_DONE;
return NOTIFY_STOP;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c52449ae..14251bc 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -172,17 +172,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
return 0;
}
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes. Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
.notifier_call = fail_iommu_bus_notify
};
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+#endif
+
static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
- bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
- bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
#endif
return 0;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 6ee6574..1da2f6e 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -5,8 +5,8 @@
#include <linux/serial_core.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/serial_reg.h>
#include <asm/io.h>
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index fb7de354..29e1440 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -10,11 +10,11 @@
*
* setjmp/longjmp code by Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
.text
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index daf8f87..2eabb15 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -8,6 +8,7 @@
*
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
@@ -22,7 +23,6 @@
#include <asm/processor.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
.text
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 2c9ac70a..1a8cdaf 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -9,6 +9,7 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/sys.h>
#include <asm/unistd.h>
@@ -23,7 +24,6 @@
#include <asm/kexec.h>
#include <asm/ptrace.h>
#include <asm/mmu.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
.text
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 9257028..7112adc 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -465,7 +465,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return 0;
}
-#ifdef CONFIG_MPROFILE_KERNEL
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
static u32 stub_insns[] = {
#ifdef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index f89376f..adc76fa 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -13,9 +13,7 @@
#include <linux/export.h>
#include <linux/mod_devicetable.h>
#include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/atomic.h>
#include <asm/errno.h>
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e88d7c9..040255d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -125,7 +125,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
{
struct pci_controller *phb;
- phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL);
if (phb == NULL)
return NULL;
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 15414c8..9fabb4d 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -74,7 +74,7 @@ void release_pmc_hardware(void)
}
EXPORT_SYMBOL_GPL(release_pmc_hardware);
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
void power4_enable_pmcs(void)
{
unsigned long hid0;
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 3910cd7..584cf5c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -716,69 +716,86 @@ int gpr32_get_common(struct task_struct *target,
return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32));
}
-int gpr32_set_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf,
- unsigned long *regs)
+static int gpr32_set_common_kernel(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, unsigned long *regs)
{
const compat_ulong_t *k = kbuf;
+
+ pos /= sizeof(compat_ulong_t);
+ count /= sizeof(compat_ulong_t);
+
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+
+ if (count > 0 && pos == PT_MSR) {
+ set_user_msr(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+
+ if (count > 0 && pos == PT_TRAP) {
+ set_user_trap(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ kbuf = k;
+ pos *= sizeof(compat_ulong_t);
+ count *= sizeof(compat_ulong_t);
+ user_regset_copyin_ignore(&pos, &count, &kbuf, NULL,
+ (PT_TRAP + 1) * sizeof(compat_ulong_t), -1);
+ return 0;
+}
+
+static int gpr32_set_common_user(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void __user *ubuf, unsigned long *regs)
+{
const compat_ulong_t __user *u = ubuf;
+ const void *kbuf = NULL;
compat_ulong_t reg;
- if (!kbuf && !user_read_access_begin(u, count))
+ if (!user_read_access_begin(u, count))
return -EFAULT;
pos /= sizeof(reg);
count /= sizeof(reg);
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- regs[pos++] = *k++;
- else
- for (; count > 0 && pos < PT_MSR; --count) {
- unsafe_get_user(reg, u++, Efault);
- regs[pos++] = reg;
- }
-
+ for (; count > 0 && pos < PT_MSR; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
+ }
if (count > 0 && pos == PT_MSR) {
- if (kbuf)
- reg = *k++;
- else
- unsafe_get_user(reg, u++, Efault);
+ unsafe_get_user(reg, u++, Efault);
set_user_msr(target, reg);
++pos;
--count;
}
- if (kbuf) {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
- regs[pos++] = *k++;
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- ++k;
- } else {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
- unsafe_get_user(reg, u++, Efault);
- regs[pos++] = reg;
- }
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- unsafe_get_user(reg, u++, Efault);
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
}
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ unsafe_get_user(reg, u++, Efault);
if (count > 0 && pos == PT_TRAP) {
- if (kbuf)
- reg = *k++;
- else
- unsafe_get_user(reg, u++, Efault);
+ unsafe_get_user(reg, u++, Efault);
set_user_trap(target, reg);
++pos;
--count;
}
- if (!kbuf)
- user_read_access_end();
+ user_read_access_end();
- kbuf = k;
ubuf = u;
pos *= sizeof(reg);
count *= sizeof(reg);
@@ -791,6 +808,18 @@ int gpr32_set_common(struct task_struct *target,
return -EFAULT;
}
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs)
+{
+ if (kbuf)
+ return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs);
+ else
+ return gpr32_set_common_user(target, regset, pos, count, ubuf, regs);
+}
+
static int gpr32_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c087eee..eddc031 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1330,33 +1330,34 @@ bool __ref rtas_busy_delay(int status)
}
EXPORT_SYMBOL_GPL(rtas_busy_delay);
-static int rtas_error_rc(int rtas_rc)
+int rtas_error_rc(int rtas_rc)
{
int rc;
switch (rtas_rc) {
- case -1: /* Hardware Error */
- rc = -EIO;
- break;
- case -3: /* Bad indicator/domain/etc */
- rc = -EINVAL;
- break;
- case -9000: /* Isolation error */
- rc = -EFAULT;
- break;
- case -9001: /* Outstanding TCE/PTE */
- rc = -EEXIST;
- break;
- case -9002: /* No usable slot */
- rc = -ENODEV;
- break;
- default:
- pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
- rc = -ERANGE;
- break;
+ case RTAS_HARDWARE_ERROR: /* Hardware Error */
+ rc = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */
+ rc = -EINVAL;
+ break;
+ case -9000: /* Isolation error */
+ rc = -EFAULT;
+ break;
+ case -9001: /* Outstanding TCE/PTE */
+ rc = -EEXIST;
+ break;
+ case -9002: /* No usable slot */
+ rc = -ENODEV;
+ break;
+ default:
+ pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
+ rc = -ERANGE;
+ break;
}
return rc;
}
+EXPORT_SYMBOL_GPL(rtas_error_rc);
int rtas_get_power_level(int powerdomain, int *level)
{
@@ -1587,6 +1588,7 @@ static bool ibm_extended_os_term;
void rtas_os_term(char *str)
{
s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
+ static struct rtas_args args;
int status;
/*
@@ -1607,7 +1609,8 @@ void rtas_os_term(char *str)
* schedules.
*/
do {
- status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf));
+ rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf));
+ status = be32_to_cpu(args.rets[0]);
} while (rtas_busy_delay_time(status));
if (status != 0)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d2a4462..2f1026f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -31,9 +31,9 @@
#include <linux/serial_8250.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
+#include <linux/of_irq.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
#include <asm/io.h>
@@ -969,8 +969,12 @@ void __init setup_arch(char **cmdline_p)
klp_init_thread_info(&init_task);
setup_initial_init_mm(_stext, _etext, _edata, _end);
-
+ /* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
mm_iommu_init(&init_mm);
+
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index fbbb695..5826f51 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
@@ -1087,7 +1088,7 @@ static int __init init_big_cores(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- unsigned int cpu;
+ unsigned int cpu, num_threads;
DBG("smp_prepare_cpus\n");
@@ -1154,6 +1155,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (smp_ops && smp_ops->probe)
smp_ops->probe();
+
+ // Initialise the generic SMT topology support
+ num_threads = 1;
+ if (smt_enabled_at_boot)
+ num_threads = smt_enabled_at_boot;
+ cpu_smt_set_num_threads(num_threads, threads_per_core);
}
void smp_prepare_boot_cpu(void)
@@ -1616,6 +1623,9 @@ void start_secondary(void *unused)
mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
@@ -1751,6 +1761,14 @@ int __cpu_disable(void)
void __cpu_die(unsigned int cpu)
{
+ /*
+ * This could perhaps be a generic call in idlea_task_dead(), but
+ * that requires testing from all archs, so first put it here to
+ */
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+ dec_mm_active_cpus(&init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
if (smp_ops->cpu_die)
smp_ops->cpu_die(cpu);
}
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 18b9d32..77fedb1 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -46,7 +46,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
iamr = mfspr(SPRN_IAMR);
regs->amr = amr;
regs->iamr = iamr;
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+ if (mmu_has_feature(MMU_FTR_KUAP)) {
mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
flush_needed = true;
}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 9feab5e..a9cd650 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -6,13 +6,13 @@
* Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
*/
+#include <linux/export.h>
#include <asm/asm-offsets.h>
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/bug.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index b16a9f9..125f4ca 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -6,15 +6,15 @@
ifdef CONFIG_FUNCTION_TRACER
# do not trace tracer code
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE)
endif
-obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o
+obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o
ifdef CONFIG_MPROFILE_KERNEL
-obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o
else
-obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o
endif
-obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o ftrace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_PPC64) += $(obj64-y)
@@ -25,3 +25,7 @@
KCOV_INSTRUMENT_ftrace.o := n
KCSAN_SANITIZE_ftrace.o := n
UBSAN_SANITIZE_ftrace.o := n
+GCOV_PROFILE_ftrace_64_pg.o := n
+KCOV_INSTRUMENT_ftrace_64_pg.o := n
+KCSAN_SANITIZE_ftrace_64_pg.o := n
+UBSAN_SANITIZE_ftrace_64_pg.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index a47f303..8201062 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -28,64 +28,52 @@
#include <asm/syscall.h>
#include <asm/inst.h>
-/*
- * We generally only have a single long_branch tramp and at most 2 or 3 plt
- * tramps generated. But, we don't use the plt tramps currently. We also allot
- * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
- * tramps in total. Set aside 8 just to be sure.
- */
-#define NUM_FTRACE_TRAMPS 8
+#define NUM_FTRACE_TRAMPS 2
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static ppc_inst_t
-ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
{
ppc_inst_t op;
- addr = ppc_function_entry((void *)addr);
-
- /* if (link) set op to 'bl' else 'b' */
+ WARN_ON(!is_offset_in_branch_range(addr - ip));
create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
return op;
}
-static inline int
-ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op)
{
- ppc_inst_t replaced;
-
- /*
- * Note:
- * We are paranoid about modifying text, as if a bug was to happen, it
- * could cause us to read or write to someplace that could cause harm.
- * Carefully read and modify the code with probe_kernel_*(), and make
- * sure what we read is what we expected it to be before modifying it.
- */
-
- /* read the text we want to modify */
- if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+ if (copy_inst_from_kernel_nofault(op, (void *)ip)) {
+ pr_err("0x%lx: fetching instruction failed\n", ip);
return -EFAULT;
-
- /* Make sure it is what we expect it to be */
- if (!ppc_inst_equal(replaced, old)) {
- pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
- ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
- return -EINVAL;
}
- /* replace the text with the new text */
- return patch_instruction((u32 *)ip, new);
+ return 0;
}
-/*
- * Helper functions that are the same for both PPC64 and PPC32.
- */
-static int test_24bit_addr(unsigned long ip, unsigned long addr)
+static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst)
{
- addr = ppc_function_entry((void *)addr);
+ ppc_inst_t op;
+ int ret;
- return is_offset_in_branch_range(addr - ip);
+ ret = ftrace_read_inst(ip, &op);
+ if (!ret && !ppc_inst_equal(op, inst)) {
+ pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+ ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op));
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+ int ret = ftrace_validate_inst(ip, old);
+
+ if (!ret)
+ ret = patch_instruction((u32 *)ip, new);
+
+ return ret;
}
static int is_bl_op(ppc_inst_t op)
@@ -93,138 +81,11 @@ static int is_bl_op(ppc_inst_t op)
return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
}
-static int is_b_op(ppc_inst_t op)
-{
- return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0);
-}
-
-static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
-{
- int offset;
-
- offset = PPC_LI(ppc_inst_val(op));
- /* make it signed */
- if (offset & 0x02000000)
- offset |= 0xfe000000;
-
- return ip + (long)offset;
-}
-
-#ifdef CONFIG_MODULES
-static int
-__ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long entry, ptr, tramp;
- unsigned long ip = rec->ip;
- ppc_inst_t op, pop;
-
- /* read where this goes */
- if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
-
- /* Make sure that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
- if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
- if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
- pr_err("Fetching instruction at %lx failed.\n", ip - 4);
- return -EFAULT;
- }
-
- /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
- if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
- !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
- pr_err("Unexpected instruction %08lx around bl _mcount\n",
- ppc_inst_as_ulong(op));
- return -EINVAL;
- }
- } else if (IS_ENABLED(CONFIG_PPC64)) {
- /*
- * Check what is in the next instruction. We can see ld r2,40(r1), but
- * on first pass after boot we will see mflr r0.
- */
- if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
- pr_err("Fetching op failed.\n");
- return -EFAULT;
- }
-
- if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) {
- pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
- ppc_inst_as_ulong(op));
- return -EINVAL;
- }
- }
-
- /*
- * When using -mprofile-kernel or PPC32 there is no load to jump over.
- *
- * Otherwise our original call site looks like:
- *
- * bl <tramp>
- * ld r2,XX(r1)
- *
- * Milton Miller pointed out that we can not simply nop the branch.
- * If a task was preempted when calling a trace function, the nops
- * will remove the way to restore the TOC in r2 and the r2 TOC will
- * get corrupted.
- *
- * Use a b +8 to jump over the load.
- * XXX: could make PCREL depend on MPROFILE_KERNEL
- * XXX: check PCREL && MPROFILE_KERNEL calling sequence
- */
- if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
- pop = ppc_inst(PPC_RAW_NOP());
- else
- pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */
-
- if (patch_instruction((u32 *)ip, pop)) {
- pr_err("Patching NOP failed.\n");
- return -EPERM;
- }
-
- return 0;
-}
-#else
-static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
-{
- return 0;
-}
-#endif /* CONFIG_MODULES */
-
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- /*
- * We have the compiler generated long_branch tramps at the end
- * and we prefer those
- */
- for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
if (!ftrace_tramps[i])
continue;
else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
@@ -233,449 +94,195 @@ static unsigned long find_ftrace_tramp(unsigned long ip)
return 0;
}
-static int add_ftrace_tramp(unsigned long tramp)
-{
- int i;
-
- for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (!ftrace_tramps[i]) {
- ftrace_tramps[i] = tramp;
- return 0;
- }
-
- return -1;
-}
-
-/*
- * If this is a compiler generated long_branch trampoline (essentially, a
- * trampoline that has a branch to _mcount()), we re-write the branch to
- * instead go to ftrace_[regs_]caller() and note down the location of this
- * trampoline.
- */
-static int setup_mcount_compiler_tramp(unsigned long tramp)
-{
- int i;
- ppc_inst_t op;
- unsigned long ptr;
-
- /* Is this a known long jump tramp? */
- for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (ftrace_tramps[i] == tramp)
- return 0;
-
- /* New trampoline -- read where this goes */
- if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
- pr_debug("Fetching opcode failed.\n");
- return -1;
- }
-
- /* Is this a 24 bit branch? */
- if (!is_b_op(op)) {
- pr_debug("Trampoline is not a long branch tramp.\n");
- return -1;
- }
-
- /* lets find where the pointer goes */
- ptr = find_bl_target(tramp, op);
-
- if (ptr != ppc_global_function_entry((void *)_mcount)) {
- pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
- return -1;
- }
-
- /* Let's re-write the tramp to go to ftrace_[regs_]caller */
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
- else
- ptr = ppc_global_function_entry((void *)ftrace_caller);
-
- if (patch_branch((u32 *)tramp, ptr, 0)) {
- pr_debug("REL24 out of range!\n");
- return -1;
- }
-
- if (add_ftrace_tramp(tramp)) {
- pr_debug("No tramp locations left\n");
- return -1;
- }
-
- return 0;
-}
-
-static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long tramp, ip = rec->ip;
- ppc_inst_t op;
-
- /* Read where this goes */
- if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
-
- /* Make sure that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
- return -EINVAL;
- }
-
- /* Let's find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- if (setup_mcount_compiler_tramp(tramp)) {
- /* Are other trampolines reachable? */
- if (!find_ftrace_tramp(ip)) {
- pr_err("No ftrace trampolines reachable from %ps\n",
- (void *)ip);
- return -EINVAL;
- }
- }
-
- if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
- pr_err("Patching NOP failed.\n");
- return -EPERM;
- }
-
- return 0;
-}
-
-int ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst)
{
unsigned long ip = rec->ip;
- ppc_inst_t old, new;
+ unsigned long stub;
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = ftrace_call_replace(ip, addr, 1);
- new = ppc_inst(PPC_RAW_NOP());
- return ftrace_modify_code(ip, old, new);
- } else if (core_kernel_text(ip)) {
- return __ftrace_make_nop_kernel(rec, addr);
- } else if (!IS_ENABLED(CONFIG_MODULES)) {
- return -EINVAL;
- }
-
- /*
- * Out of range jumps are called from modules.
- * We should either already have a pointer to the module
- * or it has been passed in.
- */
- if (!rec->arch.mod) {
- if (!mod) {
- pr_err("No module loaded addr=%lx\n", addr);
- return -EFAULT;
- }
- rec->arch.mod = mod;
- } else if (mod) {
- if (mod != rec->arch.mod) {
- pr_err("Record mod %p not equal to passed in mod %p\n",
- rec->arch.mod, mod);
- return -EINVAL;
- }
- /* nothing to do if mod == rec->arch.mod */
- } else
- mod = rec->arch.mod;
-
- return __ftrace_make_nop(mod, rec, addr);
-}
-
+ if (is_offset_in_branch_range(addr - ip)) {
+ /* Within range */
+ stub = addr;
#ifdef CONFIG_MODULES
-/*
- * Examine the existing instructions for __ftrace_make_call.
- * They should effectively be a NOP, and follow formal constraints,
- * depending on the ABI. Return false if they don't.
- */
-static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
-{
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
- else
- return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
- ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
-}
-
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- ppc_inst_t op[2];
- void *ip = (void *)rec->ip;
- unsigned long entry, ptr, tramp;
- struct module *mod = rec->arch.mod;
-
- /* read where this goes */
- if (copy_inst_from_kernel_nofault(op, ip))
- return -EFAULT;
-
- if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
- copy_inst_from_kernel_nofault(op + 1, ip + 4))
- return -EFAULT;
-
- if (!expected_nop_sequence(ip, op[0], op[1])) {
- pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
- ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
- return -EINVAL;
- }
-
- /* If we never set up ftrace trampoline(s), then bail */
- if (!mod->arch.tramp ||
- (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
- tramp = mod->arch.tramp_regs;
- else
- tramp = mod->arch.tramp;
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
- if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- return 0;
-}
-#else
-static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- return 0;
-}
-#endif /* CONFIG_MODULES */
-
-static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
-{
- ppc_inst_t op;
- void *ip = (void *)rec->ip;
- unsigned long tramp, entry, ptr;
-
- /* Make sure we're being asked to patch branch to a known ftrace addr */
- entry = ppc_global_function_entry((void *)ftrace_caller);
- ptr = ppc_global_function_entry((void *)addr);
-
- if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- entry = ppc_global_function_entry((void *)ftrace_regs_caller);
-
- if (ptr != entry) {
- pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
- return -EINVAL;
- }
-
- /* Make sure we have a nop */
- if (copy_inst_from_kernel_nofault(&op, ip)) {
- pr_err("Unable to read ftrace location %p\n", ip);
- return -EFAULT;
- }
-
- if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
- pr_err("Unexpected call sequence at %p: %08lx\n",
- ip, ppc_inst_as_ulong(op));
- return -EINVAL;
- }
-
- tramp = find_ftrace_tramp((unsigned long)ip);
- if (!tramp) {
- pr_err("No ftrace trampolines reachable from %ps\n", ip);
- return -EINVAL;
- }
-
- if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
- pr_err("Error patching branch to ftrace tramp!\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- ppc_inst_t old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = ppc_inst(PPC_RAW_NOP());
- new = ftrace_call_replace(ip, addr, 1);
- return ftrace_modify_code(ip, old, new);
+ } else if (rec->arch.mod) {
+ /* Module code would be going to one of the module stubs */
+ stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp :
+ rec->arch.mod->arch.tramp_regs);
+#endif
} else if (core_kernel_text(ip)) {
- return __ftrace_make_call_kernel(rec, addr);
- } else if (!IS_ENABLED(CONFIG_MODULES)) {
- /* We should not get here without modules */
+ /* We would be branching to one of our ftrace stubs */
+ stub = find_ftrace_tramp(ip);
+ if (!stub) {
+ pr_err("0x%lx: No ftrace stubs reachable\n", ip);
+ return -EINVAL;
+ }
+ } else {
return -EINVAL;
}
- /*
- * Out of range jumps are called from modules.
- * Being that we are converting from nop, it had better
- * already have a module defined.
- */
- if (!rec->arch.mod) {
- pr_err("No module loaded\n");
- return -EINVAL;
- }
-
- return __ftrace_make_call(rec, addr);
+ *call_inst = ftrace_create_branch_inst(ip, stub, 1);
+ return 0;
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-#ifdef CONFIG_MODULES
-static int
-__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
{
- ppc_inst_t op;
- unsigned long ip = rec->ip;
- unsigned long entry, ptr, tramp;
- struct module *mod = rec->arch.mod;
+ /* This should never be called since we override ftrace_replace_code() */
+ WARN_ON(1);
+ return -EINVAL;
+}
+#endif
- /* If we never set up ftrace trampolines, then bail */
- if (!mod->arch.tramp || !mod->arch.tramp_regs) {
- pr_err("No ftrace trampoline\n");
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ ppc_inst_t old, new;
+ int ret;
+
+ /* This can only ever be called during module load */
+ if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip)))
+ return -EINVAL;
+
+ old = ppc_inst(PPC_RAW_NOP());
+ ret = ftrace_get_call_inst(rec, addr, &new);
+ if (ret)
+ return ret;
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+ /*
+ * This should never be called since we override ftrace_replace_code(),
+ * as well as ftrace_init_nop()
+ */
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+void ftrace_replace_code(int enable)
+{
+ ppc_inst_t old, new, call_inst, new_call_inst;
+ ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP());
+ unsigned long ip, new_addr, addr;
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret = 0, update;
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ ip = rec->ip;
+
+ if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED))
+ continue;
+
+ addr = ftrace_get_addr_curr(rec);
+ new_addr = ftrace_get_addr_new(rec);
+ update = ftrace_update_record(rec, enable);
+
+ switch (update) {
+ case FTRACE_UPDATE_IGNORE:
+ default:
+ continue;
+ case FTRACE_UPDATE_MODIFY_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst);
+ ret |= ftrace_get_call_inst(rec, addr, &call_inst);
+ old = call_inst;
+ new = new_call_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_NOP:
+ ret = ftrace_get_call_inst(rec, addr, &call_inst);
+ old = call_inst;
+ new = nop_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &call_inst);
+ old = nop_inst;
+ new = call_inst;
+ break;
+ }
+
+ if (!ret)
+ ret = ftrace_modify_code(ip, old, new);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (ret)
+ ftrace_bug(ret, rec);
+ return;
+}
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ unsigned long addr, ip = rec->ip;
+ ppc_inst_t old, new;
+ int ret = 0;
+
+ /* Verify instructions surrounding the ftrace location */
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* Expect nops */
+ ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP()));
+ if (!ret)
+ ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP()));
+ } else if (IS_ENABLED(CONFIG_PPC32)) {
+ /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ if (!ret)
+ ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)));
+ } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+ /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
+ ret = ftrace_read_inst(ip - 4, &old);
+ if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)));
+ }
+ } else {
return -EINVAL;
}
- /* read where this goes */
- if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
+ if (ret)
+ return ret;
- /* Make sure that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
- entry = ppc_global_function_entry((void *)old_addr);
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- if (tramp != entry) {
- /* old_addr is not within range, so we must have used a trampoline */
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
+ if (!core_kernel_text(ip)) {
+ if (!mod) {
+ pr_err("0x%lx: No module provided for non-kernel address\n", ip);
return -EFAULT;
}
-
- pr_devel("trampoline target %lx", ptr);
-
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
+ rec->arch.mod = mod;
}
- /* The new target may be within range */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- return 0;
- }
-
- if (rec->flags & FTRACE_FL_REGS)
- tramp = mod->arch.tramp_regs;
- else
- tramp = mod->arch.tramp;
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
- if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- return 0;
-}
-#else
-static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
-{
- return 0;
-}
-#endif
-
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
-{
- unsigned long ip = rec->ip;
- ppc_inst_t old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
- /* within range */
- old = ftrace_call_replace(ip, old_addr, 1);
- new = ftrace_call_replace(ip, addr, 1);
- return ftrace_modify_code(ip, old, new);
- } else if (core_kernel_text(ip)) {
+ /* Nop-out the ftrace location */
+ new = ppc_inst(PPC_RAW_NOP());
+ addr = MCOUNT_ADDR;
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* we instead patch-in the 'mflr r0' */
+ old = ppc_inst(PPC_RAW_NOP());
+ new = ppc_inst(PPC_RAW_MFLR(_R0));
+ ret = ftrace_modify_code(ip - 4, old, new);
+ } else if (is_offset_in_branch_range(addr - ip)) {
+ /* Within range */
+ old = ftrace_create_branch_inst(ip, addr, 1);
+ ret = ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) {
/*
- * We always patch out of range locations to go to the regs
- * variant, so there is nothing to do here
+ * We would be branching to a linker-generated stub, or to the module _mcount
+ * stub. Let's just confirm we have a 'bl' here.
*/
- return 0;
- } else if (!IS_ENABLED(CONFIG_MODULES)) {
- /* We should not get here without modules */
+ ret = ftrace_read_inst(ip, &old);
+ if (ret)
+ return ret;
+ if (!is_bl_op(old)) {
+ pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old));
+ return -EINVAL;
+ }
+ ret = patch_instruction((u32 *)ip, new);
+ } else {
return -EINVAL;
}
- /*
- * Out of range jumps are called from modules.
- */
- if (!rec->arch.mod) {
- pr_err("No module loaded\n");
- return -EINVAL;
- }
-
- return __ftrace_modify_call(rec, old_addr, addr);
+ return ret;
}
-#endif
int ftrace_update_ftrace_func(ftrace_func_t func)
{
@@ -684,14 +291,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
int ret;
old = ppc_inst_read((u32 *)&ftrace_call);
- new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
ret = ftrace_modify_code(ip, old, new);
/* Also update the regs callback function */
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
ip = (unsigned long)(&ftrace_regs_call);
old = ppc_inst_read((u32 *)&ftrace_regs_call);
- new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
ret = ftrace_modify_code(ip, old, new);
}
@@ -707,11 +314,6 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-#ifdef CONFIG_PPC64
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
-
void ftrace_free_init_tramp(void)
{
int i;
@@ -723,35 +325,43 @@ void ftrace_free_init_tramp(void)
}
}
-int __init ftrace_dyn_arch_init(void)
+static void __init add_ftrace_tramp(unsigned long tramp)
{
int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return;
+ }
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
-#ifdef CONFIG_PPC_KERNEL_PCREL
+ unsigned long addr = FTRACE_REGS_ADDR;
+ long reladdr;
+ int i;
u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
/* pla r12,addr */
PPC_PREFIX_MLS | __PPC_PRFX_R(1),
PPC_INST_PADDI | ___PPC_RT(_R12),
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR()
- };
-#else
- u32 stub_insns[] = {
- PPC_RAW_LD(_R12, _R13, PACATOC),
+#elif defined(CONFIG_PPC64)
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
PPC_RAW_ADDIS(_R12, _R12, 0),
PPC_RAW_ADDI(_R12, _R12, 0),
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR()
- };
+#else
+ PPC_RAW_LIS(_R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
#endif
-
- unsigned long addr;
- long reladdr;
-
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- addr = ppc_global_function_entry((void *)ftrace_regs_caller);
- else
- addr = ppc_global_function_entry((void *)ftrace_caller);
+ };
if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
for (i = 0; i < 2; i++) {
@@ -768,10 +378,10 @@ int __init ftrace_dyn_arch_init(void)
tramp[i][1] |= IMM_L(reladdr);
add_ftrace_tramp((unsigned long)tramp[i]);
}
- } else {
+ } else if (IS_ENABLED(CONFIG_PPC64)) {
reladdr = addr - kernel_toc_addr();
- if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) {
+ if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) {
pr_err("Address of %ps out of range of kernel_toc.\n",
(void *)addr);
return -1;
@@ -783,51 +393,23 @@ int __init ftrace_dyn_arch_init(void)
tramp[i][2] |= PPC_LO(reladdr);
add_ftrace_tramp((unsigned long)tramp[i]);
}
+ } else {
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= PPC_HA(addr);
+ tramp[i][1] |= PPC_LO(addr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
}
return 0;
}
-#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-extern void ftrace_graph_call(void);
-extern void ftrace_graph_stub(void);
-
-static int ftrace_modify_ftrace_graph_caller(bool enable)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- unsigned long ip = (unsigned long)(&ftrace_graph_call);
- unsigned long addr = (unsigned long)(&ftrace_graph_caller);
- unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- ppc_inst_t old, new;
-
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
- return 0;
-
- old = ftrace_call_replace(ip, enable ? stub : addr, 0);
- new = ftrace_call_replace(ip, enable ? addr : stub, 0);
-
- return ftrace_modify_code(ip, old, new);
-}
-
-int ftrace_enable_ftrace_graph_caller(void)
-{
- return ftrace_modify_ftrace_graph_caller(true);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- return ftrace_modify_ftrace_graph_caller(false);
-}
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info. Return the address we want to divert to.
- */
-static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
-{
- unsigned long return_hooker;
+ unsigned long sp = fregs->regs.gpr[1];
int bit;
if (unlikely(ftrace_graph_is_dead()))
@@ -836,41 +418,15 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
- bit = ftrace_test_recursion_trylock(ip, parent);
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
goto out;
- return_hooker = ppc_function_entry(return_to_handler);
-
- if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
- parent = return_hooker;
+ if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+ parent_ip = ppc_function_entry(return_to_handler);
ftrace_test_recursion_unlock(bit);
out:
- return parent;
+ fregs->regs.link = parent_ip;
}
-
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct ftrace_regs *fregs)
-{
- fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
-}
-#else
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
- unsigned long sp)
-{
- return __prepare_ftrace_return(parent, ip, sp);
-}
-#endif
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_PPC64_ELF_ABI_V1
-char *arch_ftrace_match_adjust(char *str, const char *search)
-{
- if (str[0] == '.' && search[0] != '.')
- return str + 1;
- else
- return str;
-}
-#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
deleted file mode 100644
index 6708e24..0000000
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from ftrace_64.S
- */
-
-#include <linux/magic.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/ppc-opcode.h>
-#include <asm/export.h>
-
-_GLOBAL_TOC(ftrace_caller)
- lbz r3, PACA_FTRACE_ENABLED(r13)
- cmpdi r3, 0
- beqlr
-
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- addi r5, r1, 112
- /* load r4 with local address */
- ld r4, 128(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- ld r11, 112(r1)
- ld r3, 16(r11)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- ld r11, 112(r1)
- std r3, 16(r11)
-
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 0000000..7b85c3b
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,846 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+ ppc_inst_t op;
+
+ addr = ppc_function_entry((void *)addr);
+
+ /* if (link) set op to 'bl' else 'b' */
+ create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
+
+ return op;
+}
+
+static inline int
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+ ppc_inst_t replaced;
+
+ /*
+ * Note:
+ * We are paranoid about modifying text, as if a bug was to happen, it
+ * could cause us to read or write to someplace that could cause harm.
+ * Carefully read the code with copy_inst_from_kernel_nofault(), and make
+ * sure what we read is what we expected it to be before modifying it.
+ */
+
+ /* read the text we want to modify */
+ if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (!ppc_inst_equal(replaced, old)) {
+ pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
+ ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
+ return -EINVAL;
+ }
+
+ /* replace the text with the new text */
+ return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+ addr = ppc_function_entry((void *)addr);
+
+ return is_offset_in_branch_range(addr - ip);
+}
+
+static int is_bl_op(ppc_inst_t op)
+{
+ return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
+}
+
+static int is_b_op(ppc_inst_t op)
+{
+ return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0);
+}
+
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
+{
+ int offset;
+
+ offset = PPC_LI(ppc_inst_val(op));
+ /* make it signed */
+ if (offset & 0x02000000)
+ offset |= 0xfe000000;
+
+ return ip + (long)offset;
+}
+
+#ifdef CONFIG_MODULES
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long entry, ptr, tramp;
+ unsigned long ip = rec->ip;
+ ppc_inst_t op, pop;
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+ if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
+ pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ return -EFAULT;
+ }
+
+ /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
+ !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
+ pr_err("Unexpected instruction %08lx around bl _mcount\n",
+ ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+ } else if (IS_ENABLED(CONFIG_PPC64)) {
+ /*
+ * Check what is in the next instruction. We can see ld r2,40(r1), but
+ * on first pass after boot we will see mflr r0.
+ */
+ if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
+ pr_err("Fetching op failed.\n");
+ return -EFAULT;
+ }
+
+ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) {
+ pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
+ ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * When using -mprofile-kernel or PPC32 there is no load to jump over.
+ *
+ * Otherwise our original call site looks like:
+ *
+ * bl <tramp>
+ * ld r2,XX(r1)
+ *
+ * Milton Miller pointed out that we can not simply nop the branch.
+ * If a task was preempted when calling a trace function, the nops
+ * will remove the way to restore the TOC in r2 and the r2 TOC will
+ * get corrupted.
+ *
+ * Use a b +8 to jump over the load.
+ */
+ if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
+ pop = ppc_inst(PPC_RAW_NOP());
+ else
+ pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */
+
+ if (patch_instruction((u32 *)ip, pop)) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+ int i;
+
+ /*
+ * We have the compiler generated long_branch tramps at the end
+ * and we prefer those
+ */
+ for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ if (!ftrace_tramps[i])
+ continue;
+ else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
+ return ftrace_tramps[i];
+
+ return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+ int i;
+ ppc_inst_t op;
+ unsigned long ptr;
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* New trampoline -- read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
+ pr_debug("Fetching opcode failed.\n");
+ return -1;
+ }
+
+ /* Is this a 24 bit branch? */
+ if (!is_b_op(op)) {
+ pr_debug("Trampoline is not a long branch tramp.\n");
+ return -1;
+ }
+
+ /* let's find where the pointer goes */
+ ptr = find_bl_target(tramp, op);
+
+ if (ptr != ppc_global_function_entry((void *)_mcount)) {
+ pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+ return -1;
+ }
+
+ /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+ else
+ ptr = ppc_global_function_entry((void *)ftrace_caller);
+
+ if (patch_branch((u32 *)tramp, ptr, 0)) {
+ pr_debug("REL24 out of range!\n");
+ return -1;
+ }
+
+ if (add_ftrace_tramp(tramp)) {
+ pr_debug("No tramp locations left\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long tramp, ip = rec->ip;
+ ppc_inst_t op;
+
+ /* Read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* Let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (setup_mcount_compiler_tramp(tramp)) {
+ /* Are other trampolines reachable? */
+ if (!find_ftrace_tramp(ip)) {
+ pr_err("No ftrace trampolines reachable from %ps\n",
+ (void *)ip);
+ return -EINVAL;
+ }
+ }
+
+ if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more than 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, addr, 1);
+ new = ppc_inst(PPC_RAW_NOP());
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ return __ftrace_make_nop_kernel(rec, addr);
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ * We should either already have a pointer to the module
+ * or it has been passed in.
+ */
+ if (!rec->arch.mod) {
+ if (!mod) {
+ pr_err("No module loaded addr=%lx\n", addr);
+ return -EFAULT;
+ }
+ rec->arch.mod = mod;
+ } else if (mod) {
+ if (mod != rec->arch.mod) {
+ pr_err("Record mod %p not equal to passed in mod %p\n",
+ rec->arch.mod, mod);
+ return -EINVAL;
+ }
+ /* nothing to do if mod == rec->arch.mod */
+ } else
+ mod = rec->arch.mod;
+
+ return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
+{
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+ else
+ return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
+ ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
+}
+
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ ppc_inst_t op[2];
+ void *ip = (void *)rec->ip;
+ unsigned long entry, ptr, tramp;
+ struct module *mod = rec->arch.mod;
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(op, ip))
+ return -EFAULT;
+
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
+ copy_inst_from_kernel_nofault(op + 1, ip + 4))
+ return -EFAULT;
+
+ if (!expected_nop_sequence(ip, op[0], op[1])) {
+ pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
+ ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
+ return -EINVAL;
+ }
+
+ /* If we never set up ftrace trampoline(s), then bail */
+ if (!mod->arch.tramp ||
+ (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+ tramp = mod->arch.tramp;
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ ppc_inst_t op;
+ void *ip = (void *)rec->ip;
+ unsigned long tramp, entry, ptr;
+
+ /* Make sure we're being asked to patch branch to a known ftrace addr */
+ entry = ppc_global_function_entry((void *)ftrace_caller);
+ ptr = ppc_global_function_entry((void *)addr);
+
+ if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+
+ if (ptr != entry) {
+ pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+ return -EINVAL;
+ }
+
+ /* Make sure we have a nop */
+ if (copy_inst_from_kernel_nofault(&op, ip)) {
+ pr_err("Unable to read ftrace location %p\n", ip);
+ return -EFAULT;
+ }
+
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
+ pr_err("Unexpected call sequence at %p: %08lx\n",
+ ip, ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ tramp = find_ftrace_tramp((unsigned long)ip);
+ if (!tramp) {
+ pr_err("No ftrace trampolines reachable from %ps\n", ip);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("Error patching branch to ftrace tramp!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more than 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ppc_inst(PPC_RAW_NOP());
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ return __ftrace_make_call_kernel(rec, addr);
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ /* We should not get here without modules */
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ * Being that we are converting from nop, it had better
+ * already have a module defined.
+ */
+ if (!rec->arch.mod) {
+ pr_err("No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_make_call(rec, addr);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ ppc_inst_t op;
+ unsigned long ip = rec->ip;
+ unsigned long entry, ptr, tramp;
+ struct module *mod = rec->arch.mod;
+
+ /* If we never set up ftrace trampolines, then bail */
+ if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+ entry = ppc_global_function_entry((void *)old_addr);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (tramp != entry) {
+ /* old_addr is not within range, so we must have used a trampoline */
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+ }
+
+ /* The new target may be within range */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ if (rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+ tramp = mod->arch.tramp;
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+ return 0;
+}
+#endif
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more than 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, old_addr, 1);
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ /*
+ * We always patch out of range locations to go to the regs
+ * variant, so there is nothing to do here
+ */
+ return 0;
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ /* We should not get here without modules */
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ */
+ if (!rec->arch.mod) {
+ pr_err("No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_modify_call(rec, old_addr, addr);
+}
+#endif
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ ppc_inst_t old, new;
+ int ret;
+
+ old = ppc_inst_read((u32 *)&ftrace_call);
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+
+ /* Also update the regs callback function */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
+ ip = (unsigned long)(&ftrace_regs_call);
+ old = ppc_inst_read((u32 *)&ftrace_regs_call);
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+ }
+
+ return ret;
+}
+
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+ ftrace_modify_all_code(command);
+}
+
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+void ftrace_free_init_tramp(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
+ if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
+ ftrace_tramps[i] = 0;
+ return;
+ }
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ int i;
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ u32 stub_insns[] = {
+ PPC_RAW_LD(_R12, _R13, PACATOC),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+ };
+ unsigned long addr;
+ long reladdr;
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+ else
+ addr = ppc_global_function_entry((void *)ftrace_caller);
+
+ reladdr = addr - kernel_toc_addr();
+
+ if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+static int ftrace_modify_ftrace_graph_caller(bool enable)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ ppc_inst_t old, new;
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+ return 0;
+
+ old = ftrace_call_replace(ip, enable ? stub : addr, 0);
+ new = ftrace_call_replace(ip, enable ? addr : stub, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_ftrace_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_ftrace_graph_caller(false);
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs in current
+ * thread info. Returns the address to divert to, or parent if nothing is hooked.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
+{
+ unsigned long return_hooker;
+ int bit;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ goto out;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+
+ bit = ftrace_test_recursion_trylock(ip, parent);
+ if (bit < 0)
+ goto out;
+
+ return_hooker = ppc_function_entry(return_to_handler);
+
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+ parent = return_hooker;
+
+ ftrace_test_recursion_unlock(bit);
+out:
+ return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
+}
+#else
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
+{
+ return __prepare_ftrace_return(parent, ip, sp);
+}
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+ if (str[0] == '.' && search[0] != '.')
+ return str + 1;
+ else
+ return str;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
new file mode 100644
index 0000000..a8a7f28
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+
+_GLOBAL_TOC(ftrace_caller)
+ lbz r3, PACA_FTRACE_ENABLED(r13)
+ cmpdi r3, 0
+ beqlr
+
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ ld r11, 112(r1)
+ ld r3, 16(r11)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the caller's stack frame to this.
+ */
+ ld r11, 112(r1)
+ std r3, 16(r11)
+
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
+
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -16(r1)
+ stw r3, 8(r1)
+ stw r4, 12(r1)
+#endif
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+#ifdef CONFIG_PPC64
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+#else
+ lwz r3, 8(r1)
+ lwz r4, 12(r1)
+ addi r1, r1, 16
+#endif
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_entry.S
similarity index 83%
rename from arch/powerpc/kernel/trace/ftrace_mprofile.S
rename to arch/powerpc/kernel/trace/ftrace_entry.S
index 1f7d86d..9070188 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -3,12 +3,12 @@
* Split from ftrace_64.S
*/
+#include <linux/export.h>
#include <linux/magic.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/ppc-opcode.h>
-#include <asm/export.h>
#include <asm/thread_info.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
@@ -254,3 +254,70 @@
/* Return to original caller of live patched function */
blr
#endif /* CONFIG_LIVEPATCH */
+
+#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -16(r1)
+ stw r3, 8(r1)
+ stw r4, 12(r1)
+#endif
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+#ifdef CONFIG_PPC64
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+#else
+ lwz r3, 8(r1)
+ lwz r4, 12(r1)
+ addi r1, r1, 16
+#endif
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S
deleted file mode 100644
index 294d1e0..0000000
--- a/arch/powerpc/kernel/trace/ftrace_low.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from entry_64.S
- */
-
-#include <linux/magic.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/ppc-opcode.h>
-#include <asm/export.h>
-
-#ifdef CONFIG_PPC64
-.pushsection ".tramp.ftrace.text","aw",@progbits;
-.globl ftrace_tramp_text
-ftrace_tramp_text:
- .space 64
-.popsection
-
-.pushsection ".tramp.ftrace.init","aw",@progbits;
-.globl ftrace_tramp_init
-ftrace_tramp_init:
- .space 64
-.popsection
-#endif
-
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
- mflr r12
- mtctr r12
- mtlr r0
- bctr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(return_to_handler)
- /* need to save return values */
-#ifdef CONFIG_PPC64
- std r4, -32(r1)
- std r3, -24(r1)
- /* save TOC */
- std r2, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- /*
- * We might be called from a module.
- * Switch to our TOC to run inside the core kernel.
- */
- LOAD_PACA_TOC()
-#else
- stwu r1, -16(r1)
- stw r3, 8(r1)
- stw r4, 12(r1)
-#endif
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
-#ifdef CONFIG_PPC64
- ld r1, 0(r1)
- ld r4, -32(r1)
- ld r3, -24(r1)
- ld r2, -16(r1)
- ld r31, -8(r1)
-#else
- lwz r3, 8(r1)
- lwz r4, 12(r1)
- addi r1, r1, 16
-#endif
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 7ef147e..eeff136 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1158,7 +1158,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
* pretend we got a single-step exception. This was pointed out
* by Kumar Gala. -- paulus
*/
-static void emulate_single_step(struct pt_regs *regs)
+void emulate_single_step(struct pt_regs *regs)
{
if (single_stepping(regs))
__single_step_exception(regs);
@@ -2225,21 +2225,10 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs)
}
#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
-/*
- * Default handler for a Watchdog exception,
- * spins until a reboot occurs
- */
-void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
-{
- /* Generic WatchdogHandler, implement your own */
- mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
- return;
-}
-
DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
{
printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
- WatchdogHandler(regs);
+ mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
index 07296bc..80a1f9a4 100644
--- a/arch/powerpc/kernel/ucall.S
+++ b/arch/powerpc/kernel/ucall.S
@@ -5,8 +5,8 @@
* Copyright 2019, IBM Corporation.
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
_GLOBAL(ucall_norets)
EXPORT_SYMBOL_GPL(ucall_norets)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index fcc0ad6..4094e4c 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
@@ -8,7 +9,6 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
/*
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 13614f0..1c5970d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -107,9 +107,7 @@
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
-#ifdef CONFIG_PPC64
*(.tramp.ftrace.text);
-#endif
NOINSTR_TEXT
SCHED_TEXT
LOCK_TEXT
@@ -276,9 +274,7 @@
*/
. = ALIGN(PAGE_SIZE);
_einittext = .;
-#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
-#endif
} :text
/* .exit.text is discarded at runtime, not link time,
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 252724e..ef5c2d2 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -350,7 +350,7 @@ EXPORT_SYMBOL(crash_shutdown_unregister);
void default_machine_crash_shutdown(struct pt_regs *regs)
{
- unsigned int i;
+ volatile unsigned int i;
int (*old_handler)(struct pt_regs *regs);
if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 110d28b..a3de536 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,7 +17,7 @@
#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -27,6 +27,7 @@
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
#include <asm/mmzone.h>
+#include <asm/iommu.h>
#include <asm/prom.h>
#include <asm/plpks.h>
@@ -933,9 +934,9 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
}
/**
- * get_cpu_node_size - Compute the size of a CPU node in the FDT.
- * This should be done only once and the value is stored in
- * a static variable.
+ * cpu_node_size - Compute the size of a CPU node in the FDT.
+ * This should be done only once and the value is stored in
+ * a static variable.
* Returns the max size of a CPU node in the FDT.
*/
static unsigned int cpu_node_size(void)
@@ -1208,8 +1209,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
if (ret < 0)
goto out;
-#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
-#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
if (ret < 0)
goto out;
@@ -1217,8 +1216,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
if (ret < 0)
goto out;
-#undef DMA64_PROPNAME
-#undef DIRECT64_PROPNAME
/* Update memory reserve map */
ret = get_reserved_memory_ranges(&rmem);
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 5fc53a5..fb3e12f 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -18,7 +18,7 @@
#include <linux/sort.h>
#include <linux/kexec.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/slab.h>
#include <asm/sections.h>
#include <asm/kexec_ranges.h>
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 6c2b1d1..3b361af 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/export.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/code-patching-asm.h>
#include <asm/exception-64s.h>
-#include <asm/export.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f765d5..efd0ebf 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -182,7 +182,7 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info)
vfree(info->rev);
info->rev = NULL;
if (info->cma)
- kvm_free_hpt_cma(virt_to_page(info->virt),
+ kvm_free_hpt_cma(virt_to_page((void *)info->virt),
1 << (info->order - PAGE_SHIFT));
else if (info->virt)
free_pages(info->virt, info->order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index ccfd969..82be6d8 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -9,6 +9,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
+#include <asm/lppaca.h>
#include <asm/opal.h>
#include <asm/mce.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 870110e..ea7ad20 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -10,6 +10,7 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/ppc_asm.h>
@@ -24,7 +25,6 @@
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/book3s/64/mmu-hash.h>
-#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
#include <asm/thread_info.h>
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index d58df71..e476e10 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -20,6 +20,7 @@
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
#include "booke.h"
#include "e500.h"
@@ -92,7 +93,11 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
local_irq_save(flags);
mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
- asm volatile("tlbilxlpid");
+ /*
+ * clang-17 and older could not assemble tlbilxlpid.
+ * https://github.com/ClangBuiltLinux/linux/issues/1891
+ */
+ asm volatile (PPC_TLBILX_LPID);
mtspr(SPRN_MAS5, 0);
local_irq_restore(flags);
}
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 2158f61..b506c4d 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -6,10 +6,10 @@
* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
+#include <linux/export.h>
#include <asm/reg.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/tm.h>
#include <asm/cputable.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 9aa8286..51ad039 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -27,7 +27,7 @@
CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
+obj-y += code-patching.o feature-fixups.o pmem.o
obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
deleted file mode 100644
index ce18087..0000000
--- a/arch/powerpc/lib/alloc.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/memblock.h>
-#include <linux/string.h>
-#include <asm/setup.h>
-
-
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- void *p;
-
- if (slab_is_available())
- p = kzalloc(size, mask);
- else {
- p = memblock_alloc(size, SMP_CACHE_BYTES);
- if (!p)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- size);
- }
- return p;
-}
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 4541e8e..cd00b9b 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -8,12 +8,12 @@
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
.text
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 98ff51b..d53d8f0 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -8,11 +8,11 @@
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
/*
* Computes the checksum of a memory block at buff, length len,
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 3e9c27c..933b685 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -4,11 +4,11 @@
*
* Copyright (C) 1996-2005 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
index 88d46c4..bf1014b 100644
--- a/arch/powerpc/lib/copy_mc_64.S
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -4,9 +4,9 @@
* Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
* Author - Balbir Singh <bsingharora@gmail.com>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>
-#include <asm/export.h>
.macro err1
100:
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 5d09a02..f33a2e6 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -2,11 +2,11 @@
/*
* Copyright (C) 2008 Mark Nelson, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
_GLOBAL_TOC(copy_page)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index db8719a..9af969d 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -2,9 +2,9 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 80def1c..4f82581 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -67,7 +67,8 @@ static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_e
return 0;
}
-static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+ struct fixup_entry *fcur)
{
u32 *start, *end, *alt_start, *alt_end, *src, *dest;
@@ -79,7 +80,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
if ((alt_end - alt_start) > (end - start))
return 1;
- if ((value & fcur->mask) == fcur->value)
+ if ((value & fcur->mask & mask) == (fcur->value & mask))
return 0;
src = alt_start;
@@ -97,7 +98,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
return 0;
}
-void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+ void *fixup_start, void *fixup_end)
{
struct fixup_entry *fcur, *fend;
@@ -105,7 +107,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
fend = fixup_end;
for (; fcur < fend; fcur++) {
- if (patch_feature_section(value, fcur)) {
+ if (patch_feature_section_mask(value, mask, fcur)) {
WARN_ON(1);
printk("Unable to patch feature section at %p - %p" \
" with %p - %p\n",
@@ -117,6 +119,11 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);
+}
+
#ifdef CONFIG_PPC_BARRIER_NOSPEC
static bool is_fixup_addr_valid(void *dest, size_t size)
{
@@ -651,6 +658,17 @@ void __init apply_feature_fixups(void)
do_final_fixups();
}
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+ saved_mmu_features &= ~mask;
+ saved_mmu_features |= cur_cpu_spec->mmu_features & mask;
+
+ do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+ mmu_feature_keys_init();
+}
+
void __init setup_feature_keys(void)
{
/*
@@ -683,6 +701,11 @@ late_initcall(check_features);
#define check(x) \
if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+ return patch_feature_section_mask(value, ~0, fcur);
+}
+
/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
static struct fixup_entry fixup;
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 09af295..1518750 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -5,9 +5,9 @@
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
/* Note: This code relies on -mminimal-toc */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 9351ffa..6fd06cd 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -4,10 +4,10 @@
*
* Copyright (C) 1996 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/kasan.h>
#ifndef CONFIG_KASAN
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
index 5010e37..f6fca56 100644
--- a/arch/powerpc/lib/memcmp_32.S
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -7,8 +7,8 @@
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
.text
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 0b9b168..142c666 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -3,8 +3,8 @@
* Author: Anton Blanchard <anton@au.ibm.com>
* Copyright 2015 IBM Corporation.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/ppc-opcode.h>
#define off8 r6
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 016c91e..b5a67e2 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -2,9 +2,9 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 38158b7..a4ab862 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -485,7 +485,7 @@ write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *r
* Copy from a buffer to userspace, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static nokprobe_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
int c;
@@ -1043,7 +1043,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
}
#endif /* CONFIG_VSX */
-static int __emulate_dcbz(unsigned long ea)
+static __always_inline int __emulate_dcbz(unsigned long ea)
{
unsigned long i;
unsigned long size = l1_dcache_bytes();
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 2752b1c..daa7206 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -4,8 +4,8 @@
*
* Copyright (C) 1996 Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index 1ddb263..3ee4561 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -7,8 +7,8 @@
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index df41ce0..a25eb85 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -6,10 +6,10 @@
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/linkage.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
/**
* __arch_clear_user: - Zero a block of memory in user space, with less checking.
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
index 0a8d3f6..bbd24fe 100644
--- a/arch/powerpc/lib/strlen_32.S
+++ b/arch/powerpc/lib/strlen_32.S
@@ -6,8 +6,8 @@
*
* Inspired from glibc implementation
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
#include <asm/cache.h>
.text
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index a5a21d4..8b804e1 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -14,6 +14,7 @@
* hash table, so this file is not used on them.)
*/
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <linux/init.h>
#include <asm/reg.h>
@@ -22,7 +23,6 @@
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include <asm/code-patching-asm.h>
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
index 28676ca..3a88155 100644
--- a/arch/powerpc/mm/book3s32/kuap.c
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -3,25 +3,11 @@
#include <asm/kup.h>
#include <asm/smp.h>
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
-void kuap_lock_all_ool(void)
-{
- kuap_lock_all();
-}
-EXPORT_SYMBOL(kuap_lock_all_ool);
-
-void kuap_unlock_all_ool(void)
-{
- kuap_unlock_all();
-}
-EXPORT_SYMBOL(kuap_unlock_all_ool);
-
void setup_kuap(bool disabled)
{
if (!disabled) {
- kuap_lock_all_ool();
+ update_user_segments(mfsr(0) | SR_KS);
+ isync(); /* Context sync required after mtsr() */
init_mm.context.sr0 |= SR_KS;
current->thread.sr0 |= SR_KS;
}
@@ -30,7 +16,7 @@ void setup_kuap(bool disabled)
return;
if (disabled)
- static_branch_enable(&disable_kuap_key);
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
else
pr_info("Activating Kernel Userspace Access Protection\n");
}
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 269a3eb2..1922f9a 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -71,7 +71,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
mm->context.id = __init_new_context();
mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
- if (!kuep_is_disabled())
+ if (IS_ENABLED(CONFIG_PPC_KUEP))
mm->context.sr0 |= SR_NX;
if (!kuap_is_disabled())
mm->context.sr0 |= SR_KS;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 1498ccd..8f8a62d3 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -9,6 +9,7 @@
#include <linux/memremap.h>
#include <linux/pkeys.h>
#include <linux/debugfs.h>
+#include <linux/proc_fs.h>
#include <misc/cxl-base.h>
#include <asm/pgalloc.h>
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 1d2675a..1257339 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -291,7 +291,7 @@ void setup_kuap(bool disabled)
if (smp_processor_id() == boot_cpuid) {
pr_info("Activating Kernel Userspace Access Prevention\n");
- cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP;
+ cur_cpu_spec->mmu_features |= MMU_FTR_KUAP;
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 9667901..c6a4ac7 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -37,7 +37,6 @@
#include <mm/mmu_decl.h>
unsigned int mmu_base_pid;
-unsigned long radix_mem_block_size __ro_after_init;
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
@@ -300,7 +299,7 @@ static int __meminit create_physical_mapping(unsigned long start,
bool prev_exec, exec = false;
pgprot_t prot;
int psize;
- unsigned long max_mapping_size = radix_mem_block_size;
+ unsigned long max_mapping_size = memory_block_size;
if (debug_pagealloc_enabled_or_kfence())
max_mapping_size = PAGE_SIZE;
@@ -502,58 +501,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
return 1;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int __init probe_memory_block_size(unsigned long node, const char *uname, int
- depth, void *data)
-{
- unsigned long *mem_block_size = (unsigned long *)data;
- const __be32 *prop;
- int len;
-
- if (depth != 1)
- return 0;
-
- if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
- return 0;
-
- prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-
- if (!prop || len < dt_root_size_cells * sizeof(__be32))
- /*
- * Nothing in the device tree
- */
- *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
- else
- *mem_block_size = of_read_number(prop, dt_root_size_cells);
- return 1;
-}
-
-static unsigned long __init radix_memory_block_size(void)
-{
- unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-
- /*
- * OPAL firmware feature is set by now. Hence we are ok
- * to test OPAL feature.
- */
- if (firmware_has_feature(FW_FEATURE_OPAL))
- mem_block_size = 1UL * 1024 * 1024 * 1024;
- else
- of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-
- return mem_block_size;
-}
-
-#else /* CONFIG_MEMORY_HOTPLUG */
-
-static unsigned long __init radix_memory_block_size(void)
-{
- return 1UL * 1024 * 1024 * 1024;
-}
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-
void __init radix__early_init_devtree(void)
{
int rc;
@@ -577,16 +524,6 @@ void __init radix__early_init_devtree(void)
mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
psize_to_rpti_pgsize(MMU_PAGE_64K);
}
-
- /*
- * Max mapping size used when mapping pages. We don't use
- * ppc_md.memory_block_size() here because this get called
- * early and we don't have machine probe called yet. Also
- * the pseries implementation only check for ibm,lmb-size.
- * All hypervisor supporting radix do expose that device
- * tree node.
- */
- radix_mem_block_size = radix_memory_block_size();
return;
}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 3020a8b..39acc2c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbie_pid_lpid(unsigned long pid,
- unsigned long lpid,
- unsigned long ric)
-{
- unsigned long rb, rs, prs, r;
-
- rb = PPC_BIT(53); /* IS = 1 */
- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
- prs = 1; /* process scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
- unsigned long lpid,
- unsigned long ap, unsigned long ric)
-{
- unsigned long rb, rs, prs, r;
-
- rb = va & ~(PPC_BITMASK(52, 63));
- rb |= ap << PPC_BITLSHIFT(58);
- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
- prs = 1; /* process scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
-static inline void fixup_tlbie_va_range_lpid(unsigned long va,
- unsigned long pid,
- unsigned long lpid,
- unsigned long ap)
-{
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
- }
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
- }
-}
-
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
-static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
-{
- /*
- * We can use any address for the invalidation, pick one which is
- * probably unused as an optimisation.
- */
- unsigned long va = ((1UL << 52) - 1);
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
- }
-
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
- asm volatile("ptesync" : : : "memory");
- __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
- RIC_FLUSH_TLB);
- }
-}
-
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
{
@@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
- unsigned long ric)
-{
- asm volatile("ptesync" : : : "memory");
-
- /*
- * Workaround the fact that the "ric" argument to __tlbie_pid
- * must be a compile-time contraint to match the "i" constraint
- * in the asm statement.
- */
- switch (ric) {
- case RIC_FLUSH_TLB:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
- fixup_tlbie_pid_lpid(pid, lpid);
- break;
- case RIC_FLUSH_PWC:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- break;
- case RIC_FLUSH_ALL:
- default:
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
- fixup_tlbie_pid_lpid(pid, lpid);
- }
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
-static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
- unsigned long pid, unsigned long lpid,
- unsigned long page_size,
- unsigned long psize)
-{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
- for (addr = start; addr < end; addr += page_size)
- __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
-
- fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
-}
-
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
- unsigned long pid, unsigned long lpid,
- unsigned long page_size,
- unsigned long psize, bool also_pwc)
-{
- asm volatile("ptesync" : : : "memory");
- if (also_pwc)
- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -820,7 +701,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
* that's what the caller expects.
*/
if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
- atomic_dec(&mm->context.active_cpus);
+ dec_mm_active_cpus(mm);
cpumask_clear_cpu(cpu, mm_cpumask(mm));
always_flush = true;
}
@@ -1316,7 +1197,35 @@ void radix__tlb_flush(struct mmu_gather *tlb)
* See the comment for radix in arch_exit_mmap().
*/
if (tlb->fullmm) {
- __flush_all_mm(mm, true);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ /*
+ * Shootdown based lazy tlb mm refcounting means we
+ * have to IPI everyone in the mm_cpumask anyway soon
+ * when the mm goes away, so might as well do it as
+ * part of the final flush now.
+ *
+ * If lazy shootdown was improved to reduce IPIs (e.g.,
+ * by batching), then it may end up being better to use
+ * tlbies here instead.
+ */
+ preempt_disable();
+
+ smp_mb(); /* see radix__flush_tlb_mm */
+ exit_flush_lazy_tlbs(mm);
+ _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+
+ /*
+ * It should not be possible to have coprocessors still
+ * attached here.
+ */
+ if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+ __flush_all_mm(mm, true);
+
+ preempt_enable();
+ } else {
+ __flush_all_mm(mm, true);
+ }
+
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
if (!tlb->freed_tables)
radix__flush_tlb_mm(mm);
@@ -1497,6 +1406,127 @@ void radix__flush_tlb_all(void)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Work around the fact that the "ric" argument to __tlbie_pid_lpid
+ * must be a compile-time constant to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
/*
* Performs process-scoped invalidations for a given LPID
* as part of H_RPT_INVALIDATE hcall.
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 6956f63..f2708c8 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -13,6 +13,7 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
+#include <asm/lppaca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index d4cc374..d8adc45 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -126,6 +126,8 @@ void __init MMU_init(void)
setup_kup();
+ update_mmu_feature_fixups(MMU_FTR_KUAP);
+
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e0208cb..d96bbc0 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,6 +40,7 @@
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/memremap.h>
+#include <linux/memory.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -493,6 +494,130 @@ static int __init dt_scan_mmu_pid_width(unsigned long node,
return 1;
}
+/*
+ * Outside hotplug the kernel uses this value to map the kernel direct map
+ * with radix. To be compatible with older kernels, let's keep this value
+ * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+ * things with 1GB size in the case where we don't support hotplug.
+ */
+#ifndef CONFIG_MEMORY_HOTPLUG
+#define DEFAULT_MEMORY_BLOCK_SIZE SZ_16M
+#else
+#define DEFAULT_MEMORY_BLOCK_SIZE MIN_MEMORY_BLOCK_SIZE
+#endif
+
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+ unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+
+ for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+ if ((mem_size & *block_size) == 0)
+ break;
+ }
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+ depth, void *data)
+{
+ const char *type;
+ unsigned long *block_size = (unsigned long *)data;
+ const __be32 *reg, *endp;
+ int l;
+
+ if (depth != 1)
+ return 0;
+ /*
+ * If we have dynamic-reconfiguration-memory node, use the
+ * lmb value.
+ */
+ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+ const __be32 *prop;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+ if (!prop || l < dt_root_size_cells * sizeof(__be32))
+ /*
+ * Nothing in the device tree
+ */
+ *block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+ else
+ *block_size = of_read_number(prop, dt_root_size_cells);
+ /*
+ * We have found the final value. Don't probe further.
+ */
+ return 1;
+ }
+ /*
+ * Find all the device tree nodes of memory type and make sure
+ * the area can be mapped using the memory block size value
+ * we end up using. We start with 1G value and keep reducing
+ * it such that we can map the entire area using memory_block_size.
+ * This will be used on powernv and older pseries that don't
+ * have ibm,lmb-size node.
+ * For ex: with P5 we can end up with
+ * memory@0 -> 128MB
+ * memory@128M -> 64M
+ * This will end up using 64MB memory block size value.
+ */
+ type = of_get_flat_dt_prop(node, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+ if (!reg)
+ reg = of_get_flat_dt_prop(node, "reg", &l);
+ if (!reg)
+ return 0;
+
+ endp = reg + (l / sizeof(__be32));
+ while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+ const char *compatible;
+ u64 size;
+
+ dt_mem_next_cell(dt_root_addr_cells, &reg);
+ size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ if (size) {
+ update_memory_block_size(block_size, size);
+ continue;
+ }
+ /*
+ * ibm,coherent-device-memory with linux,usable-memory = 0
+ * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+ * linux,usable-memory == 0 implies driver managed memory and
+ * we can't use large memory block size due to hotplug/unplug
+ * limitations.
+ */
+ compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+ if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+ if (*block_size > SZ_256M)
+ *block_size = SZ_256M;
+ /*
+ * We keep 256M as the upper limit with GPU present.
+ */
+ return 0;
+ }
+ }
+ /* continue looking for other memory device types */
+ return 0;
+}
+
+/*
+ * start with 1G memory block size. Early init will
+ * fix this with correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
+{
+ /*
+ * We need to do memory_block_size probe early so that
+ * radix__early_init_mmu() can use this as limit for
+ * mapping page size.
+ */
+ of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
void __init mmu_early_init_devtree(void)
{
bool hvmode = !!(mfmsr() & MSR_HV);
@@ -526,6 +651,8 @@ void __init mmu_early_init_devtree(void)
if (!hvmode)
early_check_vec5();
+ early_init_memory_block_size();
+
if (early_radix_enabled()) {
radix__early_init_devtree();
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 1fb9c99..b24c190 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -43,11 +43,13 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
+ int cpu = smp_processor_id();
bool new_on_cpu = false;
/* Mark this context has been used on the new CPU */
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ VM_WARN_ON_ONCE(next == &init_mm);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
inc_mm_active_cpus(next);
/*
@@ -100,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* sub architectures. Out of line for now
*/
switch_mmu_context(prev, next, tsk);
+
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
}
#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c6dccb4..7f9ff064 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,6 +110,7 @@ extern void MMU_init_hw(void);
void MMU_init_hw_patch(void);
unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
+void mmu_init_secondary(int cpu);
#ifdef CONFIG_PPC_E500
extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
index 552becf..e1f7de2 100644
--- a/arch/powerpc/mm/nohash/kup.c
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -5,7 +5,6 @@
#include <linux/export.h>
#include <linux/init.h>
-#include <linux/jump_label.h>
#include <linux/printk.h>
#include <linux/smp.h>
@@ -13,21 +12,18 @@
#include <asm/smp.h>
#ifdef CONFIG_PPC_KUAP
-struct static_key_false disable_kuap_key;
-EXPORT_SYMBOL(disable_kuap_key);
-
void setup_kuap(bool disabled)
{
if (disabled) {
if (IS_ENABLED(CONFIG_40x))
disable_kuep = true;
if (smp_processor_id() == boot_cpuid)
- static_branch_enable(&disable_kuap_key);
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
return;
}
pr_info("Activating Kernel Userspace Access Protection\n");
- __prevent_user_access(KUAP_READ_WRITE);
+ prevent_user_access(KUAP_READ_WRITE);
}
#endif
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index a903b30..5ffa0af 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -318,17 +318,6 @@ EXPORT_SYMBOL(flush_tlb_page);
#endif /* CONFIG_SMP */
-#ifdef CONFIG_PPC_47x
-void __init early_init_mmu_47x(void)
-{
-#ifdef CONFIG_SMP
- unsigned long root = of_get_flat_dt_root();
- if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
- mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
-#endif /* CONFIG_SMP */
-}
-#endif /* CONFIG_PPC_47x */
-
/*
* Flush kernel TLB entries in the given range
*/
@@ -746,8 +735,10 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
#else /* ! CONFIG_PPC64 */
void __init early_init_mmu(void)
{
-#ifdef CONFIG_PPC_47x
- early_init_mmu_47x();
-#endif
+ unsigned long root = of_get_flat_dt_root();
+
+ if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) &&
+ of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+ mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9f73d08..f6c4ace 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -34,6 +34,7 @@
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
+#include <asm/vphn.h>
#include <asm/drmem.h>
static int numa_enabled = 1;
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index ee721f4..1a53ab0 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -645,7 +645,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
unsigned long val;
- int found = 0;
for (i = 0; i < ppmu->n_counter; ++i) {
event = cpuhw->event[i];
@@ -654,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
if ((int)val < 0) {
if (event) {
/* event has overflowed */
- found = 1;
record_and_restart(event, val, regs);
} else {
/*
@@ -672,11 +670,13 @@ static void perf_event_interrupt(struct pt_regs *regs)
isync();
}
-void hw_perf_event_setup(int cpu)
+static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
memset(cpuhw, 0, sizeof(*cpuhw));
+
+ return 0;
}
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
@@ -689,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pmu->name);
perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
+ cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+ fsl_emb_pmu_prepare_cpu, NULL);
return 0;
}
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 7ff8ff3..39dbe6b 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -102,6 +102,511 @@ static ssize_t cpumask_show(struct device *dev,
return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
}
+/* Interface attribute array index to store system information */
+#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR 7
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10
+#define INTERFACE_NULL_ATTR 11
+
+/* Counter request value to retrieve system information */
+enum {
+ PROCESSOR_BUS_TOPOLOGY,
+ PROCESSOR_CONFIG,
+ AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+ AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+ AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
+};
+
+static int sysinfo_counter_request[] = {
+ [PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+ [PROCESSOR_CONFIG] = 0x90,
+ [AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+ [AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+ [AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
+};
+
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
+ u16 secondary_index, char *buf,
+ size_t *n, struct hv_gpci_request_buffer *arg)
+{
+ unsigned long ret;
+ size_t i, j;
+
+ arg->params.counter_request = cpu_to_be32(req);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+ arg->params.secondary_index = cpu_to_be16(secondary_index);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+ * which means that the current buffer size cannot accommodate
+ * all the information and a partial buffer was returned.
+ * The hcall has failed in case of a ret value other than H_SUCCESS or H_PARAMETER.
+ *
+ * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+ * performance information, and required to set
+ * "Enable Performance Information Collection" option.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ /*
+ * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+ * because of invalid buffer-length/address or due to some hardware
+ * error.
+ */
+ if (ret && (ret != H_PARAMETER))
+ return -EIO;
+
+ /*
+ * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+ * to show the total number of counter_value array elements
+ * returned via hcall.
+ * hcall also populates 'cv_element_size' corresponds to individual
+ * counter_value array element size. Below loop go through all
+ * counter_value array elements as per their size and add it to
+ * the output buffer.
+ */
+ for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
+ j = i * be16_to_cpu(arg->params.cv_element_size);
+
+ for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++)
+ *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]);
+ *n += sprintf(buf + *n, "\n");
+ }
+
+ if (*n >= PAGE_SIZE) {
+ pr_info("System information exceeds PAGE_SIZE\n");
+ return -EFBIG;
+ }
+
+ return ret;
+}
+
+static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * Pass the counter request value 0xD0 corresponds to request
+ * type 'Processor_bus_topology', to retrieve
+ * the system topology information.
+ * starting_index value implies the starting hardware
+ * chip id.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+ * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+ * implies that the buffer can't accommodate all information, and a partial
+ * buffer was returned. To handle that, we need to make subsequent requests
+ * with the next starting index to retrieve additional (missing) data.
+ * The loop below makes subsequent hcalls with the next starting index and
+ * appends the output to the buffer until we get all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+ * Since the starting index value is part of counter_value
+ * buffer elements, use the starting index value in the last
+ * element and add 1 to make subsequent hcalls.
+ */
+ u32 starting_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) +
+ (arg->bytes[last_element + 1] << 16) +
+ (arg->bytes[last_element] << 24) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+ starting_index, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * Pass the counter request value 0x90 corresponds to request
+ * type 'Processor_config', to retrieve
+ * the system processor information.
+ * starting_index value implies the starting hardware
+ * processor index.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+ * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+ * implies that the buffer can't accommodate all information, and a partial
+ * buffer was returned. To handle that, we need to make subsequent requests
+ * with the next starting index to retrieve additional (missing) data.
+ * The loop below makes subsequent hcalls with the next starting index and
+ * appends the output to the buffer until we get all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+ * Since the starting index is part of counter_value
+ * buffer elements, use the starting index value in the last
+ * element and add 1 to subsequent hcalls.
+ */
+ u32 starting_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) +
+ (arg->bytes[last_element + 1] << 16) +
+ (arg->bytes[last_element] << 24) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+ starting_index, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * Pass the counter request 0xA0, which corresponds to request
+ * type 'Affinity_domain_information_by_virtual_processor',
+ * to retrieve the system affinity domain information.
+ * The starting_index value refers to the starting hardware
+ * processor index.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+ * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+ * implies that buffer can't accommodate all information, and a partial buffer
+ * returned. To handle that, we need to take subsequent requests
+ * with next secondary index to retrieve additional (missing) data.
+ * Below loop do subsequent hcalls with next secondary index and add it
+ * to buffer util we get all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+ * Since the starting index and secondary index type is part of the
+ * counter_value buffer elements, use the starting index value in the
+ * last array element as subsequent starting index, and use secondary index
+ * value in the last array element plus 1 as subsequent secondary index.
+ * For counter request '0xA0', starting index points to partition id
+ * and secondary index points to corresponding virtual processor index.
+ */
+ u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
+ u16 secondary_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+ starting_index, secondary_index, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_domain file.
+ * Issues counter request 0xB0 through systeminfo_gpci_request(), which is
+ * handed @buf and the running byte count, and repeats the request for as
+ * long as H_PARAMETER reports a partial result.
+ *
+ * Returns the byte count accumulated for @buf on success, otherwise the
+ * non-zero value returned by systeminfo_gpci_request(). The per-cpu request
+ * buffer taken with get_cpu_var() is released on every exit path.
+ */
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
+							char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xB0 corresponds to request
+	 * type 'Affinity_domain_information_by_domain',
+	 * to retrieve the system affinity domain information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop does subsequent hcalls with next starting index and adds it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] +
+				(arg->bytes[last_element] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	/* H_SUCCESS (0): the output is complete, so report its length. */
+	if (!ret)
+		ret = n;
+
+	/* Single exit so that put_cpu_var() also runs on the success path. */
+	put_cpu_var(hv_gpci_reqb);
+	return ret;
+}
+
+/*
+ * Append the first @returned_values elements of the hcall result in @arg to
+ * @buf as hex text, advancing *@n, and store in *@last_element the offset in
+ * arg->bytes just past the last element that was appended.
+ */
+static void affinity_domain_via_partition_result_parse(int returned_values,
+				int element_size, char *buf, size_t *last_element,
+				size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	size_t i = 0, j = 0, k = 0, l, m;
+	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+
+	/*
+	 * 'returned_values' is the number of counter_value array elements to
+	 * walk; a count <= 0 leaves the output untouched and *last_element 0.
+	 * Unlike other request types the elements are variable-size:
+	 * 'cv_element_size' is only the minimum size, i.e. the size of the
+	 * structure with no domain information. The loop determines the number
+	 * and size of each domain array element and adds it to the output.
+	 */
+	while (returned_values > 0 && i < (size_t)returned_values) {
+		k = j;
+		for (; k < j + element_size; k++)
+			*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+		*n += sprintf(buf + *n, "\n");
+
+		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
+		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
+
+		for (l = 0; l < total_affinity_domain_ele; l++) {
+			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+				*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+				k++;
+			}
+			*n += sprintf(buf + *n, "\n");
+		}
+
+		*n += sprintf(buf + *n, "\n");
+		i++;
+		j = k;
+	}
+
+	*last_element = k;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_partition file (counter request
+ * 0xB1). Returns the byte count placed in @buf, -EFBIG if a partial pass already
+ * reaches PAGE_SIZE, -EPERM on H_AUTHORITY and -EIO on any other hcall failure.
+ */
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
+							char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+	size_t last_element = 0;
+	u32 starting_index;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xB1 corresponds to counter request
+	 * type 'Affinity_domain_information_by_partition',
+	 * to retrieve the system affinity domain by partition information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+	arg->params.starting_index = cpu_to_be32(0);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	/* Anything but H_SUCCESS or H_PARAMETER is a failure: report it, parse nothing. */
+	if (ret && (ret != H_PARAMETER))
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' implies that the current buffer size can't
+	 * accommodate all the information, and a partial buffer returned. The
+	 * loop below makes subsequent hcalls with the next starting index and
+	 * adds the data to the buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		affinity_domain_via_partition_result_parse(
+			be16_to_cpu(arg->params.returned_values) - 1,
+			be16_to_cpu(arg->params.cv_element_size), buf,
+			&last_element, &n, arg);
+
+		if (n >= PAGE_SIZE) {
+			put_cpu_var(hv_gpci_reqb);
+			pr_debug("System information exceeds PAGE_SIZE\n");
+			return -EFBIG;
+		}
+
+		/*
+		 * The last returned element was not parsed above; last_element is
+		 * its offset. The next hcall starts from its starting_index value
+		 * (no +1), presumably so that this element is fetched again.
+		 */
+		starting_index = (u8)arg->bytes[last_element] << 8 |
+				(u8)arg->bytes[last_element + 1];
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+		arg->params.counter_request = cpu_to_be32(
+				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+		arg->params.starting_index = cpu_to_be32(starting_index);
+
+		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+		if (ret && (ret != H_PARAMETER))
+			goto out;
+	}
+
+	/* ret is H_SUCCESS here: parse every element of the final buffer. */
+	affinity_domain_via_partition_result_parse(
+		be16_to_cpu(arg->params.returned_values),
+		be16_to_cpu(arg->params.cv_element_size),
+		buf, &last_element, &n, arg);
+
+	put_cpu_var(hv_gpci_reqb);
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+
+	/*
+	 * The hcall failed with a value other than H_SUCCESS or H_PARAMETER.
+	 * H_AUTHORITY implies that partition is not permitted to retrieve
+	 * performance information, and required to set
+	 * "Enable Performance Information Collection" option.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+	 * because of invalid buffer-length/address or due to some hardware
+	 * error.
+	 */
+	return -EIO;
+}
+
static DEVICE_ATTR_RO(kernel_version);
static DEVICE_ATTR_RO(cpumask);
@@ -118,6 +623,31 @@ static struct attribute *interface_attrs[] = {
&hv_caps_attr_expanded.attr,
&hv_caps_attr_lab.attr,
&hv_caps_attr_collect_privileged.attr,
+ /*
+ * This NULL is a placeholder for the processor_bus_topology
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the processor_config
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_virtual_processor
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_domain
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_partition
+ * attribute, set in init function if applicable.
+ */
+ NULL,
NULL,
};
@@ -143,8 +673,6 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
-
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
u64 *value)
@@ -325,6 +853,107 @@ static int hv_gpci_cpu_hotplug_init(void)
ppc_hv_gpci_cpu_offline);
}
+/*
+ * Probe counter request @req with a trial hcall and, if the return value is
+ * an accepted one, allocate the 0444 device attribute that matches
+ * @sysinfo_interface_group_index. Returns the attribute, or NULL otherwise.
+ */
+static struct device_attribute *sysinfo_device_attr_create(int
+		sysinfo_interface_group_index, u32 req)
+{
+	struct hv_gpci_request_buffer *reqb;
+	struct device_attribute *new_attr;
+	unsigned long hret;
+
+	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
+	    sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
+		pr_info("Wrong interface group index for system information\n");
+		return NULL;
+	}
+
+	/* Issue the hcall once to check for given counter request value support */
+	reqb = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);
+	reqb->params.counter_request = cpu_to_be32(req);
+	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+				  virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);
+	put_cpu_var(hv_gpci_reqb);
+
+	/* An attribute is created only for 0, H_AUTHORITY and H_PARAMETER. */
+	if (hret && hret != H_AUTHORITY && hret != H_PARAMETER) {
+		pr_devel("hcall failed, with error: 0x%lx\n", hret);
+		return NULL;
+	}
+
+	new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL);
+	if (!new_attr)
+		return NULL;
+
+	sysfs_attr_init(&new_attr->attr);
+	new_attr->attr.mode = 0444;
+
+	switch (sysinfo_interface_group_index) {
+	case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
+		new_attr->attr.name = "processor_bus_topology";
+		new_attr->show = processor_bus_topology_show;
+		break;
+	case INTERFACE_PROCESSOR_CONFIG_ATTR:
+		new_attr->attr.name = "processor_config";
+		new_attr->show = processor_config_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
+		new_attr->attr.name = "affinity_domain_via_virtual_processor";
+		new_attr->show = affinity_domain_via_virtual_processor_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
+		new_attr->attr.name = "affinity_domain_via_domain";
+		new_attr->show = affinity_domain_via_domain_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
+		new_attr->attr.name = "affinity_domain_via_partition";
+		new_attr->show = affinity_domain_via_partition_show;
+		break;
+	}
+	return new_attr;
+}
+
+/*
+ * Create one device attribute per sysinfo counter request and store them
+ * in interface_attrs, starting at index
+ * INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR. This is all or nothing: if any
+ * attribute cannot be created, the ones already created are freed and
+ * interface_attrs is left unchanged.
+ */
+static void add_sysinfo_interface_files(void)
+{
+	const int first = INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+	const int nr_files = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
+	int idx;
+
+	/* Get device attribute for a given counter request value */
+	for (idx = 0; idx < nr_files; idx++) {
+		attr[idx] = sysinfo_device_attr_create(idx + first,
+						sysinfo_counter_request[idx]);
+		if (attr[idx])
+			continue;
+
+		/*
+		 * The sysinfo interface attributes will be added, only if an
+		 * attribute could be created for all the counter request
+		 * values. Free the entries created before the failing one
+		 * and give up.
+		 */
+		while (--idx >= 0)
+			kfree(attr[idx]);
+		return;
+	}
+
+	/* Add sysinfo interface attributes in the interface_attrs attribute array */
+	for (idx = 0; idx < nr_files; idx++)
+		interface_attrs[idx + first] = &attr[idx]->attr;
+}
+
static int hv_gpci_init(void)
{
int r;
@@ -388,6 +1017,10 @@ static int hv_gpci_init(void)
if (r)
return r;
+ /* sysinfo interface files are only available for power10 and above platforms */
+ if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
+ add_sysinfo_interface_files();
+
return 0;
}
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index bfeb9bd..bf0188d 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -83,45 +83,8 @@ static int __init warp_post_info(void)
#ifdef CONFIG_SENSORS_AD7414
-static LIST_HEAD(dtm_shutdown_list);
static void __iomem *dtm_fpga;
-struct dtm_shutdown {
- struct list_head list;
- void (*func)(void *arg);
- void *arg;
-};
-
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
- struct dtm_shutdown *shutdown;
-
- shutdown = kmalloc(sizeof(struct dtm_shutdown), GFP_KERNEL);
- if (shutdown == NULL)
- return -ENOMEM;
-
- shutdown->func = func;
- shutdown->arg = arg;
-
- list_add(&shutdown->list, &dtm_shutdown_list);
-
- return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
- struct dtm_shutdown *shutdown;
-
- list_for_each_entry(shutdown, &dtm_shutdown_list, list)
- if (shutdown->func == func && shutdown->arg == arg) {
- list_del(&shutdown->list);
- kfree(shutdown);
- return 0;
- }
-
- return -EINVAL;
-}
-
#define WARP_GREEN_LED 0
#define WARP_RED_LED 1
@@ -153,17 +116,12 @@ static struct platform_device warp_gpio_leds = {
static irqreturn_t temp_isr(int irq, void *context)
{
- struct dtm_shutdown *shutdown;
int value = 1;
local_irq_disable();
gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
- /* Run through the shutdown list. */
- list_for_each_entry(shutdown, &dtm_shutdown_list, list)
- shutdown->func(shutdown->arg);
-
printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n");
while (1) {
@@ -366,19 +324,6 @@ machine_late_initcall(warp, pika_dtm_start);
#else /* !CONFIG_SENSORS_AD7414 */
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
- return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
- return 0;
-}
-
machine_late_initcall(warp, warp_post_info);
#endif
-
-EXPORT_SYMBOL(pika_dtm_register_shutdown);
-EXPORT_SYMBOL(pika_dtm_unregister_shutdown);
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
index 182e128..670f8ad 100644
--- a/arch/powerpc/platforms/4xx/cpm.c
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -18,7 +18,7 @@
*/
#include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/suspend.h>
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index e11b57a..c6bd846 100644
--- a/arch/powerpc/platforms/4xx/hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -11,7 +11,7 @@
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/pci.h>
#include <linux/semaphore.h>
#include <asm/msi_bitmap.h>
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
index ac1cd8b..b2d9404 100644
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -15,12 +15,13 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <asm/dcr.h>
#include <asm/dcr-regs.h>
#include <asm/reg.h>
+#include <asm/ppc4xx.h>
static u32 dcrbase_l2c;
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
index d667ad0..e3e148b 100644
--- a/arch/powerpc/platforms/4xx/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -24,6 +24,7 @@
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/dcr.h>
+#include <asm/uic.h>
#define NR_UIC_INTS 32
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 80b25ce..a18f85b 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -10,7 +10,7 @@
#include <linux/kernel.h>
#include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 2f3c60e..d2cb06e 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -13,7 +13,6 @@ extern void __init mpc512x_init(void);
extern void __init mpc512x_setup_arch(void);
extern int __init mpc5121_clk_init(void);
const char *__init mpc512x_select_psc_compat(void);
-const char *__init mpc512x_select_reset_compat(void);
extern void __noreturn mpc512x_restart(char *cmd);
#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 97dfaac..0d58ab2 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -9,7 +9,7 @@
*/
#include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index c1e9816..4a25b6b 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -10,9 +10,9 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <asm/mpc5121.h>
#include <asm/io.h>
#include <linux/spinlock.h>
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 5ac0ead..8f75e95 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -29,20 +29,6 @@
static struct mpc512x_reset_module __iomem *reset_module_base;
-static void __init mpc512x_restart_init(void)
-{
- struct device_node *np;
- const char *reset_compat;
-
- reset_compat = mpc512x_select_reset_compat();
- np = of_find_compatible_node(NULL, NULL, reset_compat);
- if (!np)
- return;
-
- reset_module_base = of_iomap(np, 0);
- of_node_put(np);
-}
-
void __noreturn mpc512x_restart(char *cmd)
{
if (reset_module_base) {
@@ -363,7 +349,7 @@ const char *__init mpc512x_select_psc_compat(void)
return NULL;
}
-const char *__init mpc512x_select_reset_compat(void)
+static const char *__init mpc512x_select_reset_compat(void)
{
if (of_machine_is_compatible("fsl,mpc5121"))
return "fsl,mpc5121-reset";
@@ -455,6 +441,20 @@ static void __init mpc512x_psc_fifo_init(void)
}
}
+static void __init mpc512x_restart_init(void)
+{
+ struct device_node *np;
+ const char *reset_compat;
+
+ reset_compat = mpc512x_select_reset_compat();
+ np = of_find_compatible_node(NULL, NULL, reset_compat);
+ if (!np)
+ return;
+
+ reset_module_base = of_iomap(np, 0);
+ of_node_put(np);
+}
+
void __init mpc512x_init_early(void)
{
mpc512x_restart_init();
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 4bdec1c..ce51cfe 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -7,11 +7,12 @@
* PDM360NG board setup
*/
+#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 3fce4e1..5810595 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -48,7 +48,6 @@
* the output mode. This driver does not change the output mode setting.
*/
-#include <linux/device.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -57,8 +56,8 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/property.h>
#include <linux/slab.h>
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
index 4eb372b..d9f1a2a 100644
--- a/arch/powerpc/platforms/82xx/Kconfig
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -7,8 +7,8 @@
config EP8248E
bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
- select 8272
- select 8260
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
select FSL_SOC
select PHYLIB if NETDEVICES
select MDIO_BITBANG if PHYLIB
@@ -20,26 +20,10 @@
config MGCOGE
bool "Keymile MGCOGE"
- select 8272
- select 8260
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
select FSL_SOC
help
This enables support for the Keymile MGCOGE board.
endif
-
-config 8260
- bool
- depends on PPC_BOOK3S_32
- select CPM2
- help
- The MPC8260 is a typical embedded CPU made by Freescale. Selecting
- this option means that you wish to build a kernel for a machine with
- an 8260 class CPU.
-
-config 8272
- bool
- select 8260
- help
- The MPC8272 CPM has a different internal dpram setup than other CPM2
- devices
diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
index 8f1856b..3dc65ce 100644
--- a/arch/powerpc/platforms/82xx/ep8248e.c
+++ b/arch/powerpc/platforms/82xx/ep8248e.c
@@ -13,13 +13,13 @@
#include <linux/of_mdio.h>
#include <linux/slab.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/cpm2.h>
#include <asm/udbg.h>
#include <asm/machdep.h>
#include <asm/time.h>
-#include <asm/mpc8260.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/cpm2_pic.h>
@@ -140,12 +140,6 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev)
return ret;
}
-static int ep8248e_mdio_remove(struct platform_device *ofdev)
-{
- BUG();
- return 0;
-}
-
static const struct of_device_id ep8248e_mdio_match[] = {
{
.compatible = "fsl,ep8248e-mdio-bitbang",
@@ -157,9 +151,9 @@ static struct platform_driver ep8248e_mdio_driver = {
.driver = {
.name = "ep8248e-mdio-bitbang",
.of_match_table = ep8248e_mdio_match,
+ .suppress_bind_attrs = true,
},
.probe = ep8248e_mdio_probe,
- .remove = ep8248e_mdio_remove,
};
struct cpm_pin {
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
index 51c9bfd..c86da3f 100644
--- a/arch/powerpc/platforms/82xx/km82xx.c
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -19,7 +19,6 @@
#include <asm/udbg.h>
#include <asm/machdep.h>
#include <linux/time.h>
-#include <asm/mpc8260.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/cpm2_pic.h>
diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h
deleted file mode 100644
index d07c4d7..0000000
--- a/arch/powerpc/platforms/82xx/m82xx_pci.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _PPC_KERNEL_M82XX_PCI_H
-#define _PPC_KERNEL_M82XX_PCI_H
-
-/*
- */
-
-#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET)
-
-#ifndef _IO_BASE
-#define _IO_BASE isa_io_base
-#endif
-
-#endif /* _PPC_KERNEL_M8260_PCI_H */
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index 3b5cb39..391d72a2 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -32,49 +32,3 @@ void __noreturn pq2_restart(char *cmd)
panic("Restart failed\n");
}
NOKPROBE_SYMBOL(pq2_restart)
-
-#ifdef CONFIG_PCI
-static int pq2_pci_exclude_device(struct pci_controller *hose,
- u_char bus, u8 devfn)
-{
- if (bus == 0 && PCI_SLOT(devfn) == 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
- else
- return PCIBIOS_SUCCESSFUL;
-}
-
-static void __init pq2_pci_add_bridge(struct device_node *np)
-{
- struct pci_controller *hose;
- struct resource r;
-
- if (of_address_to_resource(np, 0, &r) || r.end - r.start < 0x10b)
- goto err;
-
- pci_add_flags(PCI_REASSIGN_ALL_BUS);
-
- hose = pcibios_alloc_controller(np);
- if (!hose)
- return;
-
- hose->dn = np;
-
- setup_indirect_pci(hose, r.start + 0x100, r.start + 0x104, 0);
- pci_process_bridge_OF_ranges(hose, np, 1);
-
- return;
-
-err:
- printk(KERN_ERR "No valid PCI reg property in device tree\n");
-}
-
-void __init pq2_init_pci(void)
-{
- struct device_node *np;
-
- ppc_md.pci_exclude_device = pq2_pci_exclude_device;
-
- for_each_compatible_node(np, NULL, "fsl,pq2-pci")
- pq2_pci_add_bridge(np);
-}
-#endif
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index 6b4013e..6fc3dba 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -2,7 +2,7 @@
#
# Makefile for the PowerPC 83xx linux kernel.
#
-obj-y := misc.o usb.o
+obj-y := misc.o
obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o
obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o
obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o
@@ -13,3 +13,6 @@
obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o
obj-$(CONFIG_ASP834x) += asp834x.o
obj-$(CONFIG_KMETER1) += km83xx.o
+obj-$(CONFIG_PPC_MPC831x) += usb_831x.o
+obj-$(CONFIG_PPC_MPC834x) += usb_834x.o
+obj-$(CONFIG_PPC_MPC837x) += usb_837x.o
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index 26ddc71..2b5d187 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -20,8 +20,8 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/initrd.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/atomic.h>
#include <linux/time.h>
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 3b4e417..d523ce0 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -15,8 +15,10 @@
#include <linux/spi/spi.h>
#include <linux/spi/mmc_spi.h>
#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/fsl_devices.h>
#include <asm/time.h>
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index aea803b..0b8738a 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -3,8 +3,6 @@
#define __MPC83XX_H__
#include <linux/init.h>
-#include <linux/device.h>
-#include <asm/pci-bridge.h>
/* System Clock Control Register */
#define MPC83XX_SCCR_OFFS 0xA08
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 3fa8979..9833c36 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -19,7 +19,7 @@
#include <linux/fsl_devices.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/export.h>
#include <asm/reg.h>
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
deleted file mode 100644
index e2a13a0..0000000
--- a/arch/powerpc/platforms/83xx/usb.c
+++ /dev/null
@@ -1,251 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Freescale 83xx USB SOC setup code
- *
- * Copyright (C) 2007 Freescale Semiconductor, Inc.
- * Author: Li Yang
- */
-
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include <asm/io.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc83xx.h"
-
-
-#ifdef CONFIG_PPC_MPC834x
-int __init mpc834x_usb_cfg(void)
-{
- unsigned long sccr, sicrl, sicrh;
- void __iomem *immap;
- struct device_node *np = NULL;
- int port0_is_dr = 0, port1_is_dr = 0;
- const void *prop, *dr_mode;
-
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap)
- return -ENOMEM;
-
- /* Read registers */
- /* Note: DR and MPH must use the same clock setting in SCCR */
- sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
- sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
- sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (np) {
- sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */
-
- prop = of_get_property(np, "phy_type", NULL);
- port1_is_dr = 1;
- if (prop && (!strcmp(prop, "utmi") ||
- !strcmp(prop, "utmi_wide"))) {
- sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
- sicrh |= MPC834X_SICRH_USB_UTMI;
- port0_is_dr = 1;
- } else if (prop && !strcmp(prop, "serial")) {
- dr_mode = of_get_property(np, "dr_mode", NULL);
- if (dr_mode && !strcmp(dr_mode, "otg")) {
- sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
- port0_is_dr = 1;
- } else {
- sicrl |= MPC834X_SICRL_USB1;
- }
- } else if (prop && !strcmp(prop, "ulpi")) {
- sicrl |= MPC834X_SICRL_USB1;
- } else {
- printk(KERN_WARNING "834x USB PHY type not supported\n");
- }
- of_node_put(np);
- }
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
- if (np) {
- sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
-
- prop = of_get_property(np, "port0", NULL);
- if (prop) {
- if (port0_is_dr)
- printk(KERN_WARNING
- "834x USB port0 can't be used by both DR and MPH!\n");
- sicrl &= ~MPC834X_SICRL_USB0;
- }
- prop = of_get_property(np, "port1", NULL);
- if (prop) {
- if (port1_is_dr)
- printk(KERN_WARNING
- "834x USB port1 can't be used by both DR and MPH!\n");
- sicrl &= ~MPC834X_SICRL_USB1;
- }
- of_node_put(np);
- }
-
- /* Write back */
- out_be32(immap + MPC83XX_SCCR_OFFS, sccr);
- out_be32(immap + MPC83XX_SICRL_OFFS, sicrl);
- out_be32(immap + MPC83XX_SICRH_OFFS, sicrh);
-
- iounmap(immap);
- return 0;
-}
-#endif /* CONFIG_PPC_MPC834x */
-
-#ifdef CONFIG_PPC_MPC831x
-int __init mpc831x_usb_cfg(void)
-{
- u32 temp;
- void __iomem *immap, *usb_regs;
- struct device_node *np = NULL;
- struct device_node *immr_node = NULL;
- const void *prop;
- struct resource res;
- int ret = 0;
-#ifdef CONFIG_USB_OTG
- const void *dr_mode;
-#endif
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np)
- return -ENODEV;
- prop = of_get_property(np, "phy_type", NULL);
-
- /* Map IMMR space for pin and clock settings */
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap) {
- of_node_put(np);
- return -ENOMEM;
- }
-
- /* Configure clock */
- immr_node = of_get_parent(np);
- if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
- of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
- MPC8315_SCCR_USB_MASK,
- MPC8315_SCCR_USB_DRCM_01);
- else
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
- MPC83XX_SCCR_USB_MASK,
- MPC83XX_SCCR_USB_DRCM_11);
-
- /* Configure pin mux for ULPI. There is no pin mux for UTMI */
- if (prop && !strcmp(prop, "ulpi")) {
- if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC8308_SICRH_USB_MASK,
- MPC8308_SICRH_USB_ULPI);
- } else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
- MPC8315_SICRL_USB_MASK,
- MPC8315_SICRL_USB_ULPI);
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC8315_SICRH_USB_MASK,
- MPC8315_SICRH_USB_ULPI);
- } else {
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
- MPC831X_SICRL_USB_MASK,
- MPC831X_SICRL_USB_ULPI);
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC831X_SICRH_USB_MASK,
- MPC831X_SICRH_USB_ULPI);
- }
- }
-
- iounmap(immap);
-
- of_node_put(immr_node);
-
- /* Map USB SOC space */
- ret = of_address_to_resource(np, 0, &res);
- if (ret) {
- of_node_put(np);
- return ret;
- }
- usb_regs = ioremap(res.start, resource_size(&res));
-
- /* Using on-chip PHY */
- if (prop && (!strcmp(prop, "utmi_wide") ||
- !strcmp(prop, "utmi"))) {
- u32 refsel;
-
- if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
- goto out;
-
- if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
- refsel = CONTROL_REFSEL_24MHZ;
- else
- refsel = CONTROL_REFSEL_48MHZ;
- /* Set UTMI_PHY_EN and REFSEL */
- out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
- CONTROL_UTMI_PHY_EN | refsel);
- /* Using external UPLI PHY */
- } else if (prop && !strcmp(prop, "ulpi")) {
- /* Set PHY_CLK_SEL to ULPI */
- temp = CONTROL_PHY_CLK_SEL_ULPI;
-#ifdef CONFIG_USB_OTG
- /* Set OTG_PORT */
- if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
- dr_mode = of_get_property(np, "dr_mode", NULL);
- if (dr_mode && !strcmp(dr_mode, "otg"))
- temp |= CONTROL_OTG_PORT;
- }
-#endif /* CONFIG_USB_OTG */
- out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
- } else {
- printk(KERN_WARNING "831x USB PHY type not supported\n");
- ret = -EINVAL;
- }
-
-out:
- iounmap(usb_regs);
- of_node_put(np);
- return ret;
-}
-#endif /* CONFIG_PPC_MPC831x */
-
-#ifdef CONFIG_PPC_MPC837x
-int __init mpc837x_usb_cfg(void)
-{
- void __iomem *immap;
- struct device_node *np = NULL;
- const void *prop;
- int ret = 0;
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np)) {
- of_node_put(np);
- return -ENODEV;
- }
- prop = of_get_property(np, "phy_type", NULL);
-
- if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
- printk(KERN_WARNING "837x USB PHY type not supported\n");
- of_node_put(np);
- return -EINVAL;
- }
-
- /* Map IMMR space for pin and clock settings */
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap) {
- of_node_put(np);
- return -ENOMEM;
- }
-
- /* Configure clock */
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
- MPC837X_SCCR_USB_DRCM_11);
-
- /* Configure pin mux for ULPI/serial */
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
- MPC837X_SICRL_USB_ULPI);
-
- iounmap(immap);
- of_node_put(np);
- return ret;
-}
-#endif /* CONFIG_PPC_MPC837x */
diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c
new file mode 100644
index 0000000..28c24e9
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_831x.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/**
+ * mpc831x_usb_cfg - configure SoC clock, pin mux and PHY for the USB DR block
+ *
+ * Looks up a "fsl-usb2-dr" node, programs the USB clock field in SCCR,
+ * routes the ULPI pins through SICRL/SICRH when "phy_type" is "ulpi", and
+ * finally writes the PHY selection into the controller's CONTROL register.
+ * The parent node's compatible string ("fsl,mpc8308-immr" or
+ * "fsl,mpc8315-immr") selects the register masks and the UTMI REFSEL value.
+ *
+ * Return: 0 on success; -ENODEV when no "fsl-usb2-dr" node exists; -ENOMEM
+ * when either register window cannot be mapped; the error returned by
+ * of_address_to_resource() when the node has no usable address; -EINVAL
+ * when "phy_type" is missing or is not "utmi", "utmi_wide" or "ulpi" (the
+ * clock has already been programmed by then).
+ */
+int __init mpc831x_usb_cfg(void)
+{
+	u32 temp;
+	void __iomem *immap, *usb_regs;
+	struct device_node *np = NULL;
+	struct device_node *immr_node = NULL;
+	const void *prop;
+	struct resource res;
+	bool is_8308, is_8315;
+	int ret = 0;
+#ifdef CONFIG_USB_OTG
+	const void *dr_mode;
+#endif
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!np)
+		return -ENODEV;
+	prop = of_get_property(np, "phy_type", NULL);
+
+	/* Map IMMR space for pin and clock settings */
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	/*
+	 * Work out which SoC this is once, while the reference taken by
+	 * of_get_parent() is still held, so that nothing below touches
+	 * immr_node after it has been put.
+	 */
+	immr_node = of_get_parent(np);
+	is_8315 = immr_node &&
+		  of_device_is_compatible(immr_node, "fsl,mpc8315-immr");
+	is_8308 = immr_node &&
+		  of_device_is_compatible(immr_node, "fsl,mpc8308-immr");
+	of_node_put(immr_node);
+
+	/* Configure clock */
+	if (is_8315 || is_8308)
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC8315_SCCR_USB_MASK,
+				MPC8315_SCCR_USB_DRCM_01);
+	else
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC83XX_SCCR_USB_MASK,
+				MPC83XX_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI. There is no pin mux for UTMI */
+	if (prop && !strcmp(prop, "ulpi")) {
+		if (is_8308) {
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8308_SICRH_USB_MASK,
+					MPC8308_SICRH_USB_ULPI);
+		} else if (is_8315) {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC8315_SICRL_USB_MASK,
+					MPC8315_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8315_SICRH_USB_MASK,
+					MPC8315_SICRH_USB_ULPI);
+		} else {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC831X_SICRL_USB_MASK,
+					MPC831X_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC831X_SICRH_USB_MASK,
+					MPC831X_SICRH_USB_ULPI);
+		}
+	}
+
+	iounmap(immap);
+
+	/* Map USB SOC space */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		goto out_put;
+
+	usb_regs = ioremap(res.start, resource_size(&res));
+	if (!usb_regs) {
+		/* Do not write the CONTROL register through a NULL mapping */
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) {
+		/* Using on-chip PHY */
+		u32 refsel;
+
+		/* The MPC8308 path leaves the CONTROL register untouched */
+		if (is_8308)
+			goto out_unmap;
+
+		if (is_8315)
+			refsel = CONTROL_REFSEL_24MHZ;
+		else
+			refsel = CONTROL_REFSEL_48MHZ;
+		/* Set UTMI_PHY_EN and REFSEL */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
+			 CONTROL_UTMI_PHY_EN | refsel);
+	} else if (prop && !strcmp(prop, "ulpi")) {
+		/* Using external ULPI PHY: set PHY_CLK_SEL to ULPI */
+		temp = CONTROL_PHY_CLK_SEL_ULPI;
+#ifdef CONFIG_USB_OTG
+		/* Set OTG_PORT */
+		if (!is_8308) {
+			dr_mode = of_get_property(np, "dr_mode", NULL);
+			if (dr_mode && !strcmp(dr_mode, "otg"))
+				temp |= CONTROL_OTG_PORT;
+		}
+#endif /* CONFIG_USB_OTG */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
+	} else {
+		pr_warn("831x USB PHY type not supported\n");
+		ret = -EINVAL;
+	}
+
+out_unmap:
+	iounmap(usb_regs);
+out_put:
+	of_node_put(np);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c
new file mode 100644
index 0000000..3a8d6c6
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_834x.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/**
+ * mpc834x_usb_cfg - assign the two MPC834x USB ports to the DR and MPH blocks
+ *
+ * Reads SCCR, SICRL and SICRH with their USB fields cleared, rebuilds those
+ * fields from the "fsl-usb2-dr" and "fsl-usb2-mph" device tree nodes (either
+ * of which may be absent), and writes all three registers back.  A warning
+ * is printed when the DR PHY type is unknown or when a port is claimed by
+ * both controllers; neither case is treated as an error.
+ *
+ * Return: 0 on success, -ENOMEM if the IMMR window cannot be mapped.
+ */
+int __init mpc834x_usb_cfg(void)
+{
+	struct device_node *node;
+	void __iomem *immr;
+	unsigned long clk, pin_lo, pin_hi;
+	bool dr_has_port0 = false;
+	bool dr_has_port1 = false;
+
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr)
+		return -ENOMEM;
+
+	/*
+	 * Start from the current register contents with the USB bits masked
+	 * out.  Note: DR and MPH must use the same clock setting in SCCR.
+	 */
+	clk = in_be32(immr + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
+	pin_lo = in_be32(immr + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
+	pin_hi = in_be32(immr + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
+
+	/* Dual-role controller: always takes port1, sometimes port0 too */
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (node) {
+		const char *phy = of_get_property(node, "phy_type", NULL);
+
+		clk |= MPC83XX_SCCR_USB_DRCM_11;	/* 1:3 */
+		dr_has_port1 = true;
+
+		if (phy && (!strcmp(phy, "utmi_wide") || !strcmp(phy, "utmi"))) {
+			pin_hi |= MPC834X_SICRH_USB_UTMI;
+			pin_lo |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+			dr_has_port0 = true;
+		} else if (phy && !strcmp(phy, "serial")) {
+			const char *mode = of_get_property(node, "dr_mode", NULL);
+
+			/* Serial PHY only needs port0 when running as OTG */
+			pin_lo |= MPC834X_SICRL_USB1;
+			if (mode && !strcmp(mode, "otg")) {
+				pin_lo |= MPC834X_SICRL_USB0;
+				dr_has_port0 = true;
+			}
+		} else if (phy && !strcmp(phy, "ulpi")) {
+			pin_lo |= MPC834X_SICRL_USB1;
+		} else {
+			pr_warn("834x USB PHY type not supported\n");
+		}
+		of_node_put(node);
+	}
+
+	/* Multi-port host: every port it lists is taken back from DR */
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
+	if (node) {
+		clk |= MPC83XX_SCCR_USB_MPHCM_11;	/* 1:3 */
+
+		if (of_get_property(node, "port0", NULL)) {
+			if (dr_has_port0)
+				pr_warn("834x USB port0 can't be used by both DR and MPH!\n");
+			pin_lo &= ~MPC834X_SICRL_USB0;
+		}
+		if (of_get_property(node, "port1", NULL)) {
+			if (dr_has_port1)
+				pr_warn("834x USB port1 can't be used by both DR and MPH!\n");
+			pin_lo &= ~MPC834X_SICRL_USB1;
+		}
+		of_node_put(node);
+	}
+
+	/* Commit the rebuilt values */
+	out_be32(immr + MPC83XX_SCCR_OFFS, clk);
+	out_be32(immr + MPC83XX_SICRL_OFFS, pin_lo);
+	out_be32(immr + MPC83XX_SICRH_OFFS, pin_hi);
+
+	iounmap(immr);
+	return 0;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c
new file mode 100644
index 0000000..726935b
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_837x.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/**
+ * mpc837x_usb_cfg - set the USB DR clock and pin mux on MPC837x
+ *
+ * Requires an available "fsl-usb2-dr" node whose "phy_type" is "ulpi" or
+ * "serial".  When that holds, the DRCM field of SCCR is set and the USB
+ * field of SICRL is switched to MPC837X_SICRL_USB_ULPI (the same value is
+ * used for both accepted PHY types).
+ *
+ * Return: 0 on success, -ENODEV if the node is missing or not available,
+ * -EINVAL for any other PHY type, -ENOMEM if the IMMR window cannot be
+ * mapped.
+ */
+int __init mpc837x_usb_cfg(void)
+{
+	struct device_node *dr_node;
+	void __iomem *immr;
+	const char *phy;
+	int err = 0;
+
+	dr_node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!dr_node)
+		return -ENODEV;
+
+	if (!of_device_is_available(dr_node)) {
+		err = -ENODEV;
+		goto put_node;
+	}
+
+	phy = of_get_property(dr_node, "phy_type", NULL);
+	if (!(phy && (!strcmp(phy, "ulpi") || !strcmp(phy, "serial")))) {
+		pr_warn("837x USB PHY type not supported\n");
+		err = -EINVAL;
+		goto put_node;
+	}
+
+	/* Map IMMR space for pin and clock settings */
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr) {
+		err = -ENOMEM;
+		goto put_node;
+	}
+
+	/* Configure clock */
+	clrsetbits_be32(immr + MPC83XX_SCCR_OFFS,
+			MPC837X_SCCR_USB_DRCM_11,
+			MPC837X_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI/serial */
+	clrsetbits_be32(immr + MPC83XX_SICRL_OFFS,
+			MPC837X_SICRL_USB_MASK,
+			MPC837X_SICRL_USB_ULPI);
+
+	iounmap(immr);
+
+put_node:
+	of_node_put(dr_node);
+	return err;
+}
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
index a029aa0..2eb62bff 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_qds.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -9,7 +9,7 @@
* Copyright 2014 Freescale Semiconductor Inc.
*/
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <asm/mpic.h>
#include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
index 361b437..161f006 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_rdb.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
@@ -7,7 +7,7 @@
* Copyright 2011-2012 Freescale Semiconductor Inc.
*/
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <asm/mpic.h>
#include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
index 3497570..7a63a3a 100644
--- a/arch/powerpc/platforms/85xx/c293pcie.c
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -7,8 +7,7 @@
#include <linux/stddef.h>
#include <linux/kernel.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index a554b6d..7578111 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -3,6 +3,7 @@
* Routines common to most mpc85xx-based boards.
*/
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index bfde391..645fcca 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -30,7 +30,7 @@
#include "smp.h"
#include "mpc85xx.h"
-void __init corenet_gen_pic_init(void)
+static void __init corenet_gen_pic_init(void)
{
struct mpic *mpic;
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
@@ -48,7 +48,7 @@ void __init corenet_gen_pic_init(void)
/*
* Setup the architecture
*/
-void __init corenet_gen_setup_arch(void)
+static void __init corenet_gen_setup_arch(void)
{
mpc85xx_smp_init();
@@ -101,7 +101,7 @@ static const struct of_device_id of_device_ids[] = {
{}
};
-int __init corenet_gen_publish_devices(void)
+static int __init corenet_gen_publish_devices(void)
{
return of_platform_bus_probe(NULL, of_device_ids, NULL);
}
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 3678a1f..9c3b44a 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -17,8 +17,8 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index af38c3a..1b6326a 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -18,7 +18,8 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 58ab383..e966b2a 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -12,7 +12,7 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 4347d62..2856148 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -15,8 +15,8 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 0546f19..c19490cf 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -26,8 +26,8 @@
#include <linux/seq_file.h>
#include <linux/initrd.h>
#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/phy.h>
#include <linux/memblock.h>
#include <linux/fsl/guts.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index c42a68d..ec9f60f 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -12,7 +12,8 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/fsl/guts.h>
#include <asm/time.h>
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index 14ec79a..10d6f1f 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -10,7 +10,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 23d0926..0dd786a 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -18,8 +18,8 @@
#include <linux/fsl/guts.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/div64.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index d1159150..25ab6e9 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -14,8 +14,8 @@
#include <linux/fsl/guts.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/div64.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
index 9df0439..e4fa873 100644
--- a/arch/powerpc/platforms/85xx/p1023_rdb.c
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -15,9 +15,8 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/fsl_devices.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 6e4b1dd..3cd2f3b 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -25,7 +25,7 @@
#include "smp.h"
#include "mpc85xx.h"
-void __init qemu_e500_pic_init(void)
+static void __init qemu_e500_pic_init(void)
{
struct mpic *mpic;
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index 9fa1338..403367b 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -23,7 +23,7 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 3768c86..baa12ef 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -6,7 +6,6 @@
#include <linux/irq.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/io.h>
/*
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 5e2646b..c10efc4 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -22,7 +22,7 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 80effb0..6be1b98 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -20,7 +20,7 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index b88e23a..c0a0456 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -13,7 +13,8 @@
#include <linux/errno.h>
#include <linux/fsl/guts.h>
#include <linux/pci.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/pci-bridge.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 184013e..45f257f 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -16,8 +16,8 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c
index 0069d38..a4a55052 100644
--- a/arch/powerpc/platforms/86xx/common.c
+++ b/arch/powerpc/platforms/86xx/common.c
@@ -3,7 +3,10 @@
* Routines common to most mpc86xx-based boards.
*/
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
#include <linux/of_platform.h>
+#include <asm/reg.h>
#include <asm/synch.h>
#include "mpc86xx.h"
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index f0512e5..f7f98cc 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -18,8 +18,8 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 1430b52..689835f 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -18,8 +18,8 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index c92af0d..365f511 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -18,8 +18,8 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c
index c0ac405..cee49ec 100644
--- a/arch/powerpc/platforms/86xx/mvme7100.c
+++ b/arch/powerpc/platforms/86xx/mvme7100.c
@@ -20,7 +20,6 @@
#include <linux/pci.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 2c32c34..9ca36de 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -6,12 +6,14 @@
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <asm/mpic.h>
#include <asm/i8259.h>
+#include "mpc86xx.h"
+
#ifdef CONFIG_PPC_I8259
static void mpc86xx_8259_cascade(struct irq_desc *desc)
{
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
index f6bd232..d02f8dd 100644
--- a/arch/powerpc/platforms/8xx/adder875.c
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -12,7 +12,7 @@
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
+#include <asm/8xx_immap.h>
#include <asm/udbg.h>
#include "mpc8xx.h"
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 34ab299..ebb5f6a 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -41,7 +41,7 @@
#include <asm/rheap.h>
#include <asm/cpm.h>
-#include <asm/fs_pd.h>
+#include <sysdev/fsl_soc.h>
#ifdef CONFIG_8xx_GPIO
#include <linux/gpio/legacy-of-mm-gpiochip.h>
@@ -54,8 +54,6 @@ immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
void __init cpm_reset(void)
{
- sysconf8xx_t __iomem *siu_conf;
-
cpmp = &mpc8xx_immr->im_cpm;
#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
@@ -77,12 +75,10 @@ void __init cpm_reset(void)
* manual recommends it.
* Bit 25, FAM can also be set to use FEC aggressive mode (860T).
*/
- siu_conf = immr_map(im_siu_conf);
if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */
- out_be32(&siu_conf->sc_sdcr, 0x40);
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40);
else
- out_be32(&siu_conf->sc_sdcr, 1);
- immr_unmap(siu_conf);
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1);
}
static DEFINE_SPINLOCK(cmd_lock);
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 24f358f..2336b68 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -22,7 +22,6 @@
#include <asm/io.h>
#include <asm/8xx_immap.h>
-#include <asm/fs_pd.h>
#include <mm/mmu_decl.h>
#include "pic.h"
@@ -37,20 +36,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
-/* per-board overridable init_internal_rtc() function. */
-void __init __attribute__ ((weak))
-init_internal_rtc(void)
-{
- sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
-
- /* Disable the RTC one second and alarm interrupts. */
- clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
-
- /* Enable the RTC */
- setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
- immr_unmap(sys_tmr);
-}
-
static int __init get_freq(char *name, unsigned long *val)
{
struct device_node *cpu;
@@ -80,23 +65,14 @@ static int __init get_freq(char *name, unsigned long *val)
void __init mpc8xx_calibrate_decr(void)
{
struct device_node *cpu;
- cark8xx_t __iomem *clk_r1;
- car8xx_t __iomem *clk_r2;
- sitk8xx_t __iomem *sys_tmr1;
- sit8xx_t __iomem *sys_tmr2;
int irq, virq;
- clk_r1 = immr_map(im_clkrstk);
-
/* Unlock the SCCR. */
- out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY);
- out_be32(&clk_r1->cark_sccrk, KAPWR_KEY);
- immr_unmap(clk_r1);
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY);
/* Force all 8xx processors to use divide by 16 processor clock. */
- clk_r2 = immr_map(im_clkrst);
- setbits32(&clk_r2->car_sccr, 0x02000000);
- immr_unmap(clk_r2);
+ setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000);
/* Processor frequency is MHz.
*/
@@ -123,16 +99,18 @@ void __init mpc8xx_calibrate_decr(void)
* we guarantee the registers are locked, then we unlock them
* for our use.
*/
- sys_tmr1 = immr_map(im_sitk);
- out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY);
- immr_unmap(sys_tmr1);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY);
- init_internal_rtc();
+ /* Disable the RTC one second and alarm interrupts. */
+ clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
+
+ /* Enable the RTC */
+ setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
/* Enabling the decrementer also enables the timebase interrupts
* (or from the other point of view, to get decrementer interrupts
@@ -144,10 +122,8 @@ void __init mpc8xx_calibrate_decr(void)
of_node_put(cpu);
irq = virq_to_hw(virq);
- sys_tmr2 = immr_map(im_sit);
- out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
- (TBSCR_TBF | TBSCR_TBE));
- immr_unmap(sys_tmr2);
+ out_be16(&mpc8xx_immr->im_sit.sit_tbscr,
+ ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE));
if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint",
NULL))
@@ -161,47 +137,36 @@ void __init mpc8xx_calibrate_decr(void)
int mpc8xx_set_rtc_time(struct rtc_time *tm)
{
- sitk8xx_t __iomem *sys_tmr1;
- sit8xx_t __iomem *sys_tmr2;
time64_t time;
- sys_tmr1 = immr_map(im_sitk);
- sys_tmr2 = immr_map(im_sit);
time = rtc_tm_to_time64(tm);
- out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY);
- out_be32(&sys_tmr2->sit_rtc, (u32)time);
- out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY);
- immr_unmap(sys_tmr2);
- immr_unmap(sys_tmr1);
return 0;
}
void mpc8xx_get_rtc_time(struct rtc_time *tm)
{
unsigned long data;
- sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
/* Get time from the RTC. */
- data = in_be32(&sys_tmr->sit_rtc);
+ data = in_be32(&mpc8xx_immr->im_sit.sit_rtc);
rtc_time64_to_tm(data, tm);
- immr_unmap(sys_tmr);
return;
}
void __noreturn mpc8xx_restart(char *cmd)
{
- car8xx_t __iomem *clk_r = immr_map(im_clkrst);
-
-
local_irq_disable();
- setbits32(&clk_r->car_plprcr, 0x00000080);
+ setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080);
/* Clear the ME bit in MSR to cause checkstop on machine check
*/
mtmsr(mfmsr() & ~0x1000);
- in_8(&clk_r->res[0]);
+ in_8(&mpc8xx_immr->im_clkrst.res[0]);
panic("Restart failed\n");
}
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
index 11b3d11..e4192c0 100644
--- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -24,7 +24,6 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc86xads.h"
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index c7c4f08..76c7cd7 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -36,7 +36,6 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc885ads.h"
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 6e56be8..1670dfd 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -38,7 +38,6 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc8xx.h"
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 3e2e252..1fd253f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -251,7 +251,7 @@
config CPM2
bool "Enable support for the CPM2 (Communications Processor Module)"
- depends on (FSL_SOC_BOOKE && PPC32) || 8260
+ depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx
select CPM
select HAVE_PCI
select GPIOLIB
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 340b86e..b2d8c0d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -276,6 +276,13 @@
default "e500mc" if E500MC_CPU
default "powerpc" if POWERPC_CPU
+# Promptless string symbol, only defined when POWERPC64_CPU is selected.
+# The defaults are listed newest first (power10, power9, power8), so the
+# value becomes the first -mtune= option that the cc-option probe accepts;
+# if the compiler accepts none of them no default applies.
+# NOTE(review): the consumer of this symbol is not visible in this hunk;
+# presumably the arch Makefile appends it to the compiler flags - confirm.
+config TUNE_CPU
+ string
+ depends on POWERPC64_CPU
+ default "-mtune=power10" if $(cc-option,-mtune=power10)
+ default "-mtune=power9" if $(cc-option,-mtune=power9)
+ default "-mtune=power8" if $(cc-option,-mtune=power8)
+
config PPC_BOOK3S
def_bool y
depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 1060004..28dc867 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -10,10 +10,11 @@
#include <linux/pci.h>
#include <linux/msi.h>
#include <linux/export.h>
-#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <asm/dcr.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index fb4023f..99b3558 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -10,9 +10,8 @@
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
#include <linux/pgtable.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 8c71330..1202a69 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -16,7 +16,7 @@
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/memblock.h>
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 98db63b..f6b8792 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -22,7 +22,7 @@
#include <asm/cell-regs.h>
#include "ras.h"
-
+#include "pervasive.h"
static void dump_fir(int cpu)
{
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 9e07d10..f64a1ef 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -27,6 +27,7 @@
#include <linux/mutex.h>
#include <linux/memory_hotplug.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/mmu.h>
#include <asm/processor.h>
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index e36ebd8..6843944 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/io.h>
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 74567b3..f464a1f 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -25,6 +25,7 @@
#include "spufs/spufs.h"
#include "interrupt.h"
+#include "spu_priv1_mmio.h"
struct device_node *spu_devnode(struct spu *spu)
{
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 02ff260..ce9e58e 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -22,9 +22,9 @@
#include <linux/serial.h>
#include <linux/tty.h>
#include <linux/serial_core.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/extable.h>
#include <asm/time.h>
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index a4a79d7..f329a03 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -36,8 +36,9 @@
#include <linux/serial.h>
#include <linux/smp.h>
#include <linux/bitops.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
#include <linux/memblock.h>
#include <asm/processor.h>
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index 913b77b..fd130fe 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -20,7 +20,7 @@
#include <linux/phy.h>
#include <linux/of_address.h>
#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#define DELAY 1
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index 3f277a2..018c306 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -4,6 +4,7 @@
extern time64_t pas_get_boot_time(void);
extern void pas_pci_init(void);
+struct pci_dev;
extern void pas_pci_irq_fixup(struct pci_dev *dev);
extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 5c5b4a0..ef985ba 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -16,7 +16,9 @@
#include <linux/console.h>
#include <linux/export.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/gfp.h>
#include <linux/irqdomain.h>
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
index ad721882..70ac6db 100644
--- a/arch/powerpc/platforms/pasemi/time.c
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -9,6 +9,8 @@
#include <asm/time.h>
+#include "pasemi.h"
+
time64_t __init pas_get_boot_time(void)
{
/* Let's just return a fake date right now */
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index ed58928..ae62d43 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -37,6 +37,8 @@
#include <asm/pci-bridge.h>
#include <asm/pmac_low_i2c.h>
+#include "pmac.h"
+
#undef DEBUG_FEATURE
#ifdef DEBUG_FEATURE
@@ -132,8 +134,10 @@ static struct pmac_mb_def pmac_mb;
* Here are the chip specific feature functions
*/
-static inline int simple_feature_tweak(struct device_node *node, int type,
- int reg, u32 mask, int value)
+#ifndef CONFIG_PPC64
+
+static int simple_feature_tweak(struct device_node *node, int type, int reg,
+ u32 mask, int value)
{
struct macio_chip* macio;
unsigned long flags;
@@ -152,8 +156,6 @@ static inline int simple_feature_tweak(struct device_node *node, int type,
return 0;
}
-#ifndef CONFIG_PPC64
-
static long ohare_htw_scc_enable(struct device_node *node, long param,
long value)
{
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 0c41f4b..6de1cd5 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -45,7 +45,7 @@
#include <linux/root_dev.h>
#include <linux/bitops.h>
#include <linux/suspend.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
#include <asm/reg.h>
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a83cb67..af3a5d3 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -855,8 +855,7 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct device_node *dn = pci_device_to_OF_node(pdev);
- uint64_t id = PCI_SLOT_ID(phb->opal_id,
- (pdev->bus->number << 8) | pdev->devfn);
+ uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev));
uint8_t scope;
int64_t rc;
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 6290677..64a9c71 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -449,7 +449,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
if (!data)
return -ENOMEM;
- bdfn = (dev->bus->number << 8) | dev->devfn;
+ bdfn = pci_dev_id(dev);
rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
PE_mask);
if (rc) {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 348a8cd..828fc4d 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -11,7 +11,6 @@
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#include <linux/crash_dump.h>
#include <linux/debugfs.h>
#include <asm/opal.h>
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 113bdb1..327e2f7 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -24,13 +24,20 @@
#include <linux/uaccess.h>
+struct opal_prd_msg {
+ union {
+ struct opal_prd_msg_header header;
+ DECLARE_FLEX_ARRAY(u8, data);
+ };
+};
+
/*
* The msg member must be at the end of the struct, as it's followed by the
* message data.
*/
struct opal_prd_msg_queue_item {
- struct list_head list;
- struct opal_prd_msg_header msg;
+ struct list_head list;
+ struct opal_prd_msg msg;
};
static struct device_node *prd_node;
@@ -156,7 +163,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf,
int rc;
/* we need at least a header's worth of data */
- if (count < sizeof(item->msg))
+ if (count < sizeof(item->msg.header))
return -EINVAL;
if (*ppos)
@@ -186,7 +193,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf,
return -EINTR;
}
- size = be16_to_cpu(item->msg.size);
+ size = be16_to_cpu(item->msg.header.size);
if (size > count) {
err = -EINVAL;
goto err_requeue;
@@ -214,8 +221,8 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct opal_prd_msg_header hdr;
+ struct opal_prd_msg *msg;
ssize_t size;
- void *msg;
int rc;
size = sizeof(hdr);
@@ -247,12 +254,12 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
static int opal_prd_release(struct inode *inode, struct file *file)
{
- struct opal_prd_msg_header msg;
+ struct opal_prd_msg msg;
- msg.size = cpu_to_be16(sizeof(msg));
- msg.type = OPAL_PRD_MSG_TYPE_FINI;
+ msg.header.size = cpu_to_be16(sizeof(msg));
+ msg.header.type = OPAL_PRD_MSG_TYPE_FINI;
- opal_prd_msg((struct opal_prd_msg *)&msg);
+ opal_prd_msg(&msg);
atomic_xchg(&prd_usage, 0);
@@ -352,7 +359,7 @@ static int opal_prd_msg_notifier(struct notifier_block *nb,
if (!item)
return -ENOMEM;
- memcpy(&item->msg, msg->params, msg_size);
+ memcpy(&item->msg.data, msg->params, msg_size);
spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
list_add_tail(&item->list, &opal_prd_msg_queue);
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index a9bcf92..79011a2 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -11,8 +11,9 @@
#include <linux/bcd.h>
#include <linux/rtc.h>
#include <linux/delay.h>
-#include <linux/platform_device.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/opal.h>
#include <asm/firmware.h>
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
index a8436bf..6ac410f 100644
--- a/arch/powerpc/platforms/powernv/opal-secvar.c
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -12,8 +12,8 @@
#define pr_fmt(fmt) "secvar: "fmt
#include <linux/types.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
-#include <linux/of_platform.h>
#include <asm/opal.h>
#include <asm/secvar.h>
#include <asm/secure_boot.h>
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 3192c61..8880a1c 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -6,7 +6,9 @@
*/
#include <linux/delay.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/opal.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 6b4eed2..262cd6f 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -168,7 +168,7 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
ent->path.size = strlen((char *)ent->path.data);
dir = debugfs_create_dir(ent->name, root);
- if (!dir) {
+ if (IS_ERR(dir)) {
kfree(ent->path.data);
kfree(ent);
return -1;
@@ -190,7 +190,7 @@ static int scom_debug_init(void)
return 0;
root = debugfs_create_dir("scom", arch_debugfs_dir);
- if (!root)
+ if (IS_ERR(root))
return -1;
rc = 0;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cb63782..28fac47 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -997,14 +997,14 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
struct pnv_ioda_pe *pe;
/* Check if the BDFN for this device is associated with a PE yet */
- pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
if (!pe) {
/* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
if (WARN_ON(pdev->is_virtfn))
return;
pnv_pci_configure_bus(pdev->bus);
- pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
@@ -2526,7 +2526,7 @@ static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
if (WARN_ON(!phb))
return ERR_PTR(-ENODEV);
- pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
if (!pe)
return ERR_PTR(-ENODEV);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 5e9c6b5..4dbb47d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -482,15 +482,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long pnv_memory_block_size(void)
{
- /*
- * We map the kernel linear region with 1GB large pages on radix. For
- * memory hot unplug to work our memory block size must be at least
- * this size.
- */
- if (radix_enabled())
- return radix_mem_block_size;
- else
- return 256UL * 1024 * 1024;
+ return memory_block_size;
}
#endif
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 2057630..1abe33f 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -73,9 +73,9 @@ static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
static u64 make_first_field(const char *text, u64 index)
{
- u64 n;
+ u64 n = 0;
- strncpy((char *)&n, text, 8);
+ memcpy((char *)&n, text, strnlen(text, sizeof(n)));
return PS3_VENDOR_ID_NONE + (n >> 32) + index;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 1a3cb31..e62835a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -398,6 +398,14 @@ static int dlpar_online_cpu(struct device_node *dn)
for_each_present_cpu(cpu) {
if (get_hard_smp_processor_id(cpu) != thread)
continue;
+
+ if (!topology_is_primary_thread(cpu)) {
+ if (cpu_smt_control != CPU_SMT_ENABLED)
+ break;
+ if (!topology_smt_thread_allowed(cpu))
+ break;
+ }
+
cpu_maps_update_done();
find_and_update_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
@@ -845,15 +853,9 @@ static struct notifier_block pseries_smp_nb = {
.notifier_call = pseries_smp_notifier,
};
-static int __init pseries_cpu_hotplug_init(void)
+void __init pseries_cpu_hotplug_init(void)
{
int qcss_tok;
- unsigned int node;
-
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
- ppc_md.cpu_probe = dlpar_cpu_probe;
- ppc_md.cpu_release = dlpar_cpu_release;
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
@@ -862,12 +864,22 @@ static int __init pseries_cpu_hotplug_init(void)
qcss_tok == RTAS_UNKNOWN_SERVICE) {
printk(KERN_INFO "CPU Hotplug not supported by firmware "
"- disabling.\n");
- return 0;
+ return;
}
smp_ops->cpu_offline_self = pseries_cpu_offline_self;
smp_ops->cpu_disable = pseries_cpu_disable;
smp_ops->cpu_die = pseries_cpu_die;
+}
+
+static int __init pseries_dlpar_init(void)
+{
+ unsigned int node;
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ ppc_md.cpu_probe = dlpar_cpu_probe;
+ ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
/* Processors can be added/removed only on LPAR */
if (firmware_has_feature(FW_FEATURE_LPAR)) {
@@ -886,4 +898,4 @@ static int __init pseries_cpu_hotplug_init(void)
return 0;
}
-machine_arch_initcall(pseries, pseries_cpu_hotplug_init);
+machine_arch_initcall(pseries, pseries_dlpar_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 4f3d6a2..aa4042d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -21,54 +21,6 @@
#include <asm/drmem.h>
#include "pseries.h"
-unsigned long pseries_memory_block_size(void)
-{
- struct device_node *np;
- u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
- struct resource r;
-
- np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (np) {
- int len;
- int size_cells;
- const __be32 *prop;
-
- size_cells = of_n_size_cells(np);
-
- prop = of_get_property(np, "ibm,lmb-size", &len);
- if (prop && len >= size_cells * sizeof(__be32))
- memblock_size = of_read_number(prop, size_cells);
- of_node_put(np);
-
- } else if (machine_is(pseries)) {
- /* This fallback really only applies to pseries */
- unsigned int memzero_size = 0;
-
- np = of_find_node_by_path("/memory@0");
- if (np) {
- if (!of_address_to_resource(np, 0, &r))
- memzero_size = resource_size(&r);
- of_node_put(np);
- }
-
- if (memzero_size) {
- /* We now know the size of memory@0, use this to find
- * the first memoryblock and get its size.
- */
- char buf[64];
-
- sprintf(buf, "/memory@%x", memzero_size);
- np = of_find_node_by_path(buf);
- if (np) {
- if (!of_address_to_resource(np, 0, &r))
- memblock_size = resource_size(&r);
- of_node_put(np);
- }
- }
- }
- return memblock_size;
-}
-
static void dlpar_free_property(struct property *prop)
{
kfree(prop->name);
@@ -283,7 +235,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
{
- unsigned long block_sz, start_pfn;
+ unsigned long start_pfn;
int sections_per_block;
int i;
@@ -294,8 +246,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si
if (!pfn_valid(start_pfn))
goto out;
- block_sz = pseries_memory_block_size();
- sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+ sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
for (i = 0; i < sections_per_block; i++) {
__remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
@@ -354,7 +305,6 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
struct memory_block *mem_block;
- unsigned long block_sz;
int rc;
if (!lmb_is_removable(lmb))
@@ -370,13 +320,11 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
}
- block_sz = pseries_memory_block_size();
-
- __remove_memory(lmb->base_addr, block_sz);
+ __remove_memory(lmb->base_addr, memory_block_size);
put_device(&mem_block->dev);
/* Update memory regions for memory remove */
- memblock_remove(lmb->base_addr, block_sz);
+ memblock_remove(lmb->base_addr, memory_block_size);
invalidate_lmb_associativity_index(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 35254ac..bae45b3 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -91,7 +91,7 @@
b 1f; \
END_FTR_SECTION(0, 1); \
LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \
- std r12,32(r1); \
+ ld r12,0(r12); \
cmpdi r12,0; \
bne- LABEL; \
1:
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 44703f1..998e3af 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -47,6 +47,7 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/ibmebus.h>
#include <asm/machdep.h>
@@ -460,6 +461,7 @@ static int __init ibmebus_bus_init(void)
if (err) {
printk(KERN_WARNING "%s: device_register returned %i\n",
__func__, err);
+ put_device(&ibmebus_bus_device);
bus_unregister(&ibmebus_bus_type);
return err;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d593a72..16d93b5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -395,8 +395,6 @@ static LIST_HEAD(dma_win_list);
static DEFINE_SPINLOCK(dma_win_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(dma_win_init_mutex);
-#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
-#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
unsigned long num_pfn, const void *arg)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2eab323..f2cb621 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -41,6 +41,7 @@
#include <asm/kexec.h>
#include <asm/fadump.h>
#include <asm/dtl.h>
+#include <asm/vphn.h>
#include "pseries.h"
@@ -639,16 +640,8 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
static int __init vcpudispatch_stats_procfs_init(void)
{
- /*
- * Avoid smp_processor_id while preemptible. All CPUs should have
- * the same value for lppaca_shared_proc.
- */
- preempt_disable();
- if (!lppaca_shared_proc(get_lppaca())) {
- preempt_enable();
+ if (!lppaca_shared_proc())
return 0;
- }
- preempt_enable();
if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
&vcpudispatch_stats_proc_ops))
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 8acc705..1c151d7 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -206,7 +206,7 @@ static void parse_ppp_data(struct seq_file *m)
ppp_data.active_system_procs);
/* pool related entries are appropriate for shared configs */
- if (lppaca_shared_proc(get_lppaca())) {
+ if (lppaca_shared_proc()) {
unsigned long pool_idle_time, pool_procs;
seq_printf(m, "pool=%d\n", ppp_data.pool_num);
@@ -560,7 +560,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
partition_potential_processors);
seq_printf(m, "shared_processor_mode=%d\n",
- lppaca_shared_proc(get_lppaca()));
+ lppaca_shared_proc());
#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled())
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index b0658ea..2d40304 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -194,7 +194,7 @@ static struct plpks_auth *construct_auth(u8 consumer)
return auth;
}
-/**
+/*
* Label is combination of label attributes + name.
* Label attributes are used internally by kernel and not exposed to the user.
*/
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index f8bce40..8376f03 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -75,11 +75,13 @@ static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
#ifdef CONFIG_HOTPLUG_CPU
int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
+void pseries_cpu_hotplug_init(void);
#else
static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
return -EOPNOTSUPP;
}
+static inline void pseries_cpu_hotplug_init(void) { }
#endif
/* PCI root bridge prepare function override for pseries */
@@ -90,8 +92,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops;
int pseries_msi_allocate_domains(struct pci_controller *phb);
void pseries_msi_free_domains(struct pci_controller *phb);
-unsigned long pseries_memory_block_size(void);
-
extern int CMO_PrPSP;
extern int CMO_SecPSP;
extern unsigned long CMO_PageSize;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e2a57cf..ecea85c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -816,6 +816,8 @@ static void __init pSeries_setup_arch(void)
/* Discover PIC type and setup ppc_md accordingly */
smp_init_pseries();
+ // Setup CPU hotplug callbacks
+ pseries_cpu_hotplug_init();
if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
@@ -847,7 +849,7 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
- if (lppaca_shared_proc(get_lppaca())) {
+ if (lppaca_shared_proc()) {
static_branch_enable(&shared_processor);
pv_spinlocks_init();
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -1116,6 +1118,13 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
return PCI_PROBE_NORMAL;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long pseries_memory_block_size(void)
+{
+ return memory_block_size;
+}
+#endif
+
struct pci_controller_ops pseries_pci_controller_ops = {
.probe_mode = pSeries_pci_probe_mode,
#ifdef CONFIG_SPAPR_TCE_IOMMU
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 3fbc2a6..e25ac52 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -17,6 +17,7 @@
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#include <asm/firmware.h>
+#include <asm/vphn.h>
#include <asm/vas.h>
#include "vas.h"
diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
index cca474a..3f85ece 100644
--- a/arch/powerpc/platforms/pseries/vphn.c
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/byteorder.h>
-#include <asm/lppaca.h>
+#include <asm/vphn.h>
/*
* The associativity domain numbers are returned from the hypervisor as a
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
index 915f4d3..14cc5ea 100644
--- a/arch/powerpc/sysdev/cpm2.c
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -37,11 +37,9 @@
#include <asm/io.h>
#include <asm/irq.h>
-#include <asm/mpc8260.h>
#include <asm/page.h>
#include <asm/cpm2.h>
#include <asm/rheap.h>
-#include <asm/fs_pd.h>
#include <sysdev/fsl_soc.h>
@@ -119,9 +117,9 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
/* This is good enough to get SMCs running.....
*/
if (brg < 4) {
- bp = cpm2_map_size(im_brgc1, 16);
+ bp = &cpm2_immr->im_brgc1;
} else {
- bp = cpm2_map_size(im_brgc5, 16);
+ bp = &cpm2_immr->im_brgc5;
brg -= 4;
}
bp += brg;
@@ -131,7 +129,6 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
val |= CPM_BRG_DIV16;
out_be32(bp, val);
- cpm2_unmap(bp);
}
EXPORT_SYMBOL(__cpm2_setbrg);
@@ -140,7 +137,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
int ret = 0;
int shift;
int i, bits = 0;
- cpmux_t __iomem *im_cpmux;
u32 __iomem *reg;
u32 mask = 7;
@@ -203,35 +199,33 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
{CPM_CLK_SCC4, CPM_CLK8, 7},
};
- im_cpmux = cpm2_map(im_cpmux);
-
switch (target) {
case CPM_CLK_SCC1:
- reg = &im_cpmux->cmx_scr;
+ reg = &cpm2_immr->im_cpmux.cmx_scr;
shift = 24;
break;
case CPM_CLK_SCC2:
- reg = &im_cpmux->cmx_scr;
+ reg = &cpm2_immr->im_cpmux.cmx_scr;
shift = 16;
break;
case CPM_CLK_SCC3:
- reg = &im_cpmux->cmx_scr;
+ reg = &cpm2_immr->im_cpmux.cmx_scr;
shift = 8;
break;
case CPM_CLK_SCC4:
- reg = &im_cpmux->cmx_scr;
+ reg = &cpm2_immr->im_cpmux.cmx_scr;
shift = 0;
break;
case CPM_CLK_FCC1:
- reg = &im_cpmux->cmx_fcr;
+ reg = &cpm2_immr->im_cpmux.cmx_fcr;
shift = 24;
break;
case CPM_CLK_FCC2:
- reg = &im_cpmux->cmx_fcr;
+ reg = &cpm2_immr->im_cpmux.cmx_fcr;
shift = 16;
break;
case CPM_CLK_FCC3:
- reg = &im_cpmux->cmx_fcr;
+ reg = &cpm2_immr->im_cpmux.cmx_fcr;
shift = 8;
break;
default:
@@ -261,7 +255,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
out_be32(reg, (in_be32(reg) & ~mask) | bits);
- cpm2_unmap(im_cpmux);
return ret;
}
@@ -270,7 +263,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
int ret = 0;
int shift;
int i, bits = 0;
- cpmux_t __iomem *im_cpmux;
u8 __iomem *reg;
u8 mask = 3;
@@ -285,16 +277,14 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
{CPM_CLK_SMC2, CPM_CLK15, 3},
};
- im_cpmux = cpm2_map(im_cpmux);
-
switch (target) {
case CPM_CLK_SMC1:
- reg = &im_cpmux->cmx_smr;
+ reg = &cpm2_immr->im_cpmux.cmx_smr;
mask = 3;
shift = 4;
break;
case CPM_CLK_SMC2:
- reg = &im_cpmux->cmx_smr;
+ reg = &cpm2_immr->im_cpmux.cmx_smr;
mask = 3;
shift = 0;
break;
@@ -317,7 +307,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
out_8(reg, (in_8(reg) & ~mask) | bits);
- cpm2_unmap(im_cpmux);
return ret;
}
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index cb9ba4e..e144936 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -33,9 +33,7 @@
#include <linux/irqdomain.h>
#include <asm/immap_cpm2.h>
-#include <asm/mpc8260.h>
#include <asm/io.h>
-#include <asm/fs_pd.h>
#include "cpm2_pic.h"
@@ -231,7 +229,7 @@ void cpm2_pic_init(struct device_node *node)
{
int i;
- cpm2_intctl = cpm2_map(im_intctl);
+ cpm2_intctl = &cpm2_immr->im_intctl;
/* Clear the CPM IRQ controller, in case it has any bits set
* from the bootloader
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 8234013..47db732 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -15,11 +15,9 @@
*/
#include <linux/init.h>
-#include <linux/of_device.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
-#include <linux/of_address.h>
#include <linux/slab.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
index 0695d26..40f5711 100644
--- a/arch/powerpc/sysdev/cpm_gpio.c
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -9,7 +9,8 @@
*/
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/cpm.h>
#ifdef CONFIG_8xx_GPIO
diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S
index 329b9c4..e8401b2 100644
--- a/arch/powerpc/sysdev/dcr-low.S
+++ b/arch/powerpc/sysdev/dcr-low.S
@@ -5,10 +5,10 @@
* Copyright (c) 2004 Eugene Surovegin <ebs@ebshome.net>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/processor.h>
#include <asm/bug.h>
-#include <asm/export.h>
#define DCR_ACCESS_PROLOG(table) \
cmplwi cr0,r3,1024; \
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index c7327b8..0408276 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -42,33 +42,33 @@ static u32 __iomem *mpic_percpu_base_vaddr;
* Linux descriptor level callbacks
*/
-void ehv_pic_unmask_irq(struct irq_data *d)
+static void ehv_pic_unmask_irq(struct irq_data *d)
{
unsigned int src = virq_to_hw(d->irq);
ev_int_set_mask(src, 0);
}
-void ehv_pic_mask_irq(struct irq_data *d)
+static void ehv_pic_mask_irq(struct irq_data *d)
{
unsigned int src = virq_to_hw(d->irq);
ev_int_set_mask(src, 1);
}
-void ehv_pic_end_irq(struct irq_data *d)
+static void ehv_pic_end_irq(struct irq_data *d)
{
unsigned int src = virq_to_hw(d->irq);
ev_int_eoi(src);
}
-void ehv_pic_direct_end_irq(struct irq_data *d)
+static void ehv_pic_direct_end_irq(struct irq_data *d)
{
out_be32(mpic_percpu_base_vaddr + MPIC_EOI / 4, 0);
}
-int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
+static int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
bool force)
{
unsigned int src = virq_to_hw(d->irq);
@@ -109,7 +109,7 @@ static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
}
}
-int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+static int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
unsigned int src = virq_to_hw(d->irq);
unsigned int vecpri, vold, vnew, prio, cpu_dest;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 6daf620..3868483 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -519,7 +519,7 @@ void fsl_pcibios_fixup_bus(struct pci_bus *bus)
}
}
-int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+static int fsl_add_bridge(struct platform_device *pdev, int is_primary)
{
int len;
struct pci_controller *hose;
@@ -767,7 +767,7 @@ static int __init mpc83xx_pcie_setup(struct pci_controller *hose,
u32 cfg_bar;
int ret = -ENOMEM;
- pcie = zalloc_maybe_bootmem(sizeof(*pcie), GFP_KERNEL);
+ pcie = kzalloc(sizeof(*pcie), GFP_KERNEL);
if (!pcie)
return ret;
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index 093a875..3bc4ab9 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -112,7 +112,6 @@ struct ccsr_pci {
};
-extern int fsl_add_bridge(struct platform_device *pdev, int is_primary);
extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
extern void fsl_pcibios_fixup_phb(struct pci_controller *phb);
extern int mpc83xx_add_bridge(struct device_node *dev);
diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
index 76896de..9f6dd11 100644
--- a/arch/powerpc/sysdev/fsl_pmc.c
+++ b/arch/powerpc/sysdev/fsl_pmc.c
@@ -13,9 +13,9 @@
#include <linux/export.h>
#include <linux/suspend.h>
#include <linux/delay.h>
-#include <linux/device.h>
+#include <linux/mod_devicetable.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
struct pmc_regs {
__be32 devdisr;
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 0331962..f9b214b 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -23,16 +23,17 @@
#include <linux/types.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
-#include <linux/device.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <asm/machdep.h>
+#include <asm/rio.h>
#include "fsl_rio.h"
@@ -303,8 +304,8 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv)
out_be32(&priv->inb_atmu_regs[i].riwar, 0);
}
-int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
- u64 rstart, u64 size, u32 flags)
+static int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
+ u64 rstart, u64 size, u32 flags)
{
struct rio_priv *priv = mport->priv;
u32 base_size;
@@ -354,7 +355,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
return 0;
}
-void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
+static void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
{
u32 win_start_shift, base_start_shift;
struct rio_priv *priv = mport->priv;
@@ -442,7 +443,7 @@ static inline void fsl_rio_info(struct device *dev, u32 ccsr)
* master port with system-specific info, and registers the
* master port with the RapidIO subsystem.
*/
-int fsl_rio_setup(struct platform_device *dev)
+static int fsl_rio_setup(struct platform_device *dev)
{
struct rio_ops *ops;
struct rio_mport *port;
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index c1f72497..f956591 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -25,7 +25,6 @@
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/slab.h>
#include "fsl_rio.h"
@@ -360,7 +359,7 @@ fsl_rio_dbell_handler(int irq, void *dev_instance)
return IRQ_HANDLED;
}
-void msg_unit_error_handler(void)
+static void msg_unit_error_handler(void)
{
/*XXX: Error recovery is not implemented, we just clear errors */
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index c117715..528506f 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -19,7 +19,6 @@
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/phy.h>
#include <linux/spi/spi.h>
#include <linux/fsl_devices.h>
diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c
index c5bf7e1..58cee28 100644
--- a/arch/powerpc/sysdev/mpc5xxx_clocks.c
+++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
@@ -25,8 +25,10 @@ unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode)
fwnode_for_each_parent_node(fwnode, parent) {
ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq);
- if (!ret)
+ if (!ret) {
+ fwnode_handle_put(parent);
return bus_freq;
+ }
}
return 0;
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 1a3ac0b..7b449cc 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -7,9 +7,10 @@
*/
#include <linux/list.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/export.h>
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index b2f0a73..7166e2e 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -16,7 +16,6 @@
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/syscore_ops.h>
#include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
index 420f949..2211937 100644
--- a/arch/powerpc/sysdev/of_rtc.c
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -5,10 +5,10 @@
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
*/
#include <linux/kernel.h>
-#include <linux/of.h>
#include <linux/init.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 9dabb50..fcf8d15 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -16,11 +16,11 @@
#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/workqueue.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/pmi.h>
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 6cfbb4f..5fe73da 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -111,7 +111,6 @@ static int ics_opal_set_affinity(struct irq_data *d,
__func__, d->irq, hw_irq, rc);
return -1;
}
- server = be16_to_cpu(oserver);
wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
if (wanted_server < 0) {
diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
new file mode 100755
index 0000000..0670690
--- /dev/null
+++ b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -o pipefail
+
+# To debug, uncomment the following line
+# set -x
+
+# Output from -fpatchable-function-entry can only vary on ppc64 elfv2, so this
+# should not be invoked for other targets. Therefore we can pass in -m64 and
+# -mabi explicitly, to take care of toolchains defaulting to other targets.
+
+# Test whether the compile option -fpatchable-function-entry exists and
+# generates appropriate code
+echo "int func() { return 0; }" | \
+ $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+ grep -q "__patchable_function_entries"
+
+# Test whether nops are generated after the local entry point
+echo "int x; int func() { return x; }" | \
+ $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+ awk 'BEGIN { RS = ";" } /\.localentry.*nop.*\n[[:space:]]*nop/ { print $0 }' | \
+ grep -q "func:"
+
+exit 0
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index d334de3..682c7c0 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -10,14 +10,12 @@
# Disable ftrace for the entire directory
ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-ifdef CONFIG_CC_IS_CLANG
-# clang stores addresses on the stack causing the frame size to blow
-# out. See https://github.com/ClangBuiltLinux/linux/issues/252
-KBUILD_CFLAGS += -Wframe-larger-than=4096
-endif
-
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+# Clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096
+
obj-y += xmon.o nonstdio.o spr_access.o xmon_bpts.o
ifdef CONFIG_XMON_DISASSEMBLY
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index ee17270..5888fcd 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -58,6 +58,7 @@
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>
#include <asm/paca.h>
+#include <asm/lppaca.h>
#endif
#include "nonstdio.h"
@@ -3303,7 +3304,7 @@ static void show_pte(unsigned long addr)
{
unsigned long tskv = 0;
struct task_struct *volatile tsk = NULL;
- struct mm_struct *mm;
+ struct mm_struct *volatile mm;
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
@@ -3828,9 +3829,9 @@ static void dump_tlb_44x(void)
#ifdef CONFIG_PPC_BOOK3E_64
static void dump_tlb_book3e(void)
{
- u32 mmucfg, pidmask, lpidmask;
+ u32 mmucfg;
u64 ramask;
- int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0;
+ int i, tlb, ntlbs, pidsz, lpidsz, rasz;
int mmu_version;
static const char *pgsz_names[] = {
" 1K",
@@ -3874,12 +3875,8 @@ static void dump_tlb_book3e(void)
pidsz = ((mmucfg >> 6) & 0x1f) + 1;
lpidsz = (mmucfg >> 24) & 0xf;
rasz = (mmucfg >> 16) & 0x7f;
- if ((mmu_version > 1) && (mmucfg & 0x10000))
- lrat = 1;
printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n",
mmu_version, ntlbs, pidsz, lpidsz, rasz);
- pidmask = (1ul << pidsz) - 1;
- lpidmask = (1ul << lpidsz) - 1;
ramask = (1ull << rasz) - 1;
for (tlb = 0; tlb < ntlbs; tlb++) {
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a6f47c0..2f1c0cd 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -380,7 +380,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
/* static inline pte_t pte_mkread(pte_t pte) */
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_WRITE);
}
@@ -677,9 +677,9 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
- return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
+ return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)));
}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 661b6de..ae29e43 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,6 +127,7 @@
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_KERNEL_PMD_MKWRITE
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index ccdbccf..f072678 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -104,7 +104,7 @@ static inline int huge_pte_dirty(pte_t pte)
static inline pte_t huge_pte_mkwrite(pte_t pte)
{
- return pte_mkwrite(pte);
+ return pte_mkwrite_novma(pte);
}
static inline pte_t huge_pte_mkdirty(pte_t pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index d28d2e5..fb3ee77 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1001,7 +1001,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
if (pte_val(pte) & _PAGE_DIRTY)
@@ -1498,7 +1498,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index ca5a418..e5ec762 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -98,7 +98,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (flags & SET_MEMORY_RO)
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
- new = pte_mkwrite(pte_mkdirty(new));
+ new = pte_mkwrite_novma(pte_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
else if (flags & SET_MEMORY_X)
@@ -156,7 +156,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
if (flags & SET_MEMORY_RO)
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
- new = pmd_mkwrite(pmd_mkdirty(new));
+ new = pmd_mkwrite_novma(pmd_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 676f3d4..f939f12 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -358,11 +358,11 @@ static inline pte_t pte_##fn(pte_t pte) { pte.pte_##h op; return pte; }
* kernel permissions), we attempt to couple them a bit more sanely here.
*/
PTE_BIT_FUNC(high, wrprotect, &= ~(_PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE));
-PTE_BIT_FUNC(high, mkwrite, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
+PTE_BIT_FUNC(high, mkwrite_novma, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
PTE_BIT_FUNC(high, mkhuge, |= _PAGE_SZHUGE);
#else
PTE_BIT_FUNC(low, wrprotect, &= ~_PAGE_RW);
-PTE_BIT_FUNC(low, mkwrite, |= _PAGE_RW);
+PTE_BIT_FUNC(low, mkwrite_novma, |= _PAGE_RW);
PTE_BIT_FUNC(low, mkhuge, |= _PAGE_SZHUGE);
#endif
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 315d316..9e85d57 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -239,7 +239,7 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~SRMMU_REF);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return __pte(pte_val(pte) | SRMMU_WRITE);
}
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 09aa37c..5e41033 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -518,7 +518,7 @@ static inline pte_t pte_mkclean(pte_t pte)
return __pte(val);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
unsigned long val = pte_val(pte), mask;
@@ -773,11 +773,11 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return __pmd(pte_val(pte));
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite_novma(pte);
return __pmd(pte_val(pte));
}
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index ca450c7..a23cdd7 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -753,7 +753,7 @@ asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 570e43e..b4e4109 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -562,7 +562,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 44f6c76..e1ece21 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -207,7 +207,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
return(pte);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
if (unlikely(pte_get_bits(pte, _PAGE_RW)))
return pte;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd9a180..982b777 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1815,6 +1815,11 @@
(CC_IS_CLANG && CLANG_VERSION >= 140000)) && \
$(as-instr,endbr64)
+config X86_CET
+ def_bool n
+ help
+ CET features configured (Shadow stack or IBT)
+
config X86_KERNEL_IBT
prompt "Indirect Branch Tracking"
def_bool y
@@ -1822,6 +1827,7 @@
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
depends on !LD_IS_LLD || LLD_VERSION >= 140000
select OBJTOOL
+ select X86_CET
help
Build the kernel with support for Indirect Branch Tracking, a
hardware support course-grain forward-edge Control Flow Integrity
@@ -1915,6 +1921,24 @@
If unsure, say N.
+config X86_USER_SHADOW_STACK
+ bool "X86 userspace shadow stack"
+ depends on AS_WRUSS
+ depends on X86_64
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select X86_CET
+ help
+ Shadow stack protection is a hardware feature that detects function
+ return address corruption. This helps mitigate ROP attacks.
+ Applications must be enabled to use it, and old userspace does not
+ get protection "for free".
+
+ CPUs supporting shadow stacks were first released in 2020.
+
+ See Documentation/arch/x86/shstk.rst for more information.
+
+ If unsure, say N.
+
config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index b88f784..8ad41da 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -24,3 +24,8 @@
def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
help
Supported by binutils >= 2.30 and LLVM integrated assembler
+
+config AS_WRUSS
+ def_bool $(as-instr,wrussq %rax$(comma)(%rbx))
+ help
+ Supported by binutils >= 2.31 and LLVM integrated assembler
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8147682..1d6eee3 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,6 +374,7 @@
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2
+453 64 map_shadow_stack sys_map_shadow_stack
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 7b4ecbf..2061ed1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -307,6 +307,7 @@
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
@@ -383,6 +384,7 @@
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index fafe9be..702d93f 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -105,6 +105,18 @@
# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
#endif
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+#define DISABLE_USER_SHSTK 0
+#else
+#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
+#endif
+
+#ifdef CONFIG_X86_KERNEL_IBT
+#define DISABLE_IBT 0
+#else
+#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -120,7 +132,7 @@
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING)
+ DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
@@ -128,7 +140,7 @@
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
-#define DISABLED_MASK18 0
+#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 0
#define DISABLED_MASK20 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b475d9a..31089b85 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -82,6 +82,15 @@ static inline void fpregs_unlock(void)
preempt_enable();
}
+/*
+ * FPU state gets lazily restored before returning to userspace. So when in the
+ * kernel, the valid FPU state may be kept in the buffer. This function will force
+ * restore all the fpu state to the registers early if needed, and lock them from
+ * being automatically saved/restored. Then FPU state can be modified safely in the
+ * registers, before unlocking with fpregs_unlock().
+ */
+void fpregs_lock_and_load(void);
+
#ifdef CONFIG_X86_DEBUG_FPU
extern void fpregs_assert_state_consistent(void);
#else
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
index 4f928d6..697b77e 100644
--- a/arch/x86/include/asm/fpu/regset.h
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -7,11 +7,12 @@
#include <linux/regset.h>
-extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
+ ssp_active;
extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
+ xstateregs_get, ssp_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
+ xstateregs_set, ssp_set;
/*
* xstateregs_active == regset_fpregs_active. Please refer to the comment
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index 78fcde7..ca6e5e5 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -11,7 +11,8 @@
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct fpu *fpu);
-extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
+extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
+ unsigned long shstk_addr);
extern void fpu_flush_thread(void);
/*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 7f6d858..eb81007 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -115,8 +115,8 @@ enum xfeature {
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
XFEATURE_PASID,
- XFEATURE_RSRVD_COMP_11,
- XFEATURE_RSRVD_COMP_12,
+ XFEATURE_CET_USER,
+ XFEATURE_CET_KERNEL_UNUSED,
XFEATURE_RSRVD_COMP_13,
XFEATURE_RSRVD_COMP_14,
XFEATURE_LBR,
@@ -138,6 +138,8 @@ enum xfeature {
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
+#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER)
+#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL_UNUSED)
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
@@ -253,6 +255,16 @@ struct pkru_state {
} __packed;
/*
+ * State component 11 is Control-flow Enforcement user states
+ */
+struct cet_user_state {
+ /* user control-flow settings */
+ u64 user_cet;
+ /* user shadow stack pointer */
+ u64 user_ssp;
+};
+
+/*
* State component 15: Architectural LBR configuration state.
* The size of Arch LBR state depends on the number of LBRs (lbr_depth).
*/
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index cd3dd17..d4427b8 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,7 +50,8 @@
#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
/* All currently supported supervisor features */
-#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
+ XFEATURE_MASK_CET_USER)
/*
* A supervisor state component may not always contain valuable information,
@@ -77,7 +78,8 @@
* Unsupported supervisor features. When a supervisor feature in this mask is
* supported in the future, move it to the supported supervisor feature mask.
*/
-#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \
+ XFEATURE_MASK_CET_KERNEL)
/* All supervisor states including supported and unsupported states. */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index cd5c10a..05fd175 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -614,7 +614,7 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
#endif
/* #CP */
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
#endif
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1d29dc7..416901d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -186,6 +186,8 @@ do { \
#else
#define deactivate_mm(tsk, mm) \
do { \
+ if (!tsk->vfork_done) \
+ shstk_free(tsk); \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index dbf8af7..d6ad98c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -125,9 +125,15 @@ extern pmdval_t early_pmd_flags;
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
{
- return pte_flags(pte) & _PAGE_DIRTY;
+ return pte_flags(pte) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pte_shstk(pte_t pte)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
}
static inline int pte_young(pte_t pte)
@@ -135,9 +141,16 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_DIRTY;
+ return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pmd_shstk(pmd_t pmd)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
}
#define pmd_young pmd_young
@@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
{
- return pud_flags(pud) & _PAGE_DIRTY;
+ return pud_flags(pud) & _PAGE_DIRTY_BITS;
}
static inline int pud_young(pud_t pud)
@@ -158,7 +171,27 @@ static inline int pud_young(pud_t pud)
static inline int pte_write(pte_t pte)
{
- return pte_flags(pte) & _PAGE_RW;
+ /*
+ * Shadow stack pages are logically writable, but do not have
+ * _PAGE_RW. Check for them separately from _PAGE_RW itself.
+ */
+ return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+ /*
+ * Shadow stack pages are logically writable, but do not have
+ * _PAGE_RW. Check for them separately from _PAGE_RW itself.
+ */
+ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
+}
+
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_RW;
}
static inline int pte_huge(pte_t pte)
@@ -292,9 +325,63 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
+/*
+ * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the
+ * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So
+ * when creating dirty, write-protected memory, a software bit is used:
+ * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the
+ * Dirty bit to SavedDirty, and vice versa.
+ *
+ * This shifting is only done if needed. In the case of shifting
+ * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of
+ * shifting SavedDirty->Dirty, the condition is Write=1.
+ */
+static inline pgprotval_t mksaveddirty_shift(pgprotval_t v)
+{
+ pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
+ v &= ~(cond << _PAGE_BIT_DIRTY);
+
+ return v;
+}
+
+static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v)
+{
+ pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY;
+ v &= ~(cond << _PAGE_BIT_SAVED_DIRTY);
+
+ return v;
+}
+
+static inline pte_t pte_mksaveddirty(pte_t pte)
+{
+ pteval_t v = native_pte_val(pte);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pte(v);
+}
+
+static inline pte_t pte_clear_saveddirty(pte_t pte)
+{
+ pteval_t v = native_pte_val(pte);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pte(v);
+}
+
static inline pte_t pte_wrprotect(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_RW);
+ pte = pte_clear_flags(pte, _PAGE_RW);
+
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PTE (Write=0,Dirty=1). Move the hardware
+ * dirty value to the software bit, if present.
+ */
+ return pte_mksaveddirty(pte);
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -332,7 +419,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte)
static inline pte_t pte_mkclean(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_DIRTY);
+ return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
}
static inline pte_t pte_mkold(pte_t pte)
@@ -347,7 +434,16 @@ static inline pte_t pte_mkexec(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+ return pte_mksaveddirty(pte);
+}
+
+static inline pte_t pte_mkwrite_shstk(pte_t pte)
+{
+ pte = pte_clear_flags(pte, _PAGE_RW);
+
+ return pte_set_flags(pte, _PAGE_DIRTY);
}
static inline pte_t pte_mkyoung(pte_t pte)
@@ -355,11 +451,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte_set_flags(pte, _PAGE_ACCESSED);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return pte_set_flags(pte, _PAGE_RW);
}
+struct vm_area_struct;
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
+#define pte_mkwrite pte_mkwrite
+
static inline pte_t pte_mkhuge(pte_t pte)
{
return pte_set_flags(pte, _PAGE_PSE);
@@ -404,9 +504,34 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pmd(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pmd(v);
+}
+
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_RW);
+ pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PMD (RW=0, Dirty=1). Move the hardware
+ * dirty value to the software bit.
+ */
+ return pmd_mksaveddirty(pmd);
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -433,12 +558,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_DIRTY);
+ return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+ return pmd_mksaveddirty(pmd);
+}
+
+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
+{
+ pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+ return pmd_set_flags(pmd, _PAGE_DIRTY);
}
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
@@ -456,11 +590,14 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_ACCESSED);
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_RW);
}
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+#define pmd_mkwrite pmd_mkwrite
+
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);
@@ -475,6 +612,24 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
return native_make_pud(v & ~clear);
}
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_mksaveddirty(pud_t pud)
+{
+ pudval_t v = native_pud_val(pud);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pud(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_clear_saveddirty(pud_t pud)
+{
+ pudval_t v = native_pud_val(pud);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pud(v);
+}
+
static inline pud_t pud_mkold(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_ACCESSED);
@@ -482,17 +637,26 @@ static inline pud_t pud_mkold(pud_t pud)
static inline pud_t pud_mkclean(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_DIRTY);
+ return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
}
static inline pud_t pud_wrprotect(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_RW);
+ pud = pud_clear_flags(pud, _PAGE_RW);
+
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PUD (RW=0, Dirty=1). Move the hardware
+ * dirty value to the software bit.
+ */
+ return pud_mksaveddirty(pud);
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+ return pud_mksaveddirty(pud);
}
static inline pud_t pud_mkdevmap(pud_t pud)
@@ -512,7 +676,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
static inline pud_t pud_mkwrite(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_RW);
+ pud = pud_set_flags(pud, _PAGE_RW);
+
+ return pud_clear_saveddirty(pud);
}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -629,6 +795,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pteval_t val = pte_val(pte), oldval = val;
+ pte_t pte_result;
/*
* Chop off the NX bit (if present), and add the NX portion of
@@ -637,17 +804,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
- return __pte(val);
+
+ pte_result = __pte(val);
+
+ /*
+ * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
+ * 1. Marking Write=0 PTEs Dirty=1
+ * 2. Marking Dirty=1 PTEs Write=0
+ *
+ * The first case cannot happen because the _PAGE_CHG_MASK will filter
+ * out any Dirty bit passed in newprot. Handle the second case by
+ * going through the mksaveddirty exercise. Only do this if the old
+ * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ */
+ if (oldval & _PAGE_RW)
+ pte_result = pte_mksaveddirty(pte_result);
+ else
+ pte_result = pte_clear_saveddirty(pte_result);
+
+ return pte_result;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmdval_t val = pmd_val(pmd), oldval = val;
+ pmd_t pmd_result;
- val &= _HPAGE_CHG_MASK;
+ val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
- return __pmd(val);
+
+ pmd_result = __pmd(val);
+
+ /*
+ * To avoid creating Write=0,Dirty=1 PMDs, pmd_modify() needs to avoid:
+ * 1. Marking Write=0 PMDs Dirty=1
+ * 2. Marking Dirty=1 PMDs Write=0
+ *
+ * The first case cannot happen because the _HPAGE_CHG_MASK will filter
+ * out any Dirty bit passed in newprot. Handle the second case by
+ * going through the mksaveddirty exercise. Only do this if the old
+ * value was Write=1 to avoid doing this on Shadow Stack PMDs.
+ */
+ if (oldval & _PAGE_RW)
+ pmd_result = pmd_mksaveddirty(pmd_result);
+ else
+ pmd_result = pmd_clear_saveddirty(pmd_result);
+
+ return pmd_result;
}
/*
@@ -831,7 +1035,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* (Currently stuck as a macro because of indirect forward reference
* to linux/mm.h:page_to_nid())
*/
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte(page, pgprot) \
+({ \
+ pgprot_t __pgprot = pgprot; \
+ \
+ WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \
+ _PAGE_DIRTY); \
+ pfn_pte(page_to_pfn(page), __pgprot); \
+})
static inline int pmd_bad(pmd_t pmd)
{
@@ -1090,7 +1301,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+ /*
+ * Avoid accidentally creating shadow stack PTEs
+ * (Write=0,Dirty=1). Use cmpxchg() to prevent races with
+ * the hardware setting Dirty=1.
+ */
+ pte_t old_pte, new_pte;
+
+ old_pte = READ_ONCE(*ptep);
+ do {
+ new_pte = pte_wrprotect(old_pte);
+ } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte));
}
#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
@@ -1116,12 +1337,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-#define pmd_write pmd_write
-static inline int pmd_write(pmd_t pmd)
-{
- return pmd_flags(pmd) & _PAGE_RW;
-}
-
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
@@ -1148,13 +1363,17 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
-}
+ /*
+ * Avoid accidentally creating shadow stack PMDs
+ * (Write=0,Dirty=1). Use cmpxchg() to prevent races with
+ * the hardware setting Dirty=1.
+ */
+ pmd_t old_pmd, new_pmd;
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
- return pud_flags(pud) & _PAGE_RW;
+ old_pmd = READ_ONCE(*pmdp);
+ do {
+ new_pmd = pmd_wrprotect(old_pmd);
+ } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd));
}
#ifndef pmdp_establish
@@ -1412,6 +1631,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write)
{
unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+ /*
+ * Write=0,Dirty=1 PTEs are shadow stack, which the kernel
+ * shouldn't generally allow access to, but since they
+ * are already Write=0, the below logic covers both cases.
+ */
if (write)
need_pte_bits |= _PAGE_RW;
@@ -1453,6 +1677,12 @@ static inline bool arch_has_hw_pte_young(void)
return true;
}
+#define arch_check_zapped_pte arch_check_zapped_pte
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
+
+#define arch_check_zapped_pmd arch_check_zapped_pmd
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a6deb67..0b748ee 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -21,7 +21,8 @@
#define _PAGE_BIT_SOFTW2 10 /* " */
#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
-#define _PAGE_BIT_SOFTW4 58 /* available for programmer */
+#define _PAGE_BIT_SOFTW4 57 /* available for programmer */
+#define _PAGE_BIT_SOFTW5 58 /* available for programmer */
#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */
#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */
#define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */
@@ -34,6 +35,13 @@
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
+#ifdef CONFIG_X86_64
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#else
+/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#endif
+
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
@@ -117,6 +125,18 @@
#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
#endif
+/*
+ * The hardware requires shadow stack to be Write=0,Dirty=1. However,
+ * there are valid cases where the kernel might create read-only PTEs that
+ * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In
+ * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit,
+ * to avoid creating wrong "shadow stack" PTEs. Such PTEs have
+ * (Write=0,SavedDirty=1,Dirty=0) set.
+ */
+#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY)
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY)
+
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
/*
@@ -125,10 +145,10 @@
* instance, and is *not* included in this mask since
* pte_modify() does modify it.
*/
-#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\
- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
- _PAGE_UFFD_WP)
+#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+ _PAGE_SPECIAL | _PAGE_ACCESSED | \
+ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
+ _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -189,14 +209,22 @@ enum page_cache_mode {
#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+
+/*
+ * Page tables need to have Write=1 in order for any lower PTEs to be
+ * writable. This includes shadow stack memory (Write=0, Dirty=1).
+ */
#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
-#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
-#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G)
+
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G)
+#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G)
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
-#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cbb9430..0086920 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -28,6 +28,7 @@ struct vm86;
#include <asm/unwind_hints.h>
#include <asm/vmxfeatures.h>
#include <asm/vdso/processor.h>
+#include <asm/shstk.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -474,6 +475,13 @@ struct thread_struct {
*/
u32 pkru;
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ unsigned long features;
+ unsigned long features_locked;
+
+ struct thread_shstk shstk;
+#endif
+
/* Floating point and extended processor state */
struct fpu fpu;
/*
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
new file mode 100644
index 0000000..42fee89
--- /dev/null
+++ b/arch/x86/include/asm/shstk.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHSTK_H
+#define _ASM_X86_SHSTK_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+struct task_struct;
+struct ksignal;
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+struct thread_shstk {
+ u64 base;
+ u64 size;
+};
+
+long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
+void reset_thread_features(void);
+unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
+ unsigned long stack_size);
+void shstk_free(struct task_struct *p);
+int setup_signal_shadow_stack(struct ksignal *ksig);
+int restore_signal_shadow_stack(void);
+#else
+static inline long shstk_prctl(struct task_struct *task, int option,
+ unsigned long arg2) { return -EINVAL; }
+static inline void reset_thread_features(void) {}
+static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
+ unsigned long clone_flags,
+ unsigned long stack_size) { return 0; }
+static inline void shstk_free(struct task_struct *p) {}
+static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
+static inline int restore_signal_shadow_stack(void) { return 0; }
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_SHSTK_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index de48d13..d6cd934 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p)
: [pax] "a" (p));
}
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static inline int write_user_shstk_64(u64 __user *addr, u64 val)
+{
+ asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+ _ASM_EXTABLE(1b, %l[fail])
+ :: [addr] "r" (addr), [val] "r" (val)
+ :: fail);
+ return 0;
+fail:
+ return -EFAULT;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
#define nop() asm volatile ("nop")
static inline void serialize(void)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6ab42ca..2572689 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -306,7 +306,8 @@ static inline bool pte_flags_need_flush(unsigned long oldflags,
const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
_PAGE_ACCESSED;
const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
- _PAGE_SOFTW3 | _PAGE_SOFTW4;
+ _PAGE_SOFTW3 | _PAGE_SOFTW4 |
+ _PAGE_SAVED_DIRTY;
const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
_PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
_PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index 10b1de5..afa5243 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -11,6 +11,7 @@
* bit 3 == 1: use of reserved bit detected
* bit 4 == 1: fault was an instruction fetch
* bit 5 == 1: protection keys block access
+ * bit 6 == 1: shadow stack access fault
* bit 15 == 1: SGX MMU page-fault
*/
enum x86_pf_error_code {
@@ -20,6 +21,7 @@ enum x86_pf_error_code {
X86_PF_RSVD = 1 << 3,
X86_PF_INSTR = 1 << 4,
X86_PF_PK = 1 << 5,
+ X86_PF_SHSTK = 1 << 6,
X86_PF_SGX = 1 << 15,
};
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 47ecfff..b1c9cea 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -18,7 +18,8 @@ void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
-extern bool ibt_selftest(void);
+extern int ibt_selftest(void);
+extern int ibt_selftest_noendbr(void);
#ifdef CONFIG_X86_F00F_BUG
/* For handling the FOOF bug */
@@ -47,4 +48,16 @@ void __noreturn handle_stack_overflow(struct pt_regs *regs,
struct stack_info *info);
#endif
+static inline void cond_local_irq_enable(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+}
+
+static inline void cond_local_irq_disable(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
+}
+
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 775dbd3..8148bdd 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,6 +3,7 @@
#define _ASM_X86_MMAN_H
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
+#define MAP_ABOVE4G 0x80 /* only map above 4GB */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
#define arch_calc_vm_prot_bits(prot, key) ( \
@@ -12,6 +13,9 @@
((key) & 0x8 ? VM_PKEY_BIT3 : 0))
#endif
+/* Flags for map_shadow_stack(2) */
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
+
#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index e8d7ebb..384e2cc 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -23,9 +23,21 @@
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
+/* Don't use 0x3001-0x3004 because of old glibcs */
+
#define ARCH_GET_UNTAG_MASK 0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS 0x4003
#define ARCH_FORCE_TAGGED_SVA 0x4004
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_LOCK 0x5003
+#define ARCH_SHSTK_UNLOCK 0x5004
+#define ARCH_SHSTK_STATUS 0x5005
+
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#define ARCH_SHSTK_WRSS (1ULL << 1)
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00df34c..3269a0e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -48,6 +48,7 @@
obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o dumpstack.o nmi.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
+obj-$(CONFIG_X86_KERNEL_IBT) += ibt_selftest.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
@@ -144,6 +145,10 @@
obj-$(CONFIG_CALL_THUNKS) += callthunks.o
+obj-$(CONFIG_X86_CET) += cet.o
+
+obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
new file mode 100644
index 0000000..d2c732a
--- /dev/null
+++ b/arch/x86/kernel/cet.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <asm/bugs.h>
+#include <asm/traps.h>
+
+enum cp_error_code {
+ CP_EC = (1 << 15) - 1,
+
+ CP_RET = 1,
+ CP_IRET = 2,
+ CP_ENDBR = 3,
+ CP_RSTRORSSP = 4,
+ CP_SETSSBSY = 5,
+
+ CP_ENCL = 1 << 15,
+};
+
+static const char cp_err[][10] = {
+ [0] = "unknown",
+ [1] = "near ret",
+ [2] = "far/iret",
+ [3] = "endbranch",
+ [4] = "rstorssp",
+ [5] = "setssbsy",
+};
+
+static const char *cp_err_string(unsigned long error_code)
+{
+ unsigned int cpec = error_code & CP_EC;
+
+ if (cpec >= ARRAY_SIZE(cp_err))
+ cpec = 0;
+ return cp_err[cpec];
+}
+
+static void do_unexpected_cp(struct pt_regs *regs, unsigned long error_code)
+{
+ WARN_ONCE(1, "Unexpected %s #CP, error_code: %s\n",
+ user_mode(regs) ? "user mode" : "kernel mode",
+ cp_err_string(error_code));
+}
+
+static DEFINE_RATELIMIT_STATE(cpf_rate, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ struct task_struct *tsk;
+ unsigned long ssp;
+
+ /*
+ * An exception was just taken from userspace. Since interrupts are disabled
+ * here, no scheduling should have messed with the registers yet and they
+ * will be whatever is live in userspace. So read the SSP before enabling
+ * interrupts so locking the fpregs to do it later is not required.
+ */
+ rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+ cond_local_irq_enable(regs);
+
+ tsk = current;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_nr = X86_TRAP_CP;
+
+ /* Ratelimit to prevent log spamming. */
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ __ratelimit(&cpf_rate)) {
+ pr_emerg("%s[%d] control protection ip:%lx sp:%lx ssp:%lx error:%lx(%s)%s",
+ tsk->comm, task_pid_nr(tsk),
+ regs->ip, regs->sp, ssp, error_code,
+ cp_err_string(error_code),
+ error_code & CP_ENCL ? " in enclave" : "");
+ print_vma_addr(KERN_CONT " in ", regs->ip);
+ pr_cont("\n");
+ }
+
+ force_sig_fault(SIGSEGV, SEGV_CPERR, (void __user *)0);
+ cond_local_irq_disable(regs);
+}
+
+static __ro_after_init bool ibt_fatal = true;
+
+static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ if ((error_code & CP_EC) != CP_ENDBR) {
+ do_unexpected_cp(regs, error_code);
+ return;
+ }
+
+ if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) {
+ regs->ax = 0;
+ return;
+ }
+
+ pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
+ if (!ibt_fatal) {
+ printk(KERN_DEFAULT CUT_HERE);
+ __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
+ return;
+ }
+ BUG();
+}
+
+static int __init ibt_setup(char *str)
+{
+ if (!strcmp(str, "off"))
+ setup_clear_cpu_cap(X86_FEATURE_IBT);
+
+ if (!strcmp(str, "warn"))
+ ibt_fatal = false;
+
+ return 1;
+}
+
+__setup("ibt=", ibt_setup);
+
+DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
+{
+ if (user_mode(regs)) {
+ if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ do_user_cp_fault(regs, error_code);
+ else
+ do_unexpected_cp(regs, error_code);
+ } else {
+ if (cpu_feature_enabled(X86_FEATURE_IBT))
+ do_kernel_cp_fault(regs, error_code);
+ else
+ do_unexpected_cp(regs, error_code);
+ }
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 90b8c5e..6d75fab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -587,27 +587,43 @@ __noendbr void ibt_restore(u64 save)
static __always_inline void setup_cet(struct cpuinfo_x86 *c)
{
- u64 msr = CET_ENDBR_EN;
+ bool user_shstk, kernel_ibt;
- if (!HAS_KERNEL_IBT ||
- !cpu_feature_enabled(X86_FEATURE_IBT))
+ if (!IS_ENABLED(CONFIG_X86_CET))
return;
- wrmsrl(MSR_IA32_S_CET, msr);
+ kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT);
+ user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK);
+
+ if (!kernel_ibt && !user_shstk)
+ return;
+
+ if (user_shstk)
+ set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
+
+ if (kernel_ibt)
+ wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN);
+ else
+ wrmsrl(MSR_IA32_S_CET, 0);
+
cr4_set_bits(X86_CR4_CET);
- if (!ibt_selftest()) {
+ if (kernel_ibt && ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
wrmsrl(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
- return;
}
}
__noendbr void cet_disable(void)
{
- if (cpu_feature_enabled(X86_FEATURE_IBT))
- wrmsrl(MSR_IA32_S_CET, 0);
+ if (!(cpu_feature_enabled(X86_FEATURE_IBT) ||
+ cpu_feature_enabled(X86_FEATURE_SHSTK)))
+ return;
+
+ wrmsrl(MSR_IA32_S_CET, 0);
+ wrmsrl(MSR_IA32_U_CET, 0);
}
/*
@@ -1491,6 +1507,9 @@ static void __init cpu_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+ if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
+ setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
+
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
if (arglen <= 0)
return;
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index f6748c8..e462c1d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -81,6 +81,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
+ { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
{}
};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 099b6f0..31c0e68 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -4,6 +4,8 @@
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/cpufreq.h>
+#include <asm/prctl.h>
+#include <linux/proc_fs.h>
#include "cpu.h"
@@ -175,3 +177,24 @@ const struct seq_operations cpuinfo_op = {
.stop = c_stop,
.show = show_cpuinfo,
};
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static void dump_x86_features(struct seq_file *m, unsigned long features)
+{
+ if (features & ARCH_SHSTK_SHSTK)
+ seq_puts(m, "shstk ");
+ if (features & ARCH_SHSTK_WRSS)
+ seq_puts(m, "wrss ");
+}
+
+void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task)
+{
+ seq_puts(m, "x86_Thread_features:\t");
+ dump_x86_features(m, task->thread.features);
+ seq_putc(m, '\n');
+
+ seq_puts(m, "x86_Thread_features_locked:\t");
+ dump_x86_features(m, task->thread.features_locked);
+ seq_putc(m, '\n');
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 98e507c..a86d370 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -552,8 +552,36 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
}
}
+/* A passed ssp of zero will not cause any update */
+static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
+{
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ struct cet_user_state *xstate;
+
+ /* If ssp update is not needed. */
+ if (!ssp)
+ return 0;
+
+ xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
+ XFEATURE_CET_USER);
+
+ /*
+ * If there is a non-zero ssp, then 'dst' must be configured with a shadow
+ * stack and the fpu state should be up to date since it was just copied
+ * from the parent in fpu_clone(). So there must be a valid non-init CET
+ * state location in the buffer.
+ */
+ if (WARN_ON_ONCE(!xstate))
+ return 1;
+
+ xstate->user_ssp = (u64)ssp;
+#endif
+ return 0;
+}
+
/* Clone current's FPU state on fork */
-int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
+int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
+ unsigned long ssp)
{
struct fpu *src_fpu = ¤t->thread.fpu;
struct fpu *dst_fpu = &dst->thread.fpu;
@@ -613,6 +641,12 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
if (use_xsave())
dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
+ /*
+ * Update shadow stack pointer, in case it changed during clone.
+ */
+ if (update_fpu_shstk(dst, ssp))
+ return 1;
+
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -753,6 +787,24 @@ void switch_fpu_return(void)
}
EXPORT_SYMBOL_GPL(switch_fpu_return);
+void fpregs_lock_and_load(void)
+{
+ /*
+ * fpregs_lock() only disables preemption (mostly). So modifying state
+ * in an interrupt could screw up some in progress fpregs operation.
+ * Warn about it.
+ */
+ WARN_ON_ONCE(!irq_fpu_usable());
+ WARN_ON_ONCE(current->flags & PF_KTHREAD);
+
+ fpregs_lock();
+
+ fpregs_assert_state_consistent();
+
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ fpregs_restore_userregs();
+}
+
#ifdef CONFIG_X86_DEBUG_FPU
/*
* If current FPU state according to its tracking (loaded FPU context on this
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 6d056b6..6bc1eb2 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -8,6 +8,7 @@
#include <asm/fpu/api.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
+#include <asm/prctl.h>
#include "context.h"
#include "internal.h"
@@ -174,6 +175,86 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+int ssp_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (target->thread.features & ARCH_SHSTK_SHSTK)
+ return regset->n;
+
+ return 0;
+}
+
+int ssp_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct cet_user_state *cetregs;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -ENODEV;
+
+ sync_fpstate(fpu);
+ cetregs = get_xsave_addr(&fpu->fpstate->regs.xsave, XFEATURE_CET_USER);
+ if (WARN_ON(!cetregs)) {
+ /*
+ * This shouldn't ever be NULL because shadow stack was
+ * verified to be enabled above. This means
+ * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so
+ * XFEATURE_CET_USER should not be in the init state.
+ */
+ return -ENODEV;
+ }
+
+ return membuf_write(&to, (unsigned long *)&cetregs->user_ssp,
+ sizeof(cetregs->user_ssp));
+}
+
+int ssp_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
+ struct cet_user_state *cetregs;
+ unsigned long user_ssp;
+ int r;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !ssp_active(target, regset))
+ return -ENODEV;
+
+ if (pos != 0 || count != sizeof(user_ssp))
+ return -EINVAL;
+
+ r = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_ssp, 0, -1);
+ if (r)
+ return r;
+
+ /*
+ * Some kernel instructions (IRET, etc) can cause exceptions in the case
+ * of disallowed CET register values. Just prevent invalid values.
+ */
+ if (user_ssp >= TASK_SIZE_MAX || !IS_ALIGNED(user_ssp, 8))
+ return -EINVAL;
+
+ fpu_force_restore(fpu);
+
+ cetregs = get_xsave_addr(xsave, XFEATURE_CET_USER);
+ if (WARN_ON(!cetregs)) {
+ /*
+ * This shouldn't ever be NULL because shadow stack was
+ * verified to be enabled above. This means
+ * MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so
+ * XFEATURE_CET_USER should not be in the init state.
+ */
+ return -ENODEV;
+ }
+
+ cetregs->user_ssp = user_ssp;
+ return 0;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1afbc48..41dac93 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -39,26 +39,26 @@
*/
static const char *xfeature_names[] =
{
- "x87 floating point registers" ,
- "SSE registers" ,
- "AVX registers" ,
- "MPX bounds registers" ,
- "MPX CSR" ,
- "AVX-512 opmask" ,
- "AVX-512 Hi256" ,
- "AVX-512 ZMM_Hi256" ,
- "Processor Trace (unused)" ,
+ "x87 floating point registers",
+ "SSE registers",
+ "AVX registers",
+ "MPX bounds registers",
+ "MPX CSR",
+ "AVX-512 opmask",
+ "AVX-512 Hi256",
+ "AVX-512 ZMM_Hi256",
+ "Processor Trace (unused)",
"Protection Keys User registers",
"PASID state",
- "unknown xstate feature" ,
- "unknown xstate feature" ,
- "unknown xstate feature" ,
- "unknown xstate feature" ,
- "unknown xstate feature" ,
- "unknown xstate feature" ,
- "AMX Tile config" ,
- "AMX Tile data" ,
- "unknown xstate feature" ,
+ "Control-flow User registers",
+ "Control-flow Kernel registers (unused)",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "AMX Tile config",
+ "AMX Tile data",
+ "unknown xstate feature",
};
static unsigned short xsave_cpuid_features[] __initdata = {
@@ -73,6 +73,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
[XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
[XFEATURE_PKRU] = X86_FEATURE_PKU,
[XFEATURE_PASID] = X86_FEATURE_ENQCMD,
+ [XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
};
@@ -276,6 +277,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
print_xstate_feature(XFEATURE_MASK_PASID);
+ print_xstate_feature(XFEATURE_MASK_CET_USER);
print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
}
@@ -344,6 +346,7 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_PASID | \
+ XFEATURE_MASK_CET_USER | \
XFEATURE_MASK_XTILE)
/*
@@ -446,14 +449,15 @@ static void __init __xstate_dump_leaves(void)
} \
} while (0)
-#define XCHECK_SZ(sz, nr, nr_macro, __struct) do { \
- if ((nr == nr_macro) && \
- WARN_ONCE(sz != sizeof(__struct), \
- "%s: struct is %zu bytes, cpu state %d bytes\n", \
- __stringify(nr_macro), sizeof(__struct), sz)) { \
+#define XCHECK_SZ(sz, nr, __struct) ({ \
+ if (WARN_ONCE(sz != sizeof(__struct), \
+ "[%s]: struct is %zu bytes, cpu state %d bytes\n", \
+ xfeature_names[nr], sizeof(__struct), sz)) { \
__xstate_dump_leaves(); \
} \
-} while (0)
+ true; \
+})
+
/**
* check_xtile_data_against_struct - Check tile data state size.
@@ -527,36 +531,28 @@ static bool __init check_xstate_against_struct(int nr)
* Ask the CPU for the size of the state.
*/
int sz = xfeature_size(nr);
+
/*
* Match each CPU state with the corresponding software
* structure.
*/
- XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct);
- XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state);
- XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state);
- XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
- XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
- XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
- XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
- XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
- XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
-
- /* The tile data size varies between implementations. */
- if (nr == XFEATURE_XTILE_DATA)
- check_xtile_data_against_struct(sz);
-
- /*
- * Make *SURE* to add any feature numbers in below if
- * there are "holes" in the xsave state component
- * numbers.
- */
- if ((nr < XFEATURE_YMM) ||
- (nr >= XFEATURE_MAX) ||
- (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
- ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
+ switch (nr) {
+ case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
+ case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
+ case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
+ case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
+ case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
+ case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
+ case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
+ case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
+ case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
+ case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
+ case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
+ default:
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
return false;
}
+
return true;
}
diff --git a/arch/x86/kernel/ibt_selftest.S b/arch/x86/kernel/ibt_selftest.S
new file mode 100644
index 0000000..c43c4ed
--- /dev/null
+++ b/arch/x86/kernel/ibt_selftest.S
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <linux/objtool.h>
+#include <asm/nospec-branch.h>
+
+SYM_CODE_START(ibt_selftest_noendbr)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ /* #CP handler sets %ax to 0 */
+ RET
+SYM_CODE_END(ibt_selftest_noendbr)
+
+SYM_FUNC_START(ibt_selftest)
+ lea ibt_selftest_noendbr(%rip), %rax
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%rax
+SYM_FUNC_END(ibt_selftest)
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index f395826..b786d48 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -107,7 +107,7 @@ static const __initconst struct idt_data def_idts[] = {
ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
#endif
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
INTG(X86_TRAP_CP, asm_exc_control_protection),
#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 72015db..9f09091 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -51,6 +51,7 @@
#include <asm/unwind.h>
#include <asm/tdx.h>
#include <asm/mmu_context.h>
+#include <asm/shstk.h>
#include "process.h"
@@ -122,6 +123,7 @@ void exit_thread(struct task_struct *tsk)
free_vm86(t);
+ shstk_free(tsk);
fpu__drop(fpu);
}
@@ -162,6 +164,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
struct inactive_task_frame *frame;
struct fork_frame *fork_frame;
struct pt_regs *childregs;
+ unsigned long new_ssp;
int ret = 0;
childregs = task_pt_regs(p);
@@ -199,7 +202,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->flags = X86_EFLAGS_FIXED;
#endif
- fpu_clone(p, clone_flags, args->fn);
+ /*
+ * Allocate a new shadow stack for the thread if needed. If shadow stack
+ * is disabled, new_ssp will remain 0, and fpu_clone() will know not to
+ * update it.
+ */
+ new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size);
+ if (IS_ERR_VALUE(new_ssp))
+ return PTR_ERR((void *)new_ssp);
+
+ fpu_clone(p, clone_flags, args->fn, new_ssp);
/* Kernel thread ? */
if (unlikely(p->flags & PF_KTHREAD)) {
@@ -245,6 +257,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
io_bitmap_share(p);
+ /*
+ * If copy_thread() is failing, don't leak the shadow stack possibly
+ * allocated in shstk_alloc_thread_stack() above.
+ */
+ if (ret)
+ shstk_free(p);
+
return ret;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d181c1..33b2687 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -515,6 +515,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
load_gs_index(__USER_DS);
}
+ reset_thread_features();
+
loadsegment(fs, 0);
loadsegment(es, _ds);
loadsegment(ds, _ds);
@@ -894,6 +896,12 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
else
return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
#endif
+ case ARCH_SHSTK_ENABLE:
+ case ARCH_SHSTK_DISABLE:
+ case ARCH_SHSTK_LOCK:
+ case ARCH_SHSTK_UNLOCK:
+ case ARCH_SHSTK_STATUS:
+ return shstk_prctl(task, option, arg2);
default:
ret = -EINVAL;
break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index dfaa270..095f04b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -58,6 +58,7 @@ enum x86_regset_64 {
REGSET64_FP,
REGSET64_IOPERM,
REGSET64_XSTATE,
+ REGSET64_SSP,
};
#define REGSET_GENERAL \
@@ -1267,6 +1268,17 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
.active = ioperm_active,
.regset_get = ioperm_get
},
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ [REGSET64_SSP] = {
+ .core_note_type = NT_X86_SHSTK,
+ .n = 1,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .active = ssp_active,
+ .regset_get = ssp_get,
+ .set = ssp_set
+ },
+#endif
};
static const struct user_regset_view user_x86_64_view = {
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
new file mode 100644
index 0000000..fd68992
--- /dev/null
+++ b/arch/x86/kernel/shstk.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * shstk.c - Intel shadow stack support
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ * Yu-cheng Yu <yu-cheng.yu@intel.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <linux/syscalls.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/shstk.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
+#include <asm/prctl.h>
+
+#define SS_FRAME_SIZE 8
+
+static bool features_enabled(unsigned long features)
+{
+ return current->thread.features & features;
+}
+
+static void features_set(unsigned long features)
+{
+ current->thread.features |= features;
+}
+
+static void features_clr(unsigned long features)
+{
+ current->thread.features &= ~features;
+}
+
+/*
+ * Create a restore token on the shadow stack. A token is always 8-byte
+ * and aligned to 8.
+ */
+static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
+{
+ unsigned long addr;
+
+ /* Token must be aligned */
+ if (!IS_ALIGNED(ssp, 8))
+ return -EINVAL;
+
+ addr = ssp - SS_FRAME_SIZE;
+
+ /*
+ * SSP is aligned, so reserved bits and mode bit are zero; just mark
+ * the token 64-bit.
+ */
+ ssp |= BIT(0);
+
+ if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
+ return -EFAULT;
+
+ if (token_addr)
+ *token_addr = addr;
+
+ return 0;
+}
+
+/*
+ * VM_SHADOW_STACK will have a guard page. This helps userspace protect
+ * itself from attacks. The reasoning is as follows:
+ *
+ * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
+ * INCSSP instruction can increment the shadow stack pointer. It is the
+ * shadow stack analog of an instruction like:
+ *
+ * addq $0x80, %rsp
+ *
+ * However, there is one important difference between an ADD on %rsp
+ * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
+ * memory of the first and last elements that were "popped". It can be
+ * thought of as acting like this:
+ *
+ * READ_ONCE(ssp); // read+discard top element on stack
+ * ssp += nr_to_pop * 8; // move the shadow stack
+ * READ_ONCE(ssp-8); // read+discard last popped stack element
+ *
+ * The maximum distance INCSSP can move the SSP is 2040 bytes, before
+ * it would read the memory. Therefore a single page gap will be enough
+ * to prevent any operation from shifting the SSP to an adjacent stack,
+ * since it would have to land in the gap at least once, causing a
+ * fault.
+ */
+static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
+ unsigned long token_offset, bool set_res_tok)
+{
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
+ struct mm_struct *mm = current->mm;
+ unsigned long mapped_addr, unused;
+
+ if (addr)
+ flags |= MAP_FIXED_NOREPLACE;
+
+ mmap_write_lock(mm);
+ mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
+ VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+ mmap_write_unlock(mm);
+
+ if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
+ goto out;
+
+ if (create_rstor_token(mapped_addr + token_offset, NULL)) {
+ vm_munmap(mapped_addr, size);
+ return -EINVAL;
+ }
+
+out:
+ return mapped_addr;
+}
+
+static unsigned long adjust_shstk_size(unsigned long size)
+{
+ if (size)
+ return PAGE_ALIGN(size);
+
+ return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+}
+
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+ int r;
+
+ r = vm_munmap(base, size);
+
+ /*
+ * mmap_write_lock_killable() failed with -EINTR. This means
+ * the process is about to die and have its MM cleaned up.
+ * This task shouldn't ever make it back to userspace. In this
+ * case it is ok to leak a shadow stack, so just exit out.
+ */
+ if (r == -EINTR)
+ return;
+
+ /*
+ * For all other types of vm_munmap() failure, either the
+ * system is out of memory or there is a bug.
+ */
+ WARN_ON_ONCE(r);
+}
+
+static int shstk_setup(void)
+{
+ struct thread_shstk *shstk = ¤t->thread.shstk;
+ unsigned long addr, size;
+
+ /* Already enabled */
+ if (features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ /* Also not supported for 32 bit and x32 */
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
+ return -EOPNOTSUPP;
+
+ size = adjust_shstk_size(0);
+ addr = alloc_shstk(0, size, 0, false);
+ if (IS_ERR_VALUE(addr))
+ return PTR_ERR((void *)addr);
+
+ fpregs_lock_and_load();
+ wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+ wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+ fpregs_unlock();
+
+ shstk->base = addr;
+ shstk->size = size;
+ features_set(ARCH_SHSTK_SHSTK);
+
+ return 0;
+}
+
+void reset_thread_features(void)
+{
+ memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk));
+ current->thread.features = 0;
+ current->thread.features_locked = 0;
+}
+
+unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
+ unsigned long stack_size)
+{
+ struct thread_shstk *shstk = &tsk->thread.shstk;
+ unsigned long addr, size;
+
+ /*
+ * If shadow stack is not enabled on the new thread, skip any
+ * switch to a new shadow stack.
+ */
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ /*
+ * For CLONE_VM, except vfork, the child needs a separate shadow
+ * stack.
+ */
+ if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
+ return 0;
+
+ size = adjust_shstk_size(stack_size);
+ addr = alloc_shstk(0, size, 0, false);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ shstk->base = addr;
+ shstk->size = size;
+
+ return addr + size;
+}
+
+static unsigned long get_user_shstk_addr(void)
+{
+ unsigned long long ssp;
+
+ fpregs_lock_and_load();
+
+ rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+ fpregs_unlock();
+
+ return ssp;
+}
+
+#define SHSTK_DATA_BIT BIT(63)
+
+static int put_shstk_data(u64 __user *addr, u64 data)
+{
+ if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
+ return -EINVAL;
+
+ /*
+ * Mark the high bit so that the sigframe can't be processed as a
+ * return address.
+ */
+ if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT))
+ return -EFAULT;
+ return 0;
+}
+
+static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
+{
+ unsigned long ldata;
+
+ if (unlikely(get_user(ldata, addr)))
+ return -EFAULT;
+
+ if (!(ldata & SHSTK_DATA_BIT))
+ return -EINVAL;
+
+ *data = ldata & ~SHSTK_DATA_BIT;
+
+ return 0;
+}
+
+static int shstk_push_sigframe(unsigned long *ssp)
+{
+ unsigned long target_ssp = *ssp;
+
+ /* Token must be aligned */
+ if (!IS_ALIGNED(target_ssp, 8))
+ return -EINVAL;
+
+ *ssp -= SS_FRAME_SIZE;
+ if (put_shstk_data((void __user *)*ssp, target_ssp))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Read the signal frame data at *ssp and, when it is valid, set *ssp to the
+ * shadow stack pointer stored in it. Returns 0 or a negative errno.
+ */
+static int shstk_pop_sigframe(unsigned long *ssp)
+{
+	struct vm_area_struct *vma;
+	unsigned long token_addr;
+	bool need_to_check_vma;
+	int err = 1;
+
+	/*
+	 * It is possible for the SSP to be off the end of a shadow stack by 4
+	 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
+	 * before it, it might be this case, so check that the address being
+	 * read is actually shadow stack.
+	 */
+	if (!IS_ALIGNED(*ssp, 8))
+		return -EINVAL;
+
+	need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
+
+	if (need_to_check_vma && mmap_read_lock_killable(current->mm))
+		return -EINTR;	/* lock not taken, so nothing to unlock */
+
+	err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
+	if (unlikely(err))
+		goto out_err;
+
+	if (need_to_check_vma) {
+		vma = find_vma(current->mm, *ssp);
+		if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
+			err = -EFAULT;
+			goto out_err;
+		}
+
+		mmap_read_unlock(current->mm);
+	}
+
+	/* The restored SSP must be 8-byte aligned and a userspace address */
+	if (unlikely(!IS_ALIGNED(token_addr, 8) || token_addr >= TASK_SIZE_MAX))
+		return -EINVAL;
+
+	*ssp = token_addr;
+
+	return 0;
+out_err:
+	if (need_to_check_vma)
+		mmap_read_unlock(current->mm);
+	return err;
+}
+
+int setup_signal_shadow_stack(struct ksignal *ksig)
+{
+ void __user *restorer = ksig->ka.sa.sa_restorer;
+ unsigned long ssp;
+ int err;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ if (!restorer)
+ return -EINVAL;
+
+ ssp = get_user_shstk_addr();
+ if (unlikely(!ssp))
+ return -EINVAL;
+
+ err = shstk_push_sigframe(&ssp);
+ if (unlikely(err))
+ return err;
+
+ /* Push restorer address */
+ ssp -= SS_FRAME_SIZE;
+ err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
+ if (unlikely(err))
+ return -EFAULT;
+
+ fpregs_lock_and_load();
+ wrmsrl(MSR_IA32_PL3_SSP, ssp);
+ fpregs_unlock();
+
+ return 0;
+}
+
+int restore_signal_shadow_stack(void)
+{
+ unsigned long ssp;
+ int err;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ ssp = get_user_shstk_addr();
+ if (unlikely(!ssp))
+ return -EINVAL;
+
+ err = shstk_pop_sigframe(&ssp);
+ if (unlikely(err))
+ return err;
+
+ fpregs_lock_and_load();
+ wrmsrl(MSR_IA32_PL3_SSP, ssp);
+ fpregs_unlock();
+
+ return 0;
+}
+
+void shstk_free(struct task_struct *tsk)
+{
+ struct thread_shstk *shstk = &tsk->thread.shstk;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !features_enabled(ARCH_SHSTK_SHSTK))
+ return;
+
+ /*
+ * When fork() with CLONE_VM fails, the child (tsk) already has a
+ * shadow stack allocated, and exit_thread() calls this function to
+ * free it. In this case the parent (current) and the child share
+ * the same mm struct.
+ */
+ if (!tsk->mm || tsk->mm != current->mm)
+ return;
+
+ unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+static int wrss_control(bool enable)
+{
+ u64 msrval;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -EOPNOTSUPP;
+
+ /*
+ * Only enable WRSS if shadow stack is enabled. If shadow stack is not
+ * enabled, WRSS will already be disabled, so don't bother clearing it
+ * when disabling.
+ */
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return -EPERM;
+
+ /* Already enabled/disabled? */
+ if (features_enabled(ARCH_SHSTK_WRSS) == enable)
+ return 0;
+
+ fpregs_lock_and_load();
+ rdmsrl(MSR_IA32_U_CET, msrval);
+
+ if (enable) {
+ features_set(ARCH_SHSTK_WRSS);
+ msrval |= CET_WRSS_EN;
+ } else {
+ features_clr(ARCH_SHSTK_WRSS);
+ if (!(msrval & CET_WRSS_EN))
+ goto unlock;
+
+ msrval &= ~CET_WRSS_EN;
+ }
+
+ wrmsrl(MSR_IA32_U_CET, msrval);
+
+unlock:
+ fpregs_unlock();
+
+ return 0;
+}
+
+static int shstk_disable(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -EOPNOTSUPP;
+
+ /* Already disabled? */
+ if (!features_enabled(ARCH_SHSTK_SHSTK))
+ return 0;
+
+ fpregs_lock_and_load();
+ /* Disable WRSS too when disabling shadow stack */
+ wrmsrl(MSR_IA32_U_CET, 0);
+ wrmsrl(MSR_IA32_PL3_SSP, 0);
+ fpregs_unlock();
+
+ shstk_free(current);
+ features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
+
+ return 0;
+}
+
+SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
+{
+ bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
+ unsigned long aligned_size;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -EOPNOTSUPP;
+
+ if (flags & ~SHADOW_STACK_SET_TOKEN)
+ return -EINVAL;
+
+ /* If there isn't space for a token */
+ if (set_tok && size < 8)
+ return -ENOSPC;
+
+ if (addr && addr < SZ_4G)
+ return -ERANGE;
+
+ /*
+ * An overflow would result in attempting to write the restore token
+ * to the wrong location. Not catastrophic, but just return the right
+ * error code and block it.
+ */
+ aligned_size = PAGE_ALIGN(size);
+ if (aligned_size < size)
+ return -EOVERFLOW;
+
+ return alloc_shstk(addr, aligned_size, size, set_tok);
+}
+
+long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
+{
+ unsigned long features = arg2;
+
+ if (option == ARCH_SHSTK_STATUS) {
+ return put_user(task->thread.features, (unsigned long __user *)arg2);
+ }
+
+ if (option == ARCH_SHSTK_LOCK) {
+ task->thread.features_locked |= features;
+ return 0;
+ }
+
+ /* Only allow via ptrace */
+ if (task != current) {
+ if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
+ task->thread.features_locked &= ~features;
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ /* Do not allow to change locked features */
+ if (features & task->thread.features_locked)
+ return -EPERM;
+
+ /* Only support enabling/disabling one feature at a time. */
+ if (hweight_long(features) > 1)
+ return -EINVAL;
+
+ if (option == ARCH_SHSTK_DISABLE) {
+ if (features & ARCH_SHSTK_WRSS)
+ return wrss_control(false);
+ if (features & ARCH_SHSTK_SHSTK)
+ return shstk_disable();
+ return -EINVAL;
+ }
+
+ /* Handle ARCH_SHSTK_ENABLE */
+ if (features & ARCH_SHSTK_SHSTK)
+ return shstk_setup();
+ if (features & ARCH_SHSTK_WRSS)
+ return wrss_control(true);
+ return -EINVAL;
+}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cfeec3e..65fe209 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -40,6 +40,7 @@
#include <asm/syscall.h>
#include <asm/sigframe.h>
#include <asm/signal.h>
+#include <asm/shstk.h>
static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 9027fc0..c12624b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -402,7 +402,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 13a1e60..cacf2ed 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -175,6 +175,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
+ if (setup_signal_shadow_stack(ksig))
+ return -EFAULT;
+
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
@@ -260,6 +263,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
+ if (restore_signal_shadow_stack())
+ goto badframe;
+
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
@@ -403,7 +409,7 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8cc653f..c783aeb 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -193,7 +193,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
- info.low_limit = PAGE_SIZE;
+ if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
+ info.low_limit = SZ_4G;
+ else
+ info.low_limit = PAGE_SIZE;
+
info.high_limit = get_mmap_base(0);
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4a817d2..c876f1d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -77,18 +77,6 @@
DECLARE_BITMAP(system_vectors, NR_VECTORS);
-static inline void cond_local_irq_enable(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void cond_local_irq_disable(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
-}
-
__always_inline int is_valid_bugaddr(unsigned long addr)
{
if (addr < TASK_SIZE_MAX)
@@ -213,81 +201,6 @@ DEFINE_IDTENTRY(exc_overflow)
do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
}
-#ifdef CONFIG_X86_KERNEL_IBT
-
-static __ro_after_init bool ibt_fatal = true;
-
-extern void ibt_selftest_ip(void); /* code label defined in asm below */
-
-enum cp_error_code {
- CP_EC = (1 << 15) - 1,
-
- CP_RET = 1,
- CP_IRET = 2,
- CP_ENDBR = 3,
- CP_RSTRORSSP = 4,
- CP_SETSSBSY = 5,
-
- CP_ENCL = 1 << 15,
-};
-
-DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
-{
- if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
- pr_err("Unexpected #CP\n");
- BUG();
- }
-
- if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR))
- return;
-
- if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) {
- regs->ax = 0;
- return;
- }
-
- pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
- if (!ibt_fatal) {
- printk(KERN_DEFAULT CUT_HERE);
- __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
- return;
- }
- BUG();
-}
-
-/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */
-noinline bool ibt_selftest(void)
-{
- unsigned long ret;
-
- asm (" lea ibt_selftest_ip(%%rip), %%rax\n\t"
- ANNOTATE_RETPOLINE_SAFE
- " jmp *%%rax\n\t"
- "ibt_selftest_ip:\n\t"
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
- " nop\n\t"
-
- : "=a" (ret) : : "memory");
-
- return !ret;
-}
-
-static int __init ibt_setup(char *str)
-{
- if (!strcmp(str, "off"))
- setup_clear_cpu_cap(X86_FEATURE_IBT);
-
- if (!strcmp(str, "warn"))
- ibt_fatal = false;
-
- return 1;
-}
-
-__setup("ibt=", ibt_setup);
-
-#endif /* CONFIG_X86_KERNEL_IBT */
-
#ifdef CONFIG_X86_F00F_BUG
void handle_invalid_op(struct pt_regs *regs)
#else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2e861b9..ab778ea 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1112,8 +1112,22 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
(error_code & X86_PF_INSTR), foreign))
return 1;
+ /*
+ * Shadow stack accesses (PF_SHSTK=1) are only permitted to
+ * shadow stack VMAs. All other accesses result in an error.
+ */
+ if (error_code & X86_PF_SHSTK) {
+ if (unlikely(!(vma->vm_flags & VM_SHADOW_STACK)))
+ return 1;
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return 1;
+ return 0;
+ }
+
if (error_code & X86_PF_WRITE) {
/* write, present and write, not present: */
+ if (unlikely(vma->vm_flags & VM_SHADOW_STACK))
+ return 1;
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
return 0;
@@ -1305,6 +1319,14 @@ void do_user_addr_fault(struct pt_regs *regs,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ /*
+ * Read-only permissions can not be expressed in shadow stack PTEs.
+ * Treat all shadow stack accesses as WRITE faults. This ensures
+ * that the MM will prepare everything (e.g., break COW) such that
+ * maybe_mkwrite() can create a proper shadow stack PTE.
+ */
+ if (error_code & X86_PF_SHSTK)
+ flags |= FAULT_FLAG_WRITE;
if (error_code & X86_PF_WRITE)
flags |= FAULT_FLAG_WRITE;
if (error_code & X86_PF_INSTR)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index df4182b..bda9f12 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2074,12 +2074,12 @@ int set_memory_nx(unsigned long addr, int numpages)
int set_memory_ro(unsigned long addr, int numpages)
{
- return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW | _PAGE_DIRTY), 0);
}
int set_memory_rox(unsigned long addr, int numpages)
{
- pgprot_t clr = __pgprot(_PAGE_RW);
+ pgprot_t clr = __pgprot(_PAGE_RW | _PAGE_DIRTY);
if (__supported_pte_mask & _PAGE_NX)
clr.pgprot |= _PAGE_NX;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d3a93e8..9deadf5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -881,3 +881,43 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
#endif /* CONFIG_X86_64 */
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return pte_mkwrite_shstk(pte);
+
+ pte = pte_mkwrite_novma(pte);
+
+ return pte_clear_saveddirty(pte);
+}
+
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return pmd_mkwrite_shstk(pmd);
+
+ pmd = pmd_mkwrite_novma(pmd);
+
+ return pmd_clear_saveddirty(pmd);
+}
+
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
+{
+ /*
+ * Hardware before shadow stack can (rarely) set Dirty=1
+ * on a Write=0 PTE. So the below condition
+ * only indicates a software bug when shadow stack is
+ * supported by the HW. This checking is covered in
+ * pte_shstk().
+ */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+ pte_shstk(pte));
+}
+
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
+{
+ /* See note in arch_check_zapped_pte() */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
+ pmd_shstk(pmd));
+}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3e04f2b..49352fa 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -694,7 +694,7 @@ static struct trap_array_entry trap_array[] = {
TRAP_ENTRY(exc_coprocessor_error, false ),
TRAP_ENTRY(exc_alignment_check, false ),
TRAP_ENTRY(exc_simd_coprocessor_error, false ),
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
TRAP_ENTRY(exc_control_protection, false ),
#endif
};
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1b5cba7..1652c39 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -166,7 +166,7 @@ void make_lowmem_page_readwrite(void *vaddr)
if (pte == NULL)
return; /* vaddr missing */
- ptev = pte_mkwrite(*pte);
+ ptev = pte_mkwrite_novma(*pte);
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
BUG();
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 08f1ceb..9e5e680 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -148,7 +148,7 @@
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
-#ifdef CONFIG_X86_KERNEL_IBT
+#ifdef CONFIG_X86_CET
xen_pv_trap asm_exc_control_protection
#endif
#ifdef CONFIG_X86_MCE
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index ef79cb6..9a7e5e5 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -262,7 +262,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte)
{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{ pte_val(pte) |= _PAGE_WRITABLE; return pte; }
#define pgprot_noncached(prot) \
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a7d33f3..14db9b7 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -414,13 +414,7 @@ static int __init pseries_idle_probe(void)
return -ENODEV;
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- /*
- * Use local_paca instead of get_lppaca() since
- * preemption is not disabled, and it is not required in
- * fact, since lppaca_ptr does not need to be the value
- * associated to the current CPU, it can be from any CPU.
- */
- if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
+ if (lppaca_shared_proc()) {
cpuidle_state_table = shared_states;
max_idle_state = ARRAY_SIZE(shared_states);
} else {
diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c
index 877e8cb..c978b42 100644
--- a/drivers/macintosh/ams/ams-core.c
+++ b/drivers/macintosh/ams/ams-core.c
@@ -176,7 +176,7 @@ int ams_sensor_attach(void)
return result;
}
-int __init ams_init(void)
+static int __init ams_init(void)
{
struct device_node *np;
diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h
index e053c15..5b295f5 100644
--- a/drivers/macintosh/ams/ams.h
+++ b/drivers/macintosh/ams/ams.h
@@ -6,6 +6,7 @@
#include <linux/input.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
+#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/types.h>
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 50b0c44..fbe16a6 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -269,11 +269,6 @@ static void attach_spa(struct cxl_afu *afu)
cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
}
-static inline void detach_spa(struct cxl_afu *afu)
-{
- cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
-}
-
void cxl_release_spa(struct cxl_afu *afu)
{
if (afu->native->spa) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0ff9448..4cf9e7c 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -150,16 +150,7 @@ static inline resource_size_t p2_size(struct pci_dev *dev)
static int find_cxl_vsec(struct pci_dev *dev)
{
- int vsec = 0;
- u16 val;
-
- while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
- pci_read_config_word(dev, vsec + 0x4, &val);
- if (val == CXL_PCI_VSEC_ID)
- return vsec;
- }
- return 0;
-
+ return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID);
}
static void dump_cxl_config_space(struct pci_dev *dev)
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 759bb70..21c07ac 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -10,8 +10,6 @@
#include <linux/phy.h>
#include <linux/dma-mapping.h>
-#include <asm/fs_pd.h>
-
#ifdef CONFIG_CPM1
#include <asm/cpm1.h>
#endif
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index d903a90..e2ffac9 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -37,7 +37,6 @@
#include <linux/pgtable.h>
#include <asm/immap_cpm2.h>
-#include <asm/mpc8260.h>
#include <asm/cpm2.h>
#include <asm/irq.h>
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 630f770..bcfd26e 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -19,12 +19,92 @@
#include "../pci.h" /* for pci_add_new_bus */
#include "rpaphp.h"
+/*
+ * RTAS call get-sensor-state(DR_ENTITY_SENSE) return values as per PAPR:
+ * -- generic return codes ---
+ * -1: Hardware Error
+ * -2: RTAS_BUSY
+ * -3: Invalid sensor. RTAS Parameter Error.
+ * -- rtas_get_sensor function specific return codes ---
+ * -9000: Need DR entity to be powered up and unisolated before RTAS call
+ * -9001: Need DR entity to be powered up, but not unisolated, before RTAS call
+ * -9002: DR entity unusable
+ * 990x: Extended delay - where x is a number in the range of 0-5
+ */
+#define RTAS_SLOT_UNISOLATED -9000
+#define RTAS_SLOT_NOT_UNISOLATED -9001
+#define RTAS_SLOT_NOT_USABLE -9002
+
+static int rtas_get_sensor_errno(int rtas_rc)
+{
+ switch (rtas_rc) {
+ case 0:
+ /* Success case */
+ return 0;
+ case RTAS_SLOT_UNISOLATED:
+ case RTAS_SLOT_NOT_UNISOLATED:
+ return -EFAULT;
+ case RTAS_SLOT_NOT_USABLE:
+ return -ENODEV;
+ case RTAS_BUSY:
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ return -EBUSY;
+ default:
+ return rtas_error_rc(rtas_rc);
+ }
+}
+
+/*
+ * get_adapter_status() can be called by the EEH handler during EEH recovery.
+ * On certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns
+ * extended busy error (9902) until PHB is recovered by pHyp. The RTAS call
+ * interface rtas_get_sensor() loops over the RTAS call on extended delay
+ * return code (9902) until the return value is either success (0) or error
+ * (-1). This causes the EEH handler to get stuck for ~6 seconds before it
+ * can notify that the PCI error has been detected and stop any active
+ * operations. This sometimes causes EEH recovery to fail. To avoid this issue,
+ * invoke rtas_call(get-sensor-state) directly if the respective PE is in EEH
+ * recovery state and return -EBUSY error based on RTAS return status. This
+ * will help the EEH handler to notify the driver about the PCI error
+ * immediately and successfully proceed with EEH recovery steps.
+ */
+
+static int __rpaphp_get_sensor_state(struct slot *slot, int *state)
+{
+ int rc;
+ int token = rtas_token("get-sensor-state");
+ struct pci_dn *pdn;
+ struct eeh_pe *pe;
+ struct pci_controller *phb = PCI_DN(slot->dn)->phb;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ /*
+ * Fall back to the existing method if the slot is empty or the PE
+ * isn't in EEH recovery.
+ */
+ pdn = list_first_entry_or_null(&PCI_DN(phb->dn)->child_list,
+ struct pci_dn, list);
+ if (!pdn)
+ goto fallback;
+
+ pe = eeh_dev_to_pe(pdn->edev);
+ if (pe && (pe->state & EEH_PE_RECOVERING)) {
+ rc = rtas_call(token, 2, 2, state, DR_ENTITY_SENSE,
+ slot->index);
+ return rtas_get_sensor_errno(rc);
+ }
+fallback:
+ return rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
+}
+
int rpaphp_get_sensor_state(struct slot *slot, int *state)
{
int rc;
int setlevel;
- rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
+ rc = __rpaphp_get_sensor_state(slot, state);
if (rc < 0) {
if (rc == -EFAULT || rc == -EEXIST) {
@@ -40,8 +120,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
dbg("%s: power on slot[%s] failed rc=%d.\n",
__func__, slot->name, rc);
} else {
- rc = rtas_get_sensor(DR_ENTITY_SENSE,
- slot->index, state);
+ rc = __rpaphp_get_sensor_state(slot, state);
}
} else if (rc == -ENODEV)
info("%s: slot is unusable\n", __func__);
diff --git a/fs/aio.c b/fs/aio.c
index b3174da..a4c2a6b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -558,7 +558,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED, 0, &unused, NULL);
+ MAP_SHARED, 0, 0, &unused, NULL);
mmap_write_unlock(mm);
if (IS_ERR((void *)ctx->mmap_base)) {
ctx->mmap_size = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d35bbf3..2c2efbe 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -431,6 +431,11 @@ static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
}
+__weak void arch_proc_pid_thread_features(struct seq_file *m,
+ struct task_struct *task)
+{
+}
+
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -455,6 +460,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
task_context_switch_counts(m, task);
+ arch_proc_pid_thread_features(m, task);
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 15ddf46..0f2aa36 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -692,6 +692,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
[ilog2(VM_UFFD_MINOR)] = "ui",
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ [ilog2(VM_SHADOW_STACK)] = "ss",
+#endif
};
size_t i;
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index d7f6335..4da0279 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -22,7 +22,7 @@ static inline unsigned long huge_pte_dirty(pte_t pte)
static inline pte_t huge_pte_mkwrite(pte_t pte)
{
- return pte_mkwrite(pte);
+ return pte_mkwrite_novma(pte);
}
#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7fbb459..db199d6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp);
extern int dbg_release_bp_slot(struct perf_event *bp);
extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp);
-int arch_reserve_bp_slot(struct perf_event *bp);
-void arch_release_bp_slot(struct perf_event *bp);
-void arch_unregister_hw_breakpoint(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 20e6d1d..bf5d0b1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp);
#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3)
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -339,6 +341,21 @@ extern unsigned int kobjsize(const void *objp);
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+/*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support in core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace protect
+ * itself from attacks. A single page is enough for current shadow stack archs
+ * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
+ * for more details on the guard size.
+ */
+# define VM_SHADOW_STACK VM_HIGH_ARCH_5
+#else
+# define VM_SHADOW_STACK VM_NONE
+#endif
+
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
#elif defined(CONFIG_PPC)
@@ -370,7 +387,7 @@ extern unsigned int kobjsize(const void *objp);
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT 37
+# define VM_UFFD_MINOR_BIT 38
# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
# define VM_UFFD_MINOR VM_NONE
@@ -397,6 +414,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK VM_GROWSUP
#define VM_STACK_EARLY VM_GROWSDOWN
@@ -1309,7 +1328,7 @@ static inline unsigned long thp_size(struct page *page)
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
return pte;
}
@@ -3265,7 +3284,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate, struct list_head *uf);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf);
extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool unlock);
@@ -3353,15 +3373,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
return mtree_load(&mm->mm_mt, addr);
}
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_GROWSDOWN)
+ return stack_guard_gap;
+
+ /* See reasoning around the VM_SHADOW_STACK definition */
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return PAGE_SIZE;
+
+ return 0;
+}
+
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
+ unsigned long gap = stack_guard_start_gap(vma);
unsigned long vm_start = vma->vm_start;
- if (vma->vm_flags & VM_GROWSDOWN) {
- vm_start -= stack_guard_gap;
- if (vm_start > vma->vm_start)
- vm_start = 0;
- }
+ vm_start -= gap;
+ if (vm_start > vma->vm_start)
+ vm_start = 0;
return vm_start;
}
diff --git a/include/linux/mman.h b/include/linux/mman.h
index cee1e4b..40d9441 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -15,6 +15,9 @@
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
+#ifndef MAP_ABOVE4G
+#define MAP_ABOVE4G 0
+#endif
#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB 0
#endif
@@ -50,6 +53,7 @@
| MAP_STACK \
| MAP_HUGETLB \
| MAP_32BIT \
+ | MAP_ABOVE4G \
| MAP_HUGE_2MB \
| MAP_HUGE_1GB)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f49abcf..1fba072 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -371,6 +371,20 @@ static inline bool arch_has_hw_pte_young(void)
}
#endif
+#ifndef arch_check_zapped_pte
+static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
+ pte_t pte)
+{
+}
+#endif
+
+#ifndef arch_check_zapped_pmd
+static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
+ pmd_t pmd)
+{
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
@@ -577,6 +591,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
pud_t *pudp);
#endif
+#ifndef pte_mkwrite
+static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ return pte_mkwrite_novma(pte);
+}
+#endif
+
+#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
+static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+ return pmd_mkwrite_novma(pmd);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 253f267..de407e7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -159,6 +159,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
void arch_report_meminfo(struct seq_file *m);
+void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
#else /* CONFIG_PROC_FS */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c0cb22c..22bc6bc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -939,6 +939,7 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l
asmlinkage long sys_cachestat(unsigned int fd,
struct cachestat_range __user *cstat_range,
struct cachestat __user *cstat, unsigned int flags);
+asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
/*
* Architecture-specific system calls
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index ffbe4ce..0f52d0a 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -242,7 +242,8 @@ typedef struct siginfo {
#define SEGV_ADIPERR 7 /* Precise MCD exception */
#define SEGV_MTEAERR 8 /* Asynchronous ARM MTE error */
#define SEGV_MTESERR 9 /* Synchronous ARM MTE exception */
-#define NSIGSEGV 9
+#define SEGV_CPERR 10 /* Control protection fault */
+#define NSIGSEGV 10
/*
* SIGBUS si_codes
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e0e1591..ee0bcff1 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,8 @@ typedef struct elf64_shdr {
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
#define NT_S390_TIMER 0x301 /* s390 timer register */
#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
diff --git a/ipc/shm.c b/ipc/shm.c
index 60e45e7..576a543 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1662,7 +1662,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
goto invalid;
}
- addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL);
+ addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
*raddr = addr;
err = 0;
if (IS_ERR_VALUE(addr))
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index c379770..6c2cb4e 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -523,26 +523,6 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int we
return 0;
}
-__weak int arch_reserve_bp_slot(struct perf_event *bp)
-{
- return 0;
-}
-
-__weak void arch_release_bp_slot(struct perf_event *bp)
-{
-}
-
-/*
- * Function to perform processor-specific cleanup during unregistration
- */
-__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
-{
- /*
- * A weak stub function here for those archs that don't define
- * it inside arch/.../kernel/hw_breakpoint.c
- */
-}
-
/*
* Constraints to check before allowing this new breakpoint counter.
*
@@ -594,7 +574,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
enum bp_type_idx type;
int max_pinned_slots;
int weight;
- int ret;
/* We couldn't initialize breakpoint constraints on boot */
if (!constraints_initialized)
@@ -613,10 +592,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
if (max_pinned_slots > hw_breakpoint_slots_cached(type))
return -ENOSPC;
- ret = arch_reserve_bp_slot(bp);
- if (ret)
- return ret;
-
return toggle_bp_slot(bp, true, type, weight);
}
@@ -634,8 +609,6 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
enum bp_type_idx type;
int weight;
- arch_release_bp_slot(bp);
-
type = find_slot_idx(bp_type);
weight = hw_breakpoint_weight(bp);
WARN_ON(toggle_bp_slot(bp, false, type, weight));
@@ -645,7 +618,6 @@ void release_bp_slot(struct perf_event *bp)
{
struct mutex *mtx = bp_constraints_lock(bp);
- arch_unregister_hw_breakpoint(bp);
__release_bp_slot(bp, bp->attr.bp_type);
bp_constraints_unlock(mtx);
}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 781de7c..e137c13 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -274,6 +274,7 @@ COND_SYSCALL(vm86old);
COND_SYSCALL(modify_ldt);
COND_SYSCALL(vm86);
COND_SYSCALL(kexec_file_load);
+COND_SYSCALL(map_shadow_stack);
/* s390 */
COND_SYSCALL(s390_pci_mmio_read);
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index d61eaa0..48e329e 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -109,10 +109,10 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
WARN_ON(!pte_same(pte, pte));
WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
- WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
+ WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma)));
WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
- WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
+ WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma))));
WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
}
@@ -156,7 +156,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
pte = pte_mkclean(pte);
set_pte_at(args->mm, args->vaddr, args->ptep, pte);
flush_dcache_page(page);
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, args->vma);
pte = pte_mkdirty(pte);
ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
pte = ptep_get(args->ptep);
@@ -202,10 +202,10 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
WARN_ON(!pmd_same(pmd, pmd));
WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
- WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
+ WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma)));
WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
- WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
+ WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma))));
WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
/*
@@ -256,7 +256,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
pmd = pmd_mkclean(pmd);
set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
flush_dcache_page(page);
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, args->vma);
pmd = pmd_mkdirty(pmd);
pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
pmd = READ_ONCE(*args->pmdp);
diff --git a/mm/gup.c b/mm/gup.c
index 948f3b4..2f8a2d8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1051,7 +1051,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
!writable_file_mapping_allowed(vma, gup_flags))
return -EFAULT;
- if (!(vm_flags & VM_WRITE)) {
+ if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fcafd9b..064fbd9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -551,7 +551,7 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
return pmd;
}
@@ -1566,7 +1566,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (writable)
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
@@ -1675,6 +1675,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd,
tlb->fullmm);
+ arch_check_zapped_pmd(vma, orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (vma_is_special_huge(vma)) {
if (arch_needs_pgtable_deposit())
@@ -1919,7 +1920,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
/* See change_pte_range(). */
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
can_change_pmd_writable(vma, addr, entry))
- entry = pmd_mkwrite(entry);
+ entry = pmd_mkwrite(entry, vma);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
@@ -2233,7 +2234,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
} else {
entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
if (write)
- entry = pte_mkwrite(entry);
+ entry = pte_mkwrite(entry, vma);
if (anon_exclusive)
SetPageAnonExclusive(page + i);
if (!young)
@@ -3265,7 +3266,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
if (is_writable_migration_entry(entry))
- pmde = pmd_mkwrite(pmde);
+ pmde = pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_mkuffd_wp(pmde);
if (!is_migration_entry_young(entry))
diff --git a/mm/internal.h b/mm/internal.h
index d1d4bf4..30cf724 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -556,14 +556,14 @@ static inline bool is_exec_mapping(vm_flags_t flags)
}
/*
- * Stack area - automatically grows in one direction
+ * Stack area (including shadow stacks)
*
* VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
* do_mmap() forbids all other combinations.
*/
static inline bool is_stack_mapping(vm_flags_t flags)
{
- return (flags & VM_STACK) == VM_STACK;
+ return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 405a483..6c264d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1430,6 +1430,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
+ arch_check_zapped_pte(vma, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
zap_install_uffd_wp_if_needed(vma, addr, pte, details,
ptent);
@@ -4124,7 +4125,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
entry = mk_pte(&folio->page, vma->vm_page_prot);
entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
@@ -4842,7 +4843,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
pte = pte_modify(old_pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (writable)
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
pte_unmap_unlock(vmf->pte, vmf->ptl);
diff --git a/mm/migrate.c b/mm/migrate.c
index 78c9bd5..b7fa020 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,7 +220,7 @@ static bool remove_migration_pte(struct folio *folio,
if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
pte = pte_mkdirty(pte);
if (is_writable_migration_entry(entry))
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(old_pte))
pte = pte_mkuffd_wp(pte);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index d69131a..8ac1f79 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -624,7 +624,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
}
entry = mk_pte(page, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
}
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
diff --git a/mm/mmap.c b/mm/mmap.c
index 514ced1..b56a7f0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1182,11 +1182,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
*/
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff,
- unsigned long *populate, struct list_head *uf)
+ unsigned long flags, vm_flags_t vm_flags,
+ unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf)
{
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
int pkey = 0;
*populate = 0;
@@ -1246,7 +1246,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
@@ -1564,7 +1564,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
gap = mas.index;
gap += (info->align_offset - gap) & info->align_mask;
tmp = mas_next(&mas, ULONG_MAX);
- if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
if (vm_start_gap(tmp) < gap + length - 1) {
low_limit = tmp->vm_end;
mas_reset(&mas);
@@ -1616,7 +1616,7 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
gap -= (gap - info->align_offset) & info->align_mask;
gap_end = mas.last;
tmp = mas_next(&mas, ULONG_MAX);
- if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
if (vm_start_gap(tmp) <= gap_end) {
high_limit = vm_start_gap(tmp);
mas_reset(&mas);
@@ -2998,7 +2998,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
file = get_file(vma->vm_file);
ret = do_mmap(vma->vm_file, start, size,
- prot, flags, pgoff, &populate, NULL);
+ prot, flags, 0, pgoff, &populate, NULL);
fput(file);
out:
mmap_write_unlock(mm);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 130db91..b94fbb4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -185,7 +185,7 @@ static long change_pte_range(struct mmu_gather *tlb,
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
!pte_write(ptent) &&
can_change_pte_writable(vma, addr, ptent))
- ptent = pte_mkwrite(ptent);
+ ptent = pte_mkwrite(ptent, vma);
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
if (pte_needs_flush(oldpte, ptent))
diff --git a/mm/nommu.c b/mm/nommu.c
index 8dba41c..7f9e9e5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1016,6 +1016,7 @@ unsigned long do_mmap(struct file *file,
unsigned long len,
unsigned long prot,
unsigned long flags,
+ vm_flags_t vm_flags,
unsigned long pgoff,
unsigned long *populate,
struct list_head *uf)
@@ -1023,7 +1024,6 @@ unsigned long do_mmap(struct file *file,
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
- vm_flags_t vm_flags;
unsigned long capabilities, result;
int ret;
VMA_ITERATOR(vmi, current->mm, 0);
@@ -1043,7 +1043,7 @@ unsigned long do_mmap(struct file *file,
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
- vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+ vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
/* we're going to need to record the mapping */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0fc69ef..96d9eae 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -86,7 +86,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
if (page_in_cache && !vm_shared)
writable = false;
if (writable)
- _dst_pte = pte_mkwrite(_dst_pte);
+ _dst_pte = pte_mkwrite(_dst_pte, dst_vma);
if (flags & MFILL_ATOMIC_WP)
_dst_pte = pte_mkuffd_wp(_dst_pte);
diff --git a/mm/util.c b/mm/util.c
index 4ed8b9b..f08b655 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -543,7 +543,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
+ ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate,
&uf);
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h
similarity index 100%
rename from tools/testing/selftests/powerpc/copyloops/asm/export.h
rename to tools/testing/selftests/powerpc/copyloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 0ad4f12..5876220 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -24,7 +24,7 @@
/* Setting timeout to -1 disables the alarm */
static uint64_t timeout = 120;
-int run_test(int (test_function)(void), char *name)
+int run_test(int (test_function)(void), const char *name)
{
bool terminated;
int rc, status;
@@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time)
timeout = time;
}
-int test_harness(int (test_function)(void), char *name)
+int test_harness(int (test_function)(void), const char *name)
{
int rc;
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
index 068d55f..b0bb774 100644
--- a/tools/testing/selftests/powerpc/include/subunit.h
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -6,37 +6,37 @@
#ifndef _SELFTESTS_POWERPC_SUBUNIT_H
#define _SELFTESTS_POWERPC_SUBUNIT_H
-static inline void test_start(char *name)
+static inline void test_start(const char *name)
{
printf("test: %s\n", name);
}
-static inline void test_failure_detail(char *name, char *detail)
+static inline void test_failure_detail(const char *name, const char *detail)
{
printf("failure: %s [%s]\n", name, detail);
}
-static inline void test_failure(char *name)
+static inline void test_failure(const char *name)
{
printf("failure: %s\n", name);
}
-static inline void test_error(char *name)
+static inline void test_error(const char *name)
{
printf("error: %s\n", name);
}
-static inline void test_skip(char *name)
+static inline void test_skip(const char *name)
{
printf("skip: %s\n", name);
}
-static inline void test_success(char *name)
+static inline void test_success(const char *name)
{
printf("success: %s\n", name);
}
-static inline void test_finish(char *name, int status)
+static inline void test_finish(const char *name, int status)
{
if (status)
test_failure(name);
@@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status)
test_success(name);
}
-static inline void test_set_git_version(char *value)
+static inline void test_set_git_version(const char *value)
{
printf("tags: git_version:%s\n", value);
}
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 36c30c6..66d7b23 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -32,7 +32,7 @@ typedef uint16_t u16;
typedef uint8_t u8;
void test_harness_set_timeout(uint64_t time);
-int test_harness(int (test_function)(void), char *name);
+int test_harness(int (test_function)(void), const char *name);
int read_auxv(char *buf, ssize_t buf_size);
void *find_auxv_entry(int type, char *auxv);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 4e1a294..0df1a3a 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,15 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
-hugetlb_vs_thp_test
-subpage_prot
-tempfile
-prot_sao
-segv_errors
-wild_bctr
-large_vm_fork_separation
bad_accesses
-tlbie_test
+exec_prot
+hugetlb_vs_thp_test
+large_vm_fork_separation
+large_vm_gpr_corruption
pkey_exec_prot
pkey_siginfo
+prot_sao
+segv_errors
stack_expansion_ldst
stack_expansion_signal
-large_vm_gpr_corruption
+subpage_prot
+tempfile
+tlbie_test
+wild_bctr
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index cbeeaea..1b39b86 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -36,6 +36,7 @@
CFLAGS += $(KHDR_INCLUDES) -fno-pie
$(OUTPUT)/ptrace-gpr: ptrace-gpr.S
+$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S
$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
index d7275b7..df62ff0 100644
--- a/tools/testing/selftests/powerpc/ptrace/child.h
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -48,12 +48,12 @@ struct child_sync {
} \
} while (0)
-#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg) \
do { \
if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
(sync)->parent_gave_up = true; \
prod_child(sync); \
- SKIP_IF(1); \
+ SKIP_IF_MSG(1, msg); \
} \
} while (0)
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f6f8596..f6da4cb 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid)
* to the child.
*/
ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
- PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
PARENT_FAIL_IF(ret, &info->child_sync);
info->amr = regs[0];
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index f75739b..e374c6b 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -884,7 +884,7 @@ static int perf_hwbreak(void)
{
srand ( time(NULL) );
- SKIP_IF(!perf_breakpoint_supported());
+ SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported");
return runtest();
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 1345e9b..75d30d6 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -64,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo)
static void write_var(int len)
{
- __u8 *pcvar;
- __u16 *psvar;
- __u32 *pivar;
- __u64 *plvar;
+ volatile __u8 *pcvar;
+ volatile __u16 *psvar;
+ volatile __u32 *pivar;
+ volatile __u64 *plvar;
switch (len) {
case 1:
- pcvar = (__u8 *)&glvar;
+ pcvar = (volatile __u8 *)&glvar;
*pcvar = 0xff;
break;
case 2:
- psvar = (__u16 *)&glvar;
+ psvar = (volatile __u16 *)&glvar;
*psvar = 0xffff;
break;
case 4:
- pivar = (__u32 *)&glvar;
+ pivar = (volatile __u32 *)&glvar;
*pivar = 0xffffffff;
break;
case 8:
- plvar = (__u64 *)&glvar;
+ plvar = (volatile __u64 *)&glvar;
*plvar = 0xffffffffffffffffLL;
break;
}
@@ -98,16 +98,16 @@ static void read_var(int len)
switch (len) {
case 1:
- cvar = (__u8)glvar;
+ cvar = (volatile __u8)glvar;
break;
case 2:
- svar = (__u16)glvar;
+ svar = (volatile __u16)glvar;
break;
case 4:
- ivar = (__u32)glvar;
+ ivar = (volatile __u32)glvar;
break;
case 8:
- lvar = (__u64)glvar;
+ lvar = (volatile __u64)glvar;
break;
}
}
@@ -603,7 +603,7 @@ static int ptrace_hwbreak(void)
wait(NULL);
get_dbginfo(child_pid, &dbginfo);
- SKIP_IF(dbginfo.num_data_bps == 0);
+ SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present");
dawr = dawr_present(&dbginfo);
run_tests(child_pid, &dbginfo, dawr);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
new file mode 100644
index 0000000..9aa2e58
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ppc-asm.h>
+
+.global same_watch_addr_load
+.global same_watch_addr_trap
+
+FUNC_START(same_watch_addr_child)
+ nop
+same_watch_addr_load:
+ ld 0,0(3)
+ nop
+same_watch_addr_trap:
+ trap
+ blr
+FUNC_END(same_watch_addr_child)
+
+
+.global perf_then_ptrace_load1
+.global perf_then_ptrace_load2
+.global perf_then_ptrace_trap
+
+FUNC_START(perf_then_ptrace_child)
+ nop
+perf_then_ptrace_load1:
+ ld 0,0(3)
+perf_then_ptrace_load2:
+ ld 0,0(4)
+ nop
+perf_then_ptrace_trap:
+ trap
+ blr
+FUNC_END(perf_then_ptrace_child)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
index 3344e74..a0a0b9b 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
@@ -1,142 +1,167 @@
// SPDX-License-Identifier: GPL-2.0+
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
+
#include <asm/unistd.h>
-#include <sys/ptrace.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/ptrace.h>
+#include <memory.h>
+#include <stdlib.h>
#include <sys/wait.h>
-#include "ptrace.h"
-char data[16];
+#include "utils.h"
-/* Overlapping address range */
-volatile __u64 *ptrace_data1 = (__u64 *)&data[0];
-volatile __u64 *perf_data1 = (__u64 *)&data[4];
+/*
+ * Child subroutine that performs a load on the address, then traps
+ */
+void same_watch_addr_child(unsigned long *addr);
-/* Non-overlapping address range */
-volatile __u64 *ptrace_data2 = (__u64 *)&data[0];
-volatile __u64 *perf_data2 = (__u64 *)&data[8];
+/* Address of the ld instruction in same_watch_addr_child() */
+extern char same_watch_addr_load[];
-static unsigned long pid_max_addr(void)
+/* Address of the end trap instruction in same_watch_addr_child() */
+extern char same_watch_addr_trap[];
+
+/*
+ * Child subroutine that performs a load on the first address, then a load on
+ * the second address (with no instructions separating this from the first
+ * load), then traps.
+ */
+void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr);
+
+/* Address of the first ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load1[];
+
+/* Address of the second ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load2[];
+
+/* Address of the end trap instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_trap[];
+
+static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data)
{
- FILE *fp;
- char *line, *c;
- char addr[100];
- size_t len = 0;
-
- fp = fopen("/proc/kallsyms", "r");
- if (!fp) {
- printf("Failed to read /proc/kallsyms. Exiting..\n");
- exit(EXIT_FAILURE);
- }
-
- while (getline(&line, &len, fp) != -1) {
- if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") ||
- strstr(line, "pid_max_min"))
- continue;
-
- strncpy(addr, line, len < 100 ? len : 100);
- c = strchr(addr, ' ');
- *c = '\0';
- return strtoul(addr, &c, 16);
- }
- fclose(fp);
- printf("Could not find pix_max. Exiting..\n");
- exit(EXIT_FAILURE);
- return -1;
+ return syscall(__NR_ptrace, request, pid, addr, data);
}
-static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len)
+static long ptrace_traceme(void)
{
- memset(attr, 0, sizeof(struct perf_event_attr));
- attr->type = PERF_TYPE_BREAKPOINT;
- attr->size = sizeof(struct perf_event_attr);
- attr->bp_type = HW_BREAKPOINT_R;
- attr->bp_addr = addr;
- attr->bp_len = len;
- attr->exclude_kernel = 1;
- attr->exclude_hv = 1;
+ return sys_ptrace(PTRACE_TRACEME, 0, 0, 0);
}
-static void perf_kernel_event_attr_set(struct perf_event_attr *attr)
+static long ptrace_getregs(pid_t pid, struct pt_regs *result)
{
- memset(attr, 0, sizeof(struct perf_event_attr));
- attr->type = PERF_TYPE_BREAKPOINT;
- attr->size = sizeof(struct perf_event_attr);
- attr->bp_type = HW_BREAKPOINT_R;
- attr->bp_addr = pid_max_addr();
- attr->bp_len = sizeof(unsigned long);
- attr->exclude_user = 1;
- attr->exclude_hv = 1;
+ return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result);
}
-static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len)
+static long ptrace_setregs(pid_t pid, struct pt_regs *result)
{
- struct perf_event_attr attr;
-
- perf_user_event_attr_set(&attr, addr, len);
- return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+ return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result);
}
-static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len)
+static long ptrace_cont(pid_t pid, long signal)
{
- struct perf_event_attr attr;
-
- perf_user_event_attr_set(&attr, addr, len);
- return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+ return sys_ptrace(PTRACE_CONT, pid, 0, signal);
}
-static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len)
+static long ptrace_singlestep(pid_t pid, long signal)
{
- struct perf_event_attr attr;
-
- perf_user_event_attr_set(&attr, addr, len);
- return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0);
+ return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal);
}
-static int perf_thread_kernel_event_open(pid_t child_pid)
+static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo)
{
- struct perf_event_attr attr;
-
- perf_kernel_event_attr_set(&attr);
- return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+ return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo);
}
-static int perf_cpu_kernel_event_open(int cpu)
+static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info)
{
- struct perf_event_attr attr;
-
- perf_kernel_event_attr_set(&attr);
- return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+ return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info);
}
-static int child(void)
+static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id)
{
- int ret;
+ return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id);
+}
- ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
- if (ret) {
- printf("Error: PTRACE_TRACEME failed\n");
- return 0;
- }
- kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */
+static long ptrace_getreg_pc(pid_t pid, void **pc)
+{
+ struct pt_regs regs;
+ long err;
+
+ err = ptrace_getregs(pid, &regs);
+ if (err)
+ return err;
+
+ *pc = (void *)regs.nip;
return 0;
}
-static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
- __u64 addr, int len)
+static long ptrace_setreg_pc(pid_t pid, void *pc)
+{
+ struct pt_regs regs;
+ long err;
+
+ err = ptrace_getregs(pid, &regs);
+ if (err)
+ return err;
+
+ regs.nip = (unsigned long)pc;
+
+ err = ptrace_setregs(pid, &regs);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len)
+{
+ memset(attr, 0, sizeof(struct perf_event_attr));
+
+ attr->type = PERF_TYPE_BREAKPOINT;
+ attr->size = sizeof(struct perf_event_attr);
+ attr->bp_type = HW_BREAKPOINT_R;
+ attr->bp_addr = (u64)addr;
+ attr->bp_len = len;
+ attr->exclude_kernel = 1;
+ attr->exclude_hv = 1;
+}
+
+static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len)
+{
+ struct perf_event_attr attr;
+
+ perf_user_event_attr_set(&attr, addr, len);
+ return perf_event_open(&attr, child_pid, -1, -1, 0);
+}
+
+static int perf_read_counter(int perf_fd, u64 *count)
+{
+ /*
+ * A perf counter is retrieved by the read() syscall. It contains
+ * the current count as 8 bytes that are interpreted as a u64
+ */
+ ssize_t len = read(perf_fd, count, sizeof(*count));
+
+ if (len != sizeof(*count))
+ return -1;
+
+ return 0;
+}
+
+static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info,
+ int type, void *addr, int len)
{
info->version = 1;
info->trigger_type = type;
info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- info->addr = addr;
- info->addr2 = addr + len;
+ info->addr = (u64)addr;
+ info->addr2 = (u64)addr + len;
info->condition_value = 0;
if (!len)
info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
@@ -144,516 +169,277 @@ static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
}
-static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len)
+/*
+ * Checks if we can place at least 2 watchpoints on the child process
+ */
+static int check_watchpoints(pid_t pid)
{
- struct ppc_hw_breakpoint info;
-
- ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
- return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
-}
-
-static int test1(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by ptrace)
- * if (existing cpu event by perf)
- * if (addr range overlaps)
- * fail;
- */
-
- perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
- if (perf_fd < 0)
- return -1;
-
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd > 0 || errno != ENOSPC)
- ret = -1;
-
- close(perf_fd);
- return ret;
-}
-
-static int test2(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by ptrace)
- * if (existing cpu event by perf)
- * if (addr range does not overlaps)
- * allow;
- */
-
- perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
- if (perf_fd < 0)
- return -1;
-
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
- if (ptrace_fd < 0) {
- ret = -1;
- goto perf_close;
- }
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-
-perf_close:
- close(perf_fd);
- return ret;
-}
-
-static int test3(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by ptrace)
- * if (existing thread event by perf on the same thread)
- * if (addr range overlaps)
- * fail;
- */
- perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
- sizeof(*perf_data1));
- if (perf_fd < 0)
- return -1;
-
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd > 0 || errno != ENOSPC)
- ret = -1;
-
- close(perf_fd);
- return ret;
-}
-
-static int test4(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by ptrace)
- * if (existing thread event by perf on the same thread)
- * if (addr range does not overlaps)
- * fail;
- */
- perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
- sizeof(*perf_data2));
- if (perf_fd < 0)
- return -1;
-
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
- if (ptrace_fd < 0) {
- ret = -1;
- goto perf_close;
- }
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-
-perf_close:
- close(perf_fd);
- return ret;
-}
-
-static int test5(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int cpid;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by ptrace)
- * if (existing thread event by perf on the different thread)
- * allow;
- */
- cpid = fork();
- if (!cpid) {
- /* Temporary Child */
- pause();
- exit(EXIT_SUCCESS);
- }
-
- perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
- if (perf_fd < 0) {
- ret = -1;
- goto kill_child;
- }
-
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0) {
- ret = -1;
- goto perf_close;
- }
-
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
-perf_close:
- close(perf_fd);
-kill_child:
- kill(cpid, SIGINT);
- return ret;
-}
-
-static int test6(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread kernel event by perf)
- * if (existing thread event by ptrace on the same thread)
- * allow;
- * -- OR --
- * if (new per cpu kernel event by perf)
- * if (existing thread event by ptrace)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_thread_kernel_event_open(child_pid);
- if (perf_fd < 0) {
- ret = -1;
- goto ptrace_close;
- }
- close(perf_fd);
-
- perf_fd = perf_cpu_kernel_event_open(0);
- if (perf_fd < 0) {
- ret = -1;
- goto ptrace_close;
- }
- close(perf_fd);
-
-ptrace_close:
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test7(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range overlaps)
- * fail;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
- sizeof(*perf_data1));
- if (perf_fd > 0 || errno != ENOSPC)
- ret = -1;
-
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test8(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range does not overlaps)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
- sizeof(*perf_data2));
- if (perf_fd < 0) {
- ret = -1;
- goto ptrace_close;
- }
- close(perf_fd);
-
-ptrace_close:
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test9(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int cpid;
- int ret = 0;
-
- /* Test:
- * if (new per thread event by perf)
- * if (existing thread event by ptrace on the other thread)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- cpid = fork();
- if (!cpid) {
- /* Temporary Child */
- pause();
- exit(EXIT_SUCCESS);
- }
-
- perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
- if (perf_fd < 0) {
- ret = -1;
- goto kill_child;
- }
- close(perf_fd);
-
-kill_child:
- kill(cpid, SIGINT);
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test10(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per cpu event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range overlaps)
- * fail;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
- if (perf_fd > 0 || errno != ENOSPC)
- ret = -1;
-
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test11(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per cpu event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range does not overlap)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
- if (perf_fd < 0) {
- ret = -1;
- goto ptrace_close;
- }
- close(perf_fd);
-
-ptrace_close:
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test12(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread and per cpu event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range overlaps)
- * fail;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1));
- if (perf_fd > 0 || errno != ENOSPC)
- ret = -1;
-
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test13(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int ret = 0;
-
- /* Test:
- * if (new per thread and per cpu event by perf)
- * if (existing thread event by ptrace on the same thread)
- * if (addr range does not overlap)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
- if (ptrace_fd < 0)
- return -1;
-
- perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2));
- if (perf_fd < 0) {
- ret = -1;
- goto ptrace_close;
- }
- close(perf_fd);
-
-ptrace_close:
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int test14(pid_t child_pid)
-{
- int perf_fd;
- int ptrace_fd;
- int cpid;
- int ret = 0;
-
- /* Test:
- * if (new per thread and per cpu event by perf)
- * if (existing thread event by ptrace on the other thread)
- * allow;
- */
- ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
- if (ptrace_fd < 0)
- return -1;
-
- cpid = fork();
- if (!cpid) {
- /* Temporary Child */
- pause();
- exit(EXIT_SUCCESS);
- }
-
- perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1,
- sizeof(*perf_data1));
- if (perf_fd < 0) {
- ret = -1;
- goto kill_child;
- }
- close(perf_fd);
-
-kill_child:
- kill(cpid, SIGINT);
- ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
- return ret;
-}
-
-static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg)
-{
- int ret;
-
- ret = fun(arg);
- if (ret)
- printf("%s: Error\n", msg);
- else
- printf("%s: Ok\n", msg);
- return ret;
-}
-
-char *desc[14] = {
- "perf cpu event -> ptrace thread event (Overlapping)",
- "perf cpu event -> ptrace thread event (Non-overlapping)",
- "perf thread event -> ptrace same thread event (Overlapping)",
- "perf thread event -> ptrace same thread event (Non-overlapping)",
- "perf thread event -> ptrace other thread event",
- "ptrace thread event -> perf kernel event",
- "ptrace thread event -> perf same thread event (Overlapping)",
- "ptrace thread event -> perf same thread event (Non-overlapping)",
- "ptrace thread event -> perf other thread event",
- "ptrace thread event -> perf cpu event (Overlapping)",
- "ptrace thread event -> perf cpu event (Non-overlapping)",
- "ptrace thread event -> perf same thread & cpu event (Overlapping)",
- "ptrace thread event -> perf same thread & cpu event (Non-overlapping)",
- "ptrace thread event -> perf other thread & cpu event",
-};
-
-static int test(pid_t child_pid)
-{
- int ret = TEST_PASS;
-
- ret |= do_test(desc[0], test1, child_pid);
- ret |= do_test(desc[1], test2, child_pid);
- ret |= do_test(desc[2], test3, child_pid);
- ret |= do_test(desc[3], test4, child_pid);
- ret |= do_test(desc[4], test5, child_pid);
- ret |= do_test(desc[5], test6, child_pid);
- ret |= do_test(desc[6], test7, child_pid);
- ret |= do_test(desc[7], test8, child_pid);
- ret |= do_test(desc[8], test9, child_pid);
- ret |= do_test(desc[9], test10, child_pid);
- ret |= do_test(desc[10], test11, child_pid);
- ret |= do_test(desc[11], test12, child_pid);
- ret |= do_test(desc[12], test13, child_pid);
- ret |= do_test(desc[13], test14, child_pid);
-
- return ret;
-}
-
-static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
-{
- if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
- perror("Can't get breakpoint info");
- exit(-1);
- }
-}
-
-static int ptrace_perf_hwbreak(void)
-{
- int ret;
- pid_t child_pid;
struct ppc_debug_info dbginfo;
- child_pid = fork();
- if (!child_pid)
- return child();
+ FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed");
+ SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)");
- /* parent */
- wait(NULL); /* <-- child (SIGUSR1) */
+ return 0;
+}
- get_dbginfo(child_pid, &dbginfo);
- SKIP_IF(dbginfo.num_data_bps <= 1);
+/*
+ * Wrapper around a plain fork() call that sets up the child for
+ * ptrace-ing. Both the parent and child return from this, though
+ * the child is stopped until ptrace_cont(pid) is run by the parent.
+ */
+static int ptrace_fork_child(pid_t *pid)
+{
+ int status;
- ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
- SKIP_IF(ret < 0);
- close(ret);
+ *pid = fork();
- ret = test(child_pid);
+ if (*pid < 0)
+ FAIL_IF_MSG(1, "Failed to fork child");
- ptrace(PTRACE_CONT, child_pid, NULL, 0);
- return ret;
+ if (!*pid) {
+ FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed");
+ FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP");
+ } else {
+ /* Synchronise on child SIGSTOP */
+ FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ }
+
+ return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf watching the same data.
+ *
+ * We expect ptrace to take 'priority', as it has before-execute
+ * semantics.
+ *
+ * The perf counter should not be incremented yet because perf has after-execute
+ * semantics. E.g., if ptrace changes the child PC, we don't even execute the
+ * instruction at all.
+ *
+ * When the child is stopped for ptrace, we test both continue and single step.
+ * Both should increment the perf counter. We also test changing the PC somewhere
+ * different and stepping, which should not increment the perf counter.
+ */
+int same_watch_addr_test(void)
+{
+ struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */
+ int bp_id; /* Breakpoint handle of ptrace watchpoint */
+ int perf_fd; /* File descriptor of perf performance counter */
+ u64 perf_count; /* Most recently fetched perf performance counter value */
+ pid_t pid; /* PID of child process */
+ void *pc; /* Most recently fetched child PC value */
+ int status; /* Stop status of child after waitpid */
+ unsigned long value; /* Dummy value to be read/written to by child */
+ int err;
+
+ err = ptrace_fork_child(&pid);
+ if (err)
+ return err;
+
+ if (!pid) {
+ same_watch_addr_child(&value);
+ exit(1);
+ }
+
+ err = check_watchpoints(pid);
+ if (err)
+ return err;
+
+ /* Place a perf watchpoint counter on value */
+ perf_fd = perf_watchpoint_open(pid, &value, sizeof(value));
+ FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+ /* Place a ptrace watchpoint on value */
+ ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value));
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+ /* Let the child run. It should stop on the ptrace watchpoint */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+
+ /*
+ * We stopped before executing the load, so perf should not have
+ * recorded any events yet
+ */
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event");
+
+ /* Single stepping over the load should increment the perf counter */
+ FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+ /*
+ * Set up a ptrace watchpoint on the value again and trigger it.
+ * The perf counter should not have incremented because we do not
+ * execute the load yet.
+ */
+ FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed");
+
+ /* Continuing over the load should increment the perf counter */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 2, "perf counter did not increment");
+
+ /*
+ * If we set the child PC back to the load instruction, then continue,
+ * we should reach the end trap (because ptrace is one-shot) and have
+ * another perf event.
+ */
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter did not increment");
+
+ /*
+ * If we set the child PC back to the load instruction, set a ptrace
+ * watchpoint on the value, then continue, we should immediately get
+ * the ptrace trap without incrementing the perf counter
+ */
+ FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+ /*
+ * If we change the PC while stopped on the load instruction, we should
+ * not increment the perf counter (because ptrace is before-execute,
+ * perf is after-execute).
+ */
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+ /* Clean up child */
+ FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+ return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf when:
+ * 1. perf watches a value
+ * 2. ptrace watches a different value
+ * 3. The perf value is read, then the ptrace value is read immediately after
+ *
+ * A breakpoint implementation may accidentally misattribute/skip one of
+ * the ptrace or perf handlers, as interrupt based work is done after perf
+ * and before ptrace.
+ *
+ * We expect the perf counter to increment before the ptrace watchpoint
+ * triggers.
+ */
+int perf_then_ptrace_test(void)
+{
+ struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */
+ int bp_id; /* Breakpoint handle of ptrace watchpoint */
+ int perf_fd; /* File descriptor of perf performance counter */
+ u64 perf_count; /* Most recently fetched perf performance counter value */
+ pid_t pid; /* PID of child process */
+ void *pc; /* Most recently fetched child PC value */
+ int status; /* Stop status of child after waitpid */
+ unsigned long perf_value; /* Dummy value to be watched by perf */
+ unsigned long ptrace_value; /* Dummy value to be watched by ptrace */
+ int err;
+
+ err = ptrace_fork_child(&pid);
+ if (err)
+ return err;
+
+ /*
+ * If we are the child, run a subroutine that reads the perf value,
+ * then reads the ptrace value with consecutive load instructions
+ */
+ if (!pid) {
+ perf_then_ptrace_child(&perf_value, &ptrace_value);
+ exit(0);
+ }
+
+ err = check_watchpoints(pid);
+ if (err)
+ return err;
+
+ /* Place a perf watchpoint counter */
+ perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value));
+ FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+ /* Place a ptrace watchpoint */
+ ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ,
+ &ptrace_value, sizeof(ptrace_value));
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+ /* Let the child run. It should stop on the ptrace watchpoint */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load");
+
+ /* perf should have recorded the first load */
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+ /* Clean up child */
+ FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+ return 0;
}
int main(int argc, char *argv[])
{
- return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak");
+ int err = 0;
+
+ err |= test_harness(same_watch_addr_test, "same_watch_addr");
+ err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace");
+
+ return err;
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bc454f8..d894743 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -192,7 +192,7 @@ static int parent(struct shared_info *info, pid_t pid)
* to the child.
*/
ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
- PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
PARENT_FAIL_IF(ret, &info->child_sync);
info->amr1 = info->amr2 = regs[0];
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 4436ca9..14726c7 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -79,7 +79,7 @@ int ptrace_tar(void)
int ret, status;
// TAR was added in v2.07
- SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 5dc152b..7c70d62 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -112,8 +112,8 @@ int ptrace_tm_gpr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 458cc1a..6c17ed0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -118,8 +118,8 @@ int ptrace_tm_spd_gpr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index e112a34..afd8dc2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -128,8 +128,8 @@ int ptrace_tm_spd_tar(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 40133d4..14d2fac 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -128,8 +128,8 @@ int ptrace_tm_spd_vsx(void)
pid_t pid;
int ret, status, i;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 880ba6a..e64cdb0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -113,8 +113,8 @@ int ptrace_tm_spr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index d0db6df..3963d4b0 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -116,8 +116,8 @@ int ptrace_tm_tar(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 4f05ce4..8c925d7 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -112,8 +112,8 @@ int ptrace_tm_vsx(void)
pid_t pid;
int ret, status, i;
- SKIP_IF(!have_htm());
- SKIP_IF(htm_is_synthetic());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index cb9875f..11bc624 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,7 +61,7 @@ int ptrace_vsx(void)
pid_t pid;
int ret, status, i;
- SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+ SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h
similarity index 100%
rename from tools/testing/selftests/powerpc/stringloops/asm/export.h
rename to tools/testing/selftests/powerpc/stringloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
deleted file mode 120000
index 942b1d0..0000000
--- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../arch/powerpc/include/asm/lppaca.h
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
new file mode 120000
index 0000000..3a0b2a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/vphn.h
\ No newline at end of file
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 598135d..7e8c937 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -18,7 +18,7 @@
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam
+ corrupt_xstate_header amx lam test_shadow_stack
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
new file mode 100644
index 0000000..2188968
--- /dev/null
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -0,0 +1,884 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program tests basic kernel shadow stack support. It enables shadow
+ * stack manually via arch_prctl(), instead of relying on glibc. Its
+ * Makefile doesn't compile with shadow stack support, so it doesn't rely on
+ * any particular glibc. As a result it can't do any operations that require
+ * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just
+ * stick to the basics and hope the compiler doesn't do anything strange.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/syscall.h>
+#include <asm/mman.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <sys/ptrace.h>
+#include <sys/signal.h>
+#include <linux/elf.h>
+
+/*
+ * Define the ABI defines if needed, so people can run the tests
+ * without building the headers.
+ */
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 452
+
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0)
+
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_LOCK 0x5003
+#define ARCH_SHSTK_UNLOCK 0x5004
+#define ARCH_SHSTK_STATUS 0x5005
+
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#define ARCH_SHSTK_WRSS (1ULL << 1)
+
+#define NT_X86_SHSTK 0x204
+#endif
+
+#define SS_SIZE 0x200000
+#define PAGE_SIZE 0x1000
+
+#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5)
+int main(int argc, char *argv[])
+{
+ printf("[SKIP]\tCompiler does not support CET.\n");
+ return 0;
+}
+#else
+void write_shstk(unsigned long *addr, unsigned long val)
+{
+	/* wrssq stores val to *addr: the output is the pointed-to word, not addr */
+	asm volatile("wrssq %[val], (%[addr])\n"
+		     : "=m" (*addr) : [addr] "r" (addr), [val] "r" (val));
+}
+
+static inline unsigned long __attribute__((always_inline)) get_ssp(void)
+{
+ unsigned long ret = 0;
+
+ asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret)); /* register is zeroed before rdsspq runs */
+ return ret; /* main() treats a 0 result as "shadow stack disabled" */
+}
+
+/*
+ * For use in inline enablement of shadow stack.
+ *
+ * The program can't return from the point where shadow stack gets enabled
+ * because there will be no address on the shadow stack. So it can't use
+ * syscall() for enablement, since it is a function.
+ *
+ * Based on code from nolibc.h. Keep a copy here because this can't pull in all
+ * of nolibc.h.
+ */
+#define ARCH_PRCTL(arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = __NR_arch_prctl; \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+void *create_shstk(void *addr)
+{
+ return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN);
+}
+
+void *create_normal_mem(void *addr)
+{
+	/* addr is only a hint (no MAP_FIXED); anonymous mappings take fd -1 */
+	return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+}
+
+void free_shstk(void *shstk)
+{
+ munmap(shstk, SS_SIZE);
+}
+
+int reset_shstk(void *shstk)
+{
+ return madvise(shstk, SS_SIZE, MADV_DONTNEED);
+}
+
+void try_shstk(unsigned long new_ssp)
+{
+ unsigned long ssp;
+
+ printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n",
+ new_ssp, *((unsigned long *)new_ssp));
+
+ ssp = get_ssp(); /* remember the current SSP so we can switch back below */
+ printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp);
+
+ asm volatile("rstorssp (%0)\n":: "r" (new_ssp)); /* pivot to the shadow stack at new_ssp */
+ asm volatile("saveprevssp");
+ printf("[INFO]\tssp is now %lx\n", get_ssp());
+
+ /* Switch back to original shadow stack */
+ ssp -= 8; /* NOTE(review): presumably the slot where saveprevssp left a restore token -- confirm */
+ asm volatile("rstorssp (%0)\n":: "r" (ssp));
+ asm volatile("saveprevssp");
+}
+
+int test_shstk_pivot(void)
+{
+ void *shstk = create_shstk(0);
+
+ if (shstk == MAP_FAILED) {
+ printf("[FAIL]\tError creating shadow stack: %d\n", errno);
+ return 1;
+ }
+ try_shstk((unsigned long)shstk + SS_SIZE - 8);
+ free_shstk(shstk);
+
+ printf("[OK]\tShadow stack pivot\n");
+ return 0;
+}
+
+int test_shstk_faults(void)
+{
+	unsigned long *shstk = create_shstk(0);
+	int ret = 1;
+
+	/* Returns 0 when reads and wrss work on a fresh and on a discarded page */
+	if (shstk == MAP_FAILED)
+		return ret;
+	/* Read shadow stack, test if it's zero to not get read optimized out */
+	if (*shstk != 0)
+		goto out;
+	/* Wrss memory that was already read. */
+	write_shstk(shstk, 1);
+	if (*shstk != 1)
+		goto out;
+	/* Page out memory, so we can wrss it again. */
+	if (reset_shstk((void *)shstk))
+		goto out;
+	write_shstk(shstk, 1);
+	if (*shstk != 1)
+		goto out;
+	printf("[OK]\tShadow stack faults\n");
+	ret = 0;
+out:
+	free_shstk(shstk);
+	return ret;
+}
+
+unsigned long saved_ssp;
+unsigned long saved_ssp_val;
+volatile bool segv_triggered;
+
+void __attribute__((noinline)) violate_ss(void)
+{
+ saved_ssp = get_ssp();
+ saved_ssp_val = *(unsigned long *)saved_ssp;
+
+ /* Corrupt shadow stack */
+ printf("[INFO]\tCorrupting shadow stack\n");
+ write_shstk((void *)saved_ssp, 0);
+}
+
+void segv_handler(int signum, siginfo_t *si, void *uc)
+{
+ printf("[INFO]\tGenerated shadow stack violation successfully\n");
+
+ segv_triggered = true;
+
+ /* Fix shadow stack */
+ write_shstk((void *)saved_ssp, saved_ssp_val);
+}
+
+int test_shstk_violation(void)
+{
+	struct sigaction sa = {};
+
+	sa.sa_sigaction = segv_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	segv_triggered = false;
+
+	/* Make sure segv_triggered is set before violate_ss() */
+	asm volatile("" : : : "memory");
+
+	violate_ss();
+	signal(SIGSEGV, SIG_DFL);
+
+	/* Only claim success if segv_handler() actually ran */
+	if (segv_triggered)
+		printf("[OK]\tShadow stack violation test\n");
+	return !segv_triggered;
+}
+
+/* Gup test state */
+#define MAGIC_VAL 0x12345678
+bool is_shstk_access;
+void *shstk_ptr;
+int fd;
+
+void reset_test_shstk(void *addr)
+{
+ if (shstk_ptr)
+ free_shstk(shstk_ptr);
+ shstk_ptr = create_shstk(addr);
+}
+
+void test_access_fix_handler(int signum, siginfo_t *si, void *uc)
+{
+ printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write");
+
+ segv_triggered = true; /* polled by test_shstk_access()/test_write_access() */
+
+ /* Fix shadow stack */
+ if (is_shstk_access) {
+ reset_test_shstk(shstk_ptr);
+ return;
+ }
+
+ free_shstk(shstk_ptr);
+ create_normal_mem(shstk_ptr); /* NOTE(review): address is only a hint (no MAP_FIXED) -- confirm it is honoured */
+}
+
+bool test_shstk_access(void *ptr)
+{
+ is_shstk_access = true;
+ segv_triggered = false;
+ write_shstk(ptr, MAGIC_VAL);
+
+ asm volatile("" : : : "memory");
+
+ return segv_triggered;
+}
+
+bool test_write_access(void *ptr)
+{
+ is_shstk_access = false;
+ segv_triggered = false;
+ *(unsigned long *)ptr = MAGIC_VAL;
+
+ asm volatile("" : : : "memory");
+
+ return segv_triggered;
+}
+
+bool gup_write(void *ptr)
+{
+	unsigned long val = MAGIC_VAL;
+
+	/* fd is /proc/self/mem; MAGIC_VAL lets test_gup() detect write-through */
+	lseek(fd, (unsigned long)ptr, SEEK_SET);
+	if (write(fd, &val, sizeof(val)) < 0)
+		return 1;
+	return 0;
+}
+
+bool gup_read(void *ptr)
+{
+ unsigned long val;
+
+ lseek(fd, (unsigned long)ptr, SEEK_SET);
+ if (read(fd, &val, sizeof(val)) < 0)
+ return 1;
+
+ return 0;
+}
+
+int test_gup(void)
+{
+ struct sigaction sa = {};
+ int status;
+ pid_t pid;
+
+ sa.sa_sigaction = test_access_fix_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ segv_triggered = false;
+
+ fd = open("/proc/self/mem", O_RDWR);
+ if (fd == -1)
+ return 1;
+
+ reset_test_shstk(0);
+ if (gup_read(shstk_ptr))
+ return 1;
+ if (test_shstk_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup read -> shstk access success\n");
+
+ reset_test_shstk(0);
+ if (gup_write(shstk_ptr))
+ return 1;
+ if (test_shstk_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup write -> shstk access success\n");
+
+ reset_test_shstk(0);
+ if (gup_read(shstk_ptr))
+ return 1;
+ if (!test_write_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup read -> write access success\n");
+
+ reset_test_shstk(0);
+ if (gup_write(shstk_ptr))
+ return 1;
+ if (!test_write_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup write -> write access success\n");
+
+ close(fd);
+
+ /* COW/gup test */
+ reset_test_shstk(0);
+ pid = fork();
+ if (!pid) {
+ fd = open("/proc/self/mem", O_RDWR);
+ if (fd == -1)
+ exit(1);
+
+ if (gup_write(shstk_ptr)) {
+ close(fd);
+ exit(1);
+ }
+ close(fd);
+ exit(0);
+ }
+ waitpid(pid, &status, 0);
+ if (WEXITSTATUS(status)) {
+ printf("[FAIL]\tWrite in child failed\n");
+ return 1;
+ }
+ if (*(unsigned long *)shstk_ptr == MAGIC_VAL) {
+ printf("[FAIL]\tWrite in child wrote through to shared memory\n");
+ return 1;
+ }
+
+ printf("[INFO]\tCow gup write -> write access success\n");
+
+ free_shstk(shstk_ptr);
+
+ signal(SIGSEGV, SIG_DFL);
+
+ printf("[OK]\tShadow gup test\n");
+
+ return 0;
+}
+
+int test_mprotect(void)
+{
+ struct sigaction sa = {};
+
+ sa.sa_sigaction = test_access_fix_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ segv_triggered = false;
+
+ /* mprotect a shadow stack as read only */
+ reset_test_shstk(0);
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+ return 1;
+ }
+
+ /* try to wrss it and fail */
+ if (!test_shstk_access(shstk_ptr)) {
+ printf("[FAIL]\tShadow stack access to read-only memory succeeded\n");
+ return 1;
+ }
+
+ /*
+ * The shadow stack was reset above to resolve the fault, make the new one
+ * read-only.
+ */
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+ return 1;
+ }
+
+ /* then back to writable */
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_WRITE) failed\n");
+ return 1;
+ }
+
+ /* then wrss to it and succeed */
+ if (test_shstk_access(shstk_ptr)) {
+ printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n");
+ return 1;
+ }
+
+ free_shstk(shstk_ptr);
+
+ signal(SIGSEGV, SIG_DFL);
+
+ printf("[OK]\tmprotect() test\n");
+
+ return 0;
+}
+
+char zero[4096];
+
+static void *uffd_thread(void *arg)
+{
+ struct uffdio_copy req;
+ int uffd = *(int *)arg;
+ struct uffd_msg msg;
+ int ret;
+
+ while (1) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret > 0)
+ break;
+ else if (errno == EAGAIN) /* uffd is opened O_NONBLOCK by test_userfaultfd(), so poll */
+ continue;
+ return (void *)1;
+ }
+
+ req.dst = msg.arg.pagefault.address;
+ req.src = (__u64)zero; /* resolve the fault with a page of zeroes */
+ req.len = 4096;
+ req.mode = 0;
+
+ if (ioctl(uffd, UFFDIO_COPY, &req))
+ return (void *)1;
+
+ return (void *)0; /* non-NULL return means failure; checked via pthread_join() */
+}
+
+int test_userfaultfd(void)
+{
+	struct uffdio_register uffdio_register;
+	struct uffdio_api uffdio_api;
+	struct sigaction sa = {};
+	pthread_t thread;
+	void *res;
+	int uffd;
+
+	sa.sa_sigaction = test_access_fix_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd < 0) {
+		printf("[SKIP]\tUserfaultfd unavailable.\n");
+		return 0;
+	}
+
+	reset_test_shstk(0);
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+		goto err;
+
+	uffdio_register.range.start = (__u64)shstk_ptr;
+	uffdio_register.range.len = 4096;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		goto err;
+
+	if (pthread_create(&thread, NULL, &uffd_thread, &uffd))
+		goto err;
+	/* Drop the page so the wrss below faults as missing and uffd_thread() fills it */
+	reset_shstk(shstk_ptr);
+	test_shstk_access(shstk_ptr);
+
+	if (pthread_join(thread, &res))
+		goto err;
+
+	if (test_shstk_access(shstk_ptr))
+		goto err;
+	/* Release everything on success too, mirroring the err path */
+	free_shstk(shstk_ptr);
+	close(uffd);
+	signal(SIGSEGV, SIG_DFL);
+
+	if (!res)
+		printf("[OK]\tUserfaultfd test\n");
+	return !!res;
+err:
+	free_shstk(shstk_ptr);
+	close(uffd);
+	signal(SIGSEGV, SIG_DFL);
+	return 1;
+}
+
+/* Simple linked list for keeping track of mappings in test_guard_gap() */
+struct node {
+ struct node *next;
+ void *mapping;
+};
+
+/*
+ * This tests whether mmap will place other mappings in a shadow stack's guard
+ * gap. The steps are:
+ * 1. Finds an empty place by mapping and unmapping something.
+ * 2. Map a shadow stack in the middle of the known empty area.
+ * 3. Map a bunch of PAGE_SIZE mappings. These will use the search down
+ * direction, filling any gaps until it encounters the shadow stack's
+ * guard gap.
+ * 4. When a mapping lands below the shadow stack from step 2, then all
+ * of the above gaps are filled. The search down algorithm will have
+ * looked at the shadow stack gaps.
+ * 5. See if it landed in the gap.
+ */
+int test_guard_gap(void)
+{
+ void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+ struct node *head = NULL, *cur;
+
+ free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ munmap(free_area, SS_SIZE * 3);
+
+ shstk = create_shstk(free_area + SS_SIZE);
+ if (shstk == MAP_FAILED)
+ return 1;
+
+ while (test_map > shstk) {
+ test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (test_map == MAP_FAILED)
+ return 1;
+ cur = malloc(sizeof(*cur));
+ cur->mapping = test_map;
+
+ cur->next = head;
+ head = cur;
+ }
+
+ while (head) {
+ cur = head;
+ head = cur->next;
+ munmap(cur->mapping, PAGE_SIZE);
+ free(cur);
+ }
+
+ free_shstk(shstk);
+
+ if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
+ return 1;
+
+ printf("[OK]\tGuard gap test\n");
+
+ return 0;
+}
+
+/*
+ * Pulling this number out of the 32 bit header while also including the
+ * 64 bit one needed above is too complicated. Just define a copy here.
+ */
+#define __NR_compat_sigaction 67
+
+/*
+ * Install a handler via the 32 bit sigaction syscall to get the 32 bit
+ * signal ABI. Make sure to list the registers that will get clobbered.
+ */
+int sigaction32(int signum, const struct sigaction *restrict act,
+ struct sigaction *restrict oldact)
+{
+ register long syscall_reg asm("eax") = __NR_compat_sigaction;
+ register long signum_reg asm("ebx") = signum;
+ register long act_reg asm("ecx") = (long)act;
+ register long oldact_reg asm("edx") = (long)oldact;
+ int ret = 0;
+
+ asm volatile ("int $0x80;"
+ : "=a"(ret), "=m"(oldact)
+ : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg),
+ "r"(oldact_reg)
+ : "r8", "r9", "r10", "r11"
+ );
+
+ return ret;
+}
+
+sigjmp_buf jmp_buffer;
+
+void segv_gp_handler(int signum, siginfo_t *si, void *uc)
+{
+ segv_triggered = true;
+
+ /*
+ * To work with old glibc, this can't rely on siglongjmp working with
+ * shadow stack enabled, so disable shadow stack before siglongjmp().
+ */
+ ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+ siglongjmp(jmp_buffer, -1);
+}
+
+/*
+ * Transition to 32 bit mode and check that a #GP triggers a segfault.
+ */
+int test_32bit(void)
+{
+	struct sigaction sa = {};
+	struct sigaction *sa32;
+
+	/* Create sigaction in 32 bit address range */
+	sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE,
+		    MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (sa32 == MAP_FAILED)
+		return 1;
+	sa32->sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = segv_gp_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL))
+		return 1;
+
+	segv_triggered = false;
+
+	/* Make sure segv_triggered is set before triggering the #GP */
+	asm volatile("" : : : "memory");
+
+	/* Set handler to somewhere in 32 bit address space */
+	sa32->sa_handler = (void *)sa32;
+	if (sigaction32(SIGUSR1, sa32, NULL))
+		return 1;
+
+	/* segv_gp_handler() siglongjmp()s back here, so sigsetjmp() returns non-zero */
+	if (!sigsetjmp(jmp_buffer, 1))
+		raise(SIGUSR1);
+
+	if (segv_triggered)
+		printf("[OK]\t32 bit test\n");
+
+	return !segv_triggered;
+}
+
+void segv_handler_ptrace(int signum, siginfo_t *si, void *uc)
+{
+ /* The SSP adjustment caused a segfault. */
+ exit(0);
+}
+
+int test_ptrace(void)
+{
+	unsigned long saved_ssp, ssp = 0;
+	struct sigaction sa = {};
+	struct iovec iov;
+	int status, pid;
+
+	iov.iov_base = &ssp;
+	iov.iov_len = sizeof(ssp);
+
+	pid = fork();
+	if (pid < 0)	/* must not reach out_kill: kill(-1, ...) signals every process */
+		return 1;
+	if (!pid) {
+		ssp = get_ssp();
+
+		sa.sa_sigaction = segv_handler_ptrace;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGSEGV, &sa, NULL))
+			exit(1);	/* returning would run the caller's remaining tests in the child */
+
+		ptrace(PTRACE_TRACEME, NULL, NULL, NULL);
+		/*
+		 * The parent will tweak the SSP and return from this function
+		 * will #CP.
+		 */
+		raise(SIGTRAP);
+		exit(1);
+	}
+
+	while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP);
+
+	if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tFailed to PTRACE_GETREGS\n");
+		goto out_kill;
+	}
+
+	if (!ssp) {
+		printf("[INFO]\tPtrace child SSP was 0\n");
+		goto out_kill;
+	}
+
+	saved_ssp = ssp;
+
+	iov.iov_len = 0;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	iov.iov_len = sizeof(ssp) + 1;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	ssp += 1;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	ssp = 0xFFFFFFFFFFFF0000;
+	if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	/*
+	 * Tweak the SSP so the child will #CP when it resumes and returns
+	 * from raise()
+	 */
+	ssp = saved_ssp + 8;
+	iov.iov_len = sizeof(ssp);
+	if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+		printf("[INFO]\tFailed to PTRACE_SETREGS\n");
+		goto out_kill;
+	}
+
+	if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) {
+		printf("[INFO]\tFailed to PTRACE_DETACH\n");
+		goto out_kill;
+	}
+
+	waitpid(pid, &status, 0);
+	if (!WIFEXITED(status) || WEXITSTATUS(status))	/* a signal death is not a pass */
+		return 1;
+
+	printf("[OK]\tPtrace test\n");
+	return 0;
+
+out_kill:
+	kill(pid, SIGKILL);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+		printf("[SKIP]\tCould not enable Shadow stack\n");
+		return 1;
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+		ret = 1;
+		printf("[FAIL]\tDisabling shadow stack failed\n");
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+		printf("[SKIP]\tCould not re-enable Shadow stack\n");
+		return 1;
+	}
+
+	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) {
+		printf("[SKIP]\tCould not enable WRSS\n");
+		ret = 1;
+		goto out;
+	}
+
+	/* Should have succeeded if here, but this is a test, so double check. */
+	if (!get_ssp()) {
+		printf("[FAIL]\tShadow stack disabled\n");
+		return 1;
+	}
+
+	if (test_shstk_pivot()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack pivot\n");
+		goto out;
+	}
+
+	if (test_shstk_faults()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack fault test\n");
+		goto out;
+	}
+
+	if (test_shstk_violation()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack violation test\n");
+		goto out;
+	}
+
+	if (test_gup()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack gup\n");
+		goto out;
+	}
+
+	if (test_mprotect()) {
+		ret = 1;
+		printf("[FAIL]\tShadow stack mprotect test\n");
+		goto out;
+	}
+
+	if (test_userfaultfd()) {
+		ret = 1;
+		printf("[FAIL]\tUserfaultfd test\n");
+		goto out;
+	}
+
+	if (test_guard_gap()) {
+		ret = 1;
+		printf("[FAIL]\tGuard gap test\n");
+		goto out;
+	}
+
+	if (test_ptrace()) {
+		ret = 1;
+		printf("[FAIL]\tptrace test\n");
+	}
+
+	if (test_32bit()) {
+		ret = 1;
+		printf("[FAIL]\t32 bit test\n");
+		goto out;
+	}
+
+	return ret;	/* test_32bit() passed, so segv_gp_handler() already disabled shadow stack */
+
+out:
+	/*
+	 * Disable shadow stack before the function returns, or there will be a
+	 * shadow stack violation.
+	 */
+	if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+		ret = 1;
+		printf("[FAIL]\tDisabling shadow stack failed\n");
+	}
+
+	return ret;
+}
+#endif