Merge branches 'for-next/kpti', 'for-next/missing-proto-warn', 'for-next/iss2-decode', 'for-next/kselftest', 'for-next/misc', 'for-next/feat_mops', 'for-next/module-alloc', 'for-next/sysreg', 'for-next/cpucap', 'for-next/acpi', 'for-next/kdump', 'for-next/acpi-doc', 'for-next/doc' and 'for-next/tpidr2-fix', remote-tracking branch 'arm64/for-next/perf' into for-next/core

* arm64/for-next/perf:
  docs: perf: Fix warning from 'make htmldocs' in hisi-pmu.rst
  docs: perf: Add new description for HiSilicon UC PMU
  drivers/perf: hisi: Add support for HiSilicon UC PMU driver
  drivers/perf: hisi: Add support for HiSilicon H60PA and PAv3 PMU driver
  perf: arm_cspmu: Add missing MODULE_DEVICE_TABLE
  perf/arm-cmn: Add sysfs identifier
  perf/arm-cmn: Revamp model detection
  perf/arm_dmc620: Add cpumask
  dt-bindings: perf: fsl-imx-ddr: Add i.MX93 compatible
  drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver
  perf/arm_cspmu: Decouple APMT dependency
  perf/arm_cspmu: Clean up ACPI dependency
  ACPI/APMT: Don't register invalid resource
  perf/arm_cspmu: Fix event attribute type
  perf: arm_cspmu: Set irq affinitiy only if overflow interrupt is used
  drivers/perf: hisi: Don't migrate perf to the CPU going to teardown
  drivers/perf: apple_m1: Force 63bit counters for M2 CPUs
  perf/arm-cmn: Fix DTC reset
  perf: qcom_l2_pmu: Make l2_cache_pmu_probe_cluster() more robust
  perf/arm-cci: Slightly optimize cci_pmu_sync_counters()

* for-next/kpti:
  : Simplify KPTI trampoline exit code
  arm64: entry: Simplify tramp_alias macro and tramp_exit routine
  arm64: entry: Preserve/restore X29 even for compat tasks

* for-next/missing-proto-warn:
  : Address -Wmissing-prototype warnings
  arm64: add alt_cb_patch_nops prototype
  arm64: move early_brk64 prototype to header
  arm64: signal: include asm/exception.h
  arm64: kaslr: add kaslr_early_init() declaration
  arm64: flush: include linux/libnvdimm.h
  arm64: module-plts: inline linux/moduleloader.h
  arm64: hide unused is_valid_bugaddr()
  arm64: efi: add efi_handle_corrupted_x18 prototype
  arm64: cpuidle: fix #ifdef for acpi functions
  arm64: kvm: add prototypes for functions called in asm
  arm64: spectre: provide prototypes for internal functions
  arm64: move cpu_suspend_set_dbg_restorer() prototype to header
  arm64: avoid prototype warnings for syscalls
  arm64: add scs_patch_vmlinux prototype
  arm64: xor-neon: mark xor_arm64_neon_*() static

* for-next/iss2-decode:
  : Add decode of ISS2 to data abort reports
  arm64/esr: Add decode of ISS2 to data abort reporting
  arm64/esr: Use GENMASK() for the ISS mask

* for-next/kselftest:
  : Various arm64 kselftest improvements
  kselftest/arm64: Log signal code and address for unexpected signals
  kselftest/arm64: Add a smoke test for ptracing hardware break/watch points

* for-next/misc:
  : Miscellaneous patches
  arm64: alternatives: make clean_dcache_range_nopatch() noinstr-safe
  arm64: hibernate: remove WARN_ON in save_processor_state
  arm64/fpsimd: Exit streaming mode when flushing tasks
  arm64: mm: fix VA-range sanity check
  arm64/mm: remove now-superfluous ISBs from TTBR writes
  arm64: consolidate rox page protection logic
  arm64: set __exception_irq_entry with __irq_entry as a default
  arm64: syscall: unmask DAIF for tracing status
  arm64: lockdep: enable checks for held locks when returning to userspace
  arm64/cpucaps: increase string width to properly format cpucaps.h
  arm64/cpufeature: Use helper for ECV CNTPOFF cpufeature

* for-next/feat_mops:
  : Support for ARMv8.8 memcpy instructions in userspace
  kselftest/arm64: add MOPS to hwcap test
  arm64: mops: allow disabling MOPS from the kernel command line
  arm64: mops: detect and enable FEAT_MOPS
  arm64: mops: handle single stepping after MOPS exception
  arm64: mops: handle MOPS exceptions
  KVM: arm64: hide MOPS from guests
  arm64: mops: don't disable host MOPS instructions from EL2
  arm64: mops: document boot requirements for MOPS
  KVM: arm64: switch HCRX_EL2 between host and guest
  arm64: cpufeature: detect FEAT_HCX
  KVM: arm64: initialize HCRX_EL2

* for-next/module-alloc:
  : Make the arm64 module allocation code more robust (clean-up, VA range expansion)
  arm64: module: rework module VA range selection
  arm64: module: mandate MODULE_PLTS
  arm64: module: move module randomization to module.c
  arm64: kaslr: split kaslr/module initialization
  arm64: kasan: remove !KASAN_VMALLOC remnants
  arm64: module: remove old !KASAN_VMALLOC logic

* for-next/sysreg: (21 commits)
  : More sysreg conversions to automatic generation
  arm64/sysreg: Convert TRBIDR_EL1 register to automatic generation
  arm64/sysreg: Convert TRBTRG_EL1 register to automatic generation
  arm64/sysreg: Convert TRBMAR_EL1 register to automatic generation
  arm64/sysreg: Convert TRBSR_EL1 register to automatic generation
  arm64/sysreg: Convert TRBBASER_EL1 register to automatic generation
  arm64/sysreg: Convert TRBPTR_EL1 register to automatic generation
  arm64/sysreg: Convert TRBLIMITR_EL1 register to automatic generation
  arm64/sysreg: Rename TRBIDR_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBTRG_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBMAR_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBSR_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBBASER_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBPTR_EL1 fields per auto-gen tools format
  arm64/sysreg: Rename TRBLIMITR_EL1 fields per auto-gen tools format
  arm64/sysreg: Convert OSECCR_EL1 to automatic generation
  arm64/sysreg: Convert OSDTRTX_EL1 to automatic generation
  arm64/sysreg: Convert OSDTRRX_EL1 to automatic generation
  arm64/sysreg: Convert OSLAR_EL1 to automatic generation
  arm64/sysreg: Standardise naming of bitfield constants in OSL[AS]R_EL1
  arm64/sysreg: Convert MDSCR_EL1 to automatic register generation
  ...

* for-next/cpucap:
  : arm64 cpucap clean-up
  arm64: cpufeature: fold cpus_set_cap() into update_cpu_capabilities()
  arm64: cpufeature: use cpucap naming
  arm64: alternatives: use cpucap naming
  arm64: standardise cpucap bitmap names

* for-next/acpi:
  : Various arm64-related ACPI patches
  ACPI: bus: Consolidate all arm specific initialisation into acpi_arm_init()

* for-next/kdump:
  : Simplify the crashkernel reservation behaviour of crashkernel=X,high on arm64
  arm64: add kdump.rst into index.rst
  Documentation: add kdump.rst to present crashkernel reservation on arm64
  arm64: kdump: simplify the reservation behaviour of crashkernel=,high

* for-next/acpi-doc:
  : Update ACPI documentation for Arm systems
  Documentation/arm64: Update ACPI tables from BBR
  Documentation/arm64: Update references in arm-acpi
  Documentation/arm64: Update ARM and arch reference

* for-next/doc:
  : arm64 documentation updates
  Documentation/arm64: Add ptdump documentation

* for-next/tpidr2-fix:
  : Fix the TPIDR2_EL0 register restoring on sigreturn
  kselftest/arm64: Add a test case for TPIDR2 restore
  arm64/signal: Restore TPIDR2 register rather than memory state
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e5bab2..e01fbfd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -429,6 +429,9 @@
 	arm64.nosme	[ARM64] Unconditionally disable Scalable Matrix
 			Extension support
 
+	arm64.nomops	[ARM64] Unconditionally disable Memory Copy and Memory
+			Set instructions support
+
 	ataflop=	[HW,M68k]
 
 	atarimouse=	[HW,MOUSE] Atari Mouse
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 5469793..e0174d2 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -56,14 +56,14 @@
 For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same
 as PMU v1, but some new functions are added to the hardware.
 
-(a) L3C PMU supports filtering by core/thread within the cluster which can be
+1. L3C PMU supports filtering by core/thread within the cluster which can be
 specified as a bitmap::
 
   $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
 
 This will only count the operations from core/thread 0 and 1 in this cluster.
 
-(b) Tracetag allow the user to chose to count only read, write or atomic
+2. Tracetag allow the user to chose to count only read, write or atomic
 operations via the tt_req parameeter in perf. The default value counts all
 operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101
 represents write operations, 3'b110 represents atomic store operations and
@@ -73,14 +73,16 @@
 
 This will only count the read operations in this cluster.
 
-(c) Datasrc allows the user to check where the data comes from. It is 5 bits.
+3. Datasrc allows the user to check where the data comes from. It is 5 bits.
 Some important codes are as follows:
-5'b00001: comes from L3C in this die;
-5'b01000: comes from L3C in the cross-die;
-5'b01001: comes from L3C which is in another socket;
-5'b01110: comes from the local DDR;
-5'b01111: comes from the cross-die DDR;
-5'b10000: comes from cross-socket DDR;
+
+- 5'b00001: comes from L3C in this die;
+- 5'b01000: comes from L3C in the cross-die;
+- 5'b01001: comes from L3C which is in another socket;
+- 5'b01110: comes from the local DDR;
+- 5'b01111: comes from the cross-die DDR;
+- 5'b10000: comes from cross-socket DDR;
+
 etc, it is mainly helpful to find that the data source is nearest from the CPU
 cores. If datasrc_cfg is used in the multi-chips, the datasrc_skt shall be
 configured in perf command::
@@ -88,15 +90,25 @@
   $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
   hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
 
-(d)Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+4. Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
 contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
 clusters (SICL) containing multiple I/O clusters (ICLs). Each CCL/ICL in the
 SoC has a unique ID. Each ID is 11bits, include a 6-bit SCCL-ID and 5-bit
 CCL/ICL-ID. For I/O die, the ICL-ID is followed by:
-5'b00000: I/O_MGMT_ICL;
-5'b00001: Network_ICL;
-5'b00011: HAC_ICL;
-5'b10000: PCIe_ICL;
+
+- 5'b00000: I/O_MGMT_ICL;
+- 5'b00001: Network_ICL;
+- 5'b00011: HAC_ICL;
+- 5'b10000: PCIe_ICL;
+
+5. uring_channel: UC PMU events 0x47~0x59 supports filtering by tx request
+uring channel. It is 2 bits. Some important codes are as follows:
+
+- 2'b11: count the events which sent to the uring_ext (MATA) channel;
+- 2'b01: is the same as 2'b11;
+- 2'b10: count the events which sent to the uring (non-MATA) channel;
+- 2'b00: default value, count the events which sent to the both uring and
+  uring_ext channel;
 
 Users could configure IDs to count data come from specific CCL/ICL, by setting
 srcid_cmd & srcid_msk, and data desitined for specific CCL/ICL by setting
diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arm64/acpi_object_usage.rst
index 484ef96..1da2220 100644
--- a/Documentation/arm64/acpi_object_usage.rst
+++ b/Documentation/arm64/acpi_object_usage.rst
@@ -17,16 +17,37 @@
 
        -  Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
 
-       -  Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IBFT,
-          IORT, MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT,
-          STAO, TCPA, TPM2, UEFI, XENV
+       -  Optional: AGDI, BGRT, CEDT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT,
+          HMAT, IBFT, IORT, MCHI, MPAM, MPST, MSCT, NFIT, PMTT, PPTT, RASF, SBST,
+          SDEI, SLIT, SPMI, SRAT, STAO, TCPA, TPM2, UEFI, XENV
 
-       -  Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT, MSDM, OEMx,
-          PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+       -  Not supported: AEST, APMT, BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT,
+          MSDM, OEMx, PDTT, PSDT, RAS2, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
 
 ====== ========================================================================
 Table  Usage for ARMv8 Linux
 ====== ========================================================================
+AEST   Signature Reserved (signature == "AEST")
+
+       **Arm Error Source Table**
+
+       This table informs the OS of any error nodes in the system that are
+       compliant with the Arm RAS architecture.
+
+AGDI   Signature Reserved (signature == "AGDI")
+
+       **Arm Generic diagnostic Dump and Reset Device Interface Table**
+
+       This table describes a non-maskable event, that is used by the platform
+       firmware, to request the OS to generate a diagnostic dump and reset the device.
+
+APMT   Signature Reserved (signature == "APMT")
+
+       **Arm Performance Monitoring Table**
+
+       This table describes the properties of PMU support implmented by
+       components in the system.
+
 BERT   Section 18.3 (signature == "BERT")
 
        **Boot Error Record Table**
@@ -47,6 +68,13 @@
        Optional, not currently supported, with no real use-case for an
        ARM server.
 
+CEDT   Signature Reserved (signature == "CEDT")
+
+       **CXL Early Discovery Table**
+
+       This table allows the OS to discover any CXL Host Bridges and the Host
+       Bridge registers.
+
 CPEP   Section 5.2.18 (signature == "CPEP")
 
        **Corrected Platform Error Polling table**
@@ -184,6 +212,15 @@
        Must be supplied if RAS support is provided by the platform.  It
        is recommended this table be supplied.
 
+HMAT   Section 5.2.28 (signature == "HMAT")
+
+       **Heterogeneous Memory Attribute Table**
+
+       This table describes the memory attributes, such as memory side cache
+       attributes and bandwidth and latency details, related to Memory Proximity
+       Domains. The OS uses this information to optimize the system memory
+       configuration.
+
 HPET   Signature Reserved (signature == "HPET")
 
        **High Precision Event timer Table**
@@ -241,6 +278,13 @@
 
        Optional, not currently supported.
 
+MPAM   Signature Reserved (signature == "MPAM")
+
+       **Memory Partitioning And Monitoring table**
+
+       This table allows the OS to discover the MPAM controls implemented by
+       the subsystems.
+
 MPST   Section 5.2.21 (signature == "MPST")
 
        **Memory Power State Table**
@@ -281,18 +325,39 @@
        Recommend for use on arm64; use of PCC is recommended when using CPPC
        to control performance and power for platform processors.
 
+PDTT   Section 5.2.29 (signature == "PDTT")
+
+       **Platform Debug Trigger Table**
+
+       This table describes PCC channels used to gather debug logs of
+       non-architectural features.
+
+
 PMTT   Section 5.2.21.12 (signature == "PMTT")
 
        **Platform Memory Topology Table**
 
        Optional, not currently supported.
 
+PPTT   Section 5.2.30 (signature == "PPTT")
+
+       **Processor Properties Topology Table**
+
+       This table provides the processor and cache topology.
+
 PSDT   Section 5.2.11.3 (signature == "PSDT")
 
        **Persistent System Description Table**
 
        Obsolete table, will not be supported.
 
+RAS2   Section 5.2.21 (signature == "RAS2")
+
+       **RAS Features 2 table**
+
+       This table provides interfaces for the RAS capabilities implemented in
+       the platform.
+
 RASF   Section 5.2.20 (signature == "RASF")
 
        **RAS Feature table**
@@ -318,6 +383,12 @@
 
        Optional, not currently supported.
 
+SDEI   Signature Reserved (signature == "SDEI")
+
+       **Software Delegated Exception Interface table**
+
+       This table advertises the presence of the SDEI interface.
+
 SLIC   Signature Reserved (signature == "SLIC")
 
        **Software LIcensing table**
diff --git a/Documentation/arm64/arm-acpi.rst b/Documentation/arm64/arm-acpi.rst
index 47ecb99..37ec5e9 100644
--- a/Documentation/arm64/arm-acpi.rst
+++ b/Documentation/arm64/arm-acpi.rst
@@ -1,40 +1,41 @@
-=====================
-ACPI on ARMv8 Servers
-=====================
+===================
+ACPI on Arm systems
+===================
 
-ACPI can be used for ARMv8 general purpose servers designed to follow
-the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
-Base Boot Requirements) [1] specifications.  Please note that the SBBR
-can be retrieved simply by visiting [1], but the SBSA is currently only
-available to those with an ARM login due to ARM IP licensing concerns.
+ACPI can be used for Armv8 and Armv9 systems designed to follow
+the BSA (Arm Base System Architecture) [0] and BBR (Arm
+Base Boot Requirements) [1] specifications.  Both BSA and BBR are publicly
+accessible documents.
+Arm Servers, in addition to being BSA compliant, comply with a set
+of rules defined in SBSA (Server Base System Architecture) [2].
 
-The ARMv8 kernel implements the reduced hardware model of ACPI version
+The Arm kernel implements the reduced hardware model of ACPI version
 5.1 or later.  Links to the specification and all external documents
 it refers to are managed by the UEFI Forum.  The specification is
 available at http://www.uefi.org/specifications and documents referenced
 by the specification can be found via http://www.uefi.org/acpi.
 
-If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+If an Arm system does not meet the requirements of the BSA and BBR,
 or cannot be described using the mechanisms defined in the required ACPI
 specifications, then ACPI may not be a good fit for the hardware.
 
 While the documents mentioned above set out the requirements for building
-industry-standard ARMv8 servers, they also apply to more than one operating
+industry-standard Arm systems, they also apply to more than one operating
 system.  The purpose of this document is to describe the interaction between
-ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and Linux only, on an Arm system -- that is, what Linux expects of
 ACPI and what ACPI can expect of Linux.
 
 
-Why ACPI on ARM?
+Why ACPI on Arm?
 ----------------
 Before examining the details of the interface between ACPI and Linux, it is
 useful to understand why ACPI is being used.  Several technologies already
 exist in Linux for describing non-enumerable hardware, after all.  In this
-section we summarize a blog post [2] from Grant Likely that outlines the
-reasoning behind ACPI on ARMv8 servers.  Actually, we snitch a good portion
+section we summarize a blog post [3] from Grant Likely that outlines the
+reasoning behind ACPI on Arm systems.  Actually, we snitch a good portion
 of the summary text almost directly, to be honest.
 
-The short form of the rationale for ACPI on ARM is:
+The short form of the rationale for ACPI on Arm is:
 
 -  ACPI’s byte code (AML) allows the platform to encode hardware behavior,
    while DT explicitly does not support this.  For hardware vendors, being
@@ -47,7 +48,7 @@
 
 -  In the enterprise server environment, ACPI has established bindings (such
    as for RAS) which are currently used in production systems.  DT does not.
-   Such bindings could be defined in DT at some point, but doing so means ARM
+   Such bindings could be defined in DT at some point, but doing so means Arm
    and x86 would end up using completely different code paths in both firmware
    and the kernel.
 
@@ -108,7 +109,7 @@
 
 Relationship with Device Tree
 -----------------------------
-ACPI support in drivers and subsystems for ARMv8 should never be mutually
+ACPI support in drivers and subsystems for Arm should never be mutually
 exclusive with DT support at compile time.
 
 At boot time the kernel will only use one description method depending on
@@ -121,11 +122,11 @@
 
 Booting using ACPI tables
 -------------------------
-The only defined method for passing ACPI tables to the kernel on ARMv8
+The only defined method for passing ACPI tables to the kernel on Arm
 is via the UEFI system configuration table.  Just so it is explicit, this
 means that ACPI is only supported on platforms that boot via UEFI.
 
-When an ARMv8 system boots, it can either have DT information, ACPI tables,
+When an Arm system boots, it can either have DT information, ACPI tables,
 or in some very unusual cases, both.  If no command line parameters are used,
 the kernel will try to use DT for device enumeration; if there is no DT
 present, the kernel will try to use ACPI tables, but only if they are present.
@@ -169,7 +170,7 @@
 
 For the ACPI core to operate properly, and in turn provide the information
 the kernel needs to configure devices, it expects to find the following
-tables (all section numbers refer to the ACPI 6.1 specification):
+tables (all section numbers refer to the ACPI 6.5 specification):
 
     -  RSDP (Root System Description Pointer), section 5.2.5
 
@@ -184,20 +185,76 @@
 
     -  GTDT (Generic Timer Description Table), section 5.2.24
 
+    -  PPTT (Processor Properties Topology Table), section 5.2.30
+
+    -  DBG2 (DeBuG port table 2), section 5.2.6, specifically Table 5-6.
+
+    -  APMT (Arm Performance Monitoring unit Table), section 5.2.6, specifically Table 5-6.
+
+    -  AGDI (Arm Generic diagnostic Dump and Reset Device Interface Table), section 5.2.6, specifically Table 5-6.
+
     -  If PCI is supported, the MCFG (Memory mapped ConFiGuration
-       Table), section 5.2.6, specifically Table 5-31.
+       Table), section 5.2.6, specifically Table 5-6.
 
     -  If booting without a console=<device> kernel parameter is
        supported, the SPCR (Serial Port Console Redirection table),
-       section 5.2.6, specifically Table 5-31.
+       section 5.2.6, specifically Table 5-6.
 
     -  If necessary to describe the I/O topology, SMMUs and GIC ITSs,
        the IORT (Input Output Remapping Table, section 5.2.6, specifically
-       Table 5-31).
+       Table 5-6).
 
-    -  If NUMA is supported, the SRAT (System Resource Affinity Table)
-       and SLIT (System Locality distance Information Table), sections
-       5.2.16 and 5.2.17, respectively.
+    -  If NUMA is supported, the following tables are required:
+
+       - SRAT (System Resource Affinity Table), section 5.2.16
+
+       - SLIT (System Locality distance Information Table), section 5.2.17
+
+    -  If NUMA is supported, and the system contains heterogeneous memory,
+       the HMAT (Heterogeneous Memory Attribute Table), section 5.2.28.
+
+    -  If the ACPI Platform Error Interfaces are required, the following
+       tables are conditionally required:
+
+       - BERT (Boot Error Record Table, section 18.3.1)
+
+       - EINJ (Error INJection table, section 18.6.1)
+
+       - ERST (Error Record Serialization Table, section 18.5)
+
+       - HEST (Hardware Error Source Table, section 18.3.2)
+
+       - SDEI (Software Delegated Exception Interface table, section 5.2.6,
+         specifically Table 5-6)
+
+       - AEST (Arm Error Source Table, section 5.2.6,
+         specifically Table 5-6)
+
+       - RAS2 (ACPI RAS2 feature table, section 5.2.21)
+
+    -  If the system contains controllers using PCC channel, the
+       PCCT (Platform Communications Channel Table), section 14.1
+
+    -  If the system contains a controller to capture board-level system state,
+       and communicates with the host via PCC, the PDTT (Platform Debug Trigger
+       Table), section 5.2.29.
+
+    -  If NVDIMM is supported, the NFIT (NVDIMM Firmware Interface Table), section 5.2.26
+
+    -  If video framebuffer is present, the BGRT (Boot Graphics Resource Table), section 5.2.23
+
+    -  If IPMI is implemented, the SPMI (Server Platform Management Interface),
+       section 5.2.6, specifically Table 5-6.
+
+    -  If the system contains a CXL Host Bridge, the CEDT (CXL Early Discovery
+       Table), section 5.2.6, specifically Table 5-6.
+
+    -  If the system supports MPAM, the MPAM (Memory Partitioning And Monitoring table), section 5.2.6,
+       specifically Table 5-6.
+
+    -  If the system lacks persistent storage, the IBFT (ISCSI Boot Firmware
+       Table), section 5.2.6, specifically Table 5-6.
+
 
 If the above tables are not all present, the kernel may or may not be
 able to boot properly since it may not be able to configure all of the
@@ -269,16 +326,14 @@
 object is described in the ACPI specification section 6.2.5, but this only
 describes how to define the structure of an object returned via _DSD, and
 how specific data structures are defined by specific UUIDs.  Linux should
-only use the _DSD Device Properties UUID [5]:
+only use the _DSD Device Properties UUID [4]:
 
    - UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
 
-   - https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
-
-The UEFI Forum provides a mechanism for registering device properties [4]
-so that they may be used across all operating systems supporting ACPI.
-Device properties that have not been registered with the UEFI Forum should
-not be used.
+Common device properties can be registered by creating a pull request to [4] so
+that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum can be used
+but not as "uefi-" common properties.
 
 Before creating new device properties, check to be sure that they have not
 been defined before and either registered in the Linux kernel documentation
@@ -306,7 +361,7 @@
 
 Once registration and review have been completed, the kernel provides an
 interface for looking up device properties in a manner independent of
-whether DT or ACPI is being used.  This API should be used [6]; it can
+whether DT or ACPI is being used.  This API should be used [5]; it can
 eliminate some duplication of code paths in driver probing functions and
 discourage divergence between DT bindings and ACPI device properties.
 
@@ -448,15 +503,15 @@
 ----
 The ACPI specification changes regularly.  During the year 2014, for instance,
 version 5.1 was released and version 6.0 substantially completed, with most of
-the changes being driven by ARM-specific requirements.  Proposed changes are
+the changes being driven by Arm-specific requirements.  Proposed changes are
 presented and discussed in the ASWG (ACPI Specification Working Group) which
 is a part of the UEFI Forum.  The current version of the ACPI specification
-is 6.1 release in January 2016.
+is 6.5 release in August 2022.
 
 Participation in this group is open to all UEFI members.  Please see
 http://www.uefi.org/workinggroup for details on group membership.
 
-It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+It is the intent of the Arm ACPI kernel code to follow the ACPI specification
 as closely as possible, and to only implement functionality that complies with
 the released standards from UEFI ASWG.  As a practical matter, there will be
 vendors that provide bad ACPI tables or violate the standards in some way.
@@ -470,12 +525,12 @@
 
 Linux Code
 ----------
-Individual items specific to Linux on ARM, contained in the Linux
+Individual items specific to Linux on Arm, contained in the Linux
 source code, are in the list that follows:
 
 ACPI_OS_NAME
                        This macro defines the string to be returned when
-                       an ACPI method invokes the _OS method.  On ARM64
+                       an ACPI method invokes the _OS method.  On Arm
                        systems, this macro will be "Linux" by default.
                        The command line parameter acpi_os=<string>
                        can be used to set it to some other value.  The
@@ -490,31 +545,23 @@
 
 References
 ----------
-[0] http://silver.arm.com
-    document ARM-DEN-0029, or newer:
-    "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+[0] https://developer.arm.com/documentation/den0094/latest
+    document Arm-DEN-0094: "Arm Base System Architecture", version 1.0C, dated 6 Oct 2022
 
-[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
-    Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
-    Software on ARM Platforms", dated 16 Aug 2014
+[1] https://developer.arm.com/documentation/den0044/latest
+    Document Arm-DEN-0044: "Arm Base Boot Requirements", version 2.0G, dated 15 Apr 2022
 
-[2] http://www.secretlab.ca/archives/151,
+[2] https://developer.arm.com/documentation/den0029/latest
+    Document Arm-DEN-0029: "Arm Server Base System Architecture", version 7.1, dated 06 Oct 2022
+
+[3] http://www.secretlab.ca/archives/151,
     10 Jan 2015, Copyright (c) 2015,
     Linaro Ltd., written by Grant Likely.
 
-[3] AMD ACPI for Seattle platform documentation
-    http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
+[4] _DSD (Device Specific Data) Implementation Guide
+    https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf
 
-
-[4] http://www.uefi.org/acpi
-    please see the link for the "ACPI _DSD Device
-    Property Registry Instructions"
-
-[5] http://www.uefi.org/acpi
-    please see the link for the "_DSD (Device
-    Specific Data) Implementation Guide"
-
-[6] Kernel code for the unified device
+[5] Kernel code for the unified device
     property interface can be found in
     include/linux/property.h and drivers/base/property.c.
 
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index ffeccdd..b3bbf33 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -379,6 +379,12 @@
 
     - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
 
+  For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
index c7adc78..4e4625f 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -288,6 +288,8 @@
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
+     | MOPS                         | [19-16] |    y    |
+     +------------------------------+---------+---------+
      | RPRES                        | [7-4]   |    y    |
      +------------------------------+---------+---------+
      | WFXT                         | [3-0]   |    y    |
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 83e57e4d..8f847d0 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -302,6 +302,9 @@
 HWCAP2_SMEF16F16
     Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
 
+HWCAP2_MOPS
+    Functionality implied by ID_AA64ISAR2_EL1.MOPS == 0b0001.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index ae21f81..d08e9242 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -15,11 +15,13 @@
     cpu-feature-registers
     elf_hwcaps
     hugetlbpage
+    kdump
     legacy_instructions
     memory
     memory-tagging-extension
     perf
     pointer-authentication
+    ptdump
     silicon-errata
     sme
     sve
diff --git a/Documentation/arm64/kdump.rst b/Documentation/arm64/kdump.rst
new file mode 100644
index 0000000..56a89f4
--- /dev/null
+++ b/Documentation/arm64/kdump.rst
@@ -0,0 +1,92 @@
+=======================================
+crashkernel memory reservation on arm64
+=======================================
+
+Author: Baoquan He <bhe@redhat.com>
+
+Kdump mechanism is used to capture a corrupted kernel vmcore so that
+it can be subsequently analyzed. In order to do this, a preliminarily
+reserved memory is needed to pre-load the kdump kernel and boot such
+kernel if corruption happens.
+
+That reserved memory for kdump is adapted to be able to minimally
+accommodate the kdump kernel and the user space programs needed for the
+vmcore collection.
+
+Kernel parameter
+================
+
+Through the kernel parameters below, memory can be reserved accordingly
+during the early stage of the first kernel booting so that a continuous
+large chunk of memomy can be found. The low memory reservation needs to
+be considered if the crashkernel is reserved from the high memory area.
+
+- crashkernel=size@offset
+- crashkernel=size
+- crashkernel=size,high crashkernel=size,low
+
+Low memory and high memory
+==========================
+
+For kdump reservations, low memory is the memory area under a specific
+limit, usually decided by the accessible address bits of the DMA-capable
+devices needed by the kdump kernel to run. Those devices not related to
+vmcore dumping can be ignored. On arm64, the low memory upper bound is
+not fixed: it is 1G on the RPi4 platform but 4G on most other systems.
+On special kernels built with CONFIG_ZONE_(DMA|DMA32) disabled, the
+whole system RAM is low memory. Outside of the low memory described
+above, the rest of system RAM is considered high memory.
+
+Implementation
+==============
+
+1) crashkernel=size@offset
+--------------------------
+
+The crashkernel memory must be reserved at the user-specified region or
+fail if already occupied.
+
+
+2) crashkernel=size
+-------------------
+
+The crashkernel memory region will be reserved in any available position
+according to the search order:
+
+Firstly, the kernel searches the low memory area for an available region
+with the specified size.
+
+If searching for low memory fails, the kernel falls back to searching
+the high memory area for an available region of the specified size. If
+the reservation in high memory succeeds, a default size reservation in
+the low memory will be done. Currently the default size is 128M,
+sufficient for the low memory needs of the kdump kernel.
+
+Note: crashkernel=size is the recommended option for crashkernel kernel
+reservations. The user would not need to know the system memory layout
+for a specific platform.
+
+3) crashkernel=size,high crashkernel=size,low
+---------------------------------------------
+
+crashkernel=size,(high|low) are an important supplement to
+crashkernel=size. They allows the user to specify how much memory needs
+to be allocated from the high memory and low memory respectively. On
+many systems the low memory is precious and crashkernel reservations
+from this area should be kept to a minimum.
+
+To reserve memory for crashkernel=size,high, searching is first
+attempted from the high memory region. If the reservation succeeds, the
+low memory reservation will be done subsequently.
+
+If reservation from the high memory failed, the kernel falls back to
+searching the low memory with the specified size in crashkernel=,high.
+If it succeeds, no further reservation for low memory is needed.
+
+Notes:
+
+- If crashkernel=,low is not specified, the default low memory
+  reservation will be done automatically.
+
+- if crashkernel=0,low is specified, it means that the low memory
+  reservation is omitted intentionally.
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 2a641ba..55a55f3 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -33,8 +33,8 @@
   0000000000000000	0000ffffffffffff	 256TB		user
   ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
  [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
@@ -50,8 +50,8 @@
   0000000000000000	000fffffffffffff	   4PB		user
   fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
  [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
diff --git a/Documentation/arm64/ptdump.rst b/Documentation/arm64/ptdump.rst
new file mode 100644
index 0000000..5dcfc5d
--- /dev/null
+++ b/Documentation/arm64/ptdump.rst
@@ -0,0 +1,96 @@
+======================
+Kernel page table dump
+======================
+
+ptdump is a debugfs interface that provides a detailed dump of the
+kernel page tables. It offers a comprehensive overview of the kernel
+virtual memory layout as well as the attributes associated with the
+various regions in a human-readable format. It is useful to dump the
+kernel page tables to verify permissions and memory types. Examining the
+page table entries and permissions helps identify potential security
+vulnerabilities such as mappings with overly permissive access rights or
+improper memory protections.
+
+Memory hotplug allows dynamic expansion or contraction of available
+memory without requiring a system reboot. To maintain the consistency
+and integrity of the memory management data structures, arm64 makes use
+of the ``mem_hotplug_lock`` semaphore in write mode. Additionally, in
+read mode, ``mem_hotplug_lock`` supports an efficient implementation of
+``get_online_mems()`` and ``put_online_mems()``. These protect the
+offlining of memory being accessed by the ptdump code.
+
+In order to dump the kernel page tables, enable the following
+configurations and mount debugfs::
+
+ CONFIG_GENERIC_PTDUMP=y
+ CONFIG_PTDUMP_CORE=y
+ CONFIG_PTDUMP_DEBUGFS=y
+
+ mount -t debugfs nodev /sys/kernel/debug
+ cat /sys/kernel/debug/kernel_page_tables
+
+On analysing the output of ``cat /sys/kernel/debug/kernel_page_tables``
+one can derive information about the virtual address range of the entry,
+followed by size of the memory region covered by this entry, the
+hierarchical structure of the page tables and finally the attributes
+associated with each page. The page attributes provide information about
+access permissions, execution capability, type of mapping such as leaf
+level PTE or block level PGD, PMD and PUD, and access status of a page
+within the kernel memory. Assessing these attributes can assist in
+understanding the memory layout, access patterns and security
+characteristics of the kernel pages.
+
+Kernel virtual memory layout example::
+
+ start address        end address         size             attributes
+ +---------------------------------------------------------------------------------------+
+ | ---[ Linear Mapping start ]---------------------------------------------------------- |
+ | ..................                                                                    |
+ | 0xfff0000000000000-0xfff0000000210000  2112K PTE RW NX SHD AF  UXN  MEM/NORMAL-TAGGED |
+ | 0xfff0000000210000-0xfff0000001c00000 26560K PTE ro NX SHD AF  UXN  MEM/NORMAL        |
+ | ..................                                                                    |
+ | ---[ Linear Mapping end ]------------------------------------------------------------ |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Modules start ]----------------------------------------------------------------- |
+ | ..................                                                                    |
+ | 0xffff800000000000-0xffff800008000000   128M PTE                                      |
+ | ..................                                                                    |
+ | ---[ Modules end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmalloc() area ]---------------------------------------------------------------- |
+ | ..................                                                                    |
+ | 0xffff800008010000-0xffff800008200000  1984K PTE ro x  SHD AF       UXN  MEM/NORMAL   |
+ | 0xffff800008200000-0xffff800008e00000    12M PTE ro x  SHD AF  CON  UXN  MEM/NORMAL   |
+ | ..................                                                                    |
+ | ---[ vmalloc() end ]----------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Fixmap start ]------------------------------------------------------------------ |
+ | ..................                                                                    |
+ | 0xfffffbfffdb80000-0xfffffbfffdb90000    64K PTE ro x  SHD AF  UXN  MEM/NORMAL        |
+ | 0xfffffbfffdb90000-0xfffffbfffdba0000    64K PTE ro NX SHD AF  UXN  MEM/NORMAL        |
+ | ..................                                                                    |
+ | ---[ Fixmap end ]-------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ PCI I/O start ]----------------------------------------------------------------- |
+ | ..................                                                                    |
+ | 0xfffffbfffe800000-0xfffffbffff800000    16M PTE                                      |
+ | ..................                                                                    |
+ | ---[ PCI I/O end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmemmap start ]----------------------------------------------------------------- |
+ | ..................                                                                    |
+ | 0xfffffc0002000000-0xfffffc0002200000     2M PTE RW NX SHD AF  UXN  MEM/NORMAL        |
+ | 0xfffffc0002200000-0xfffffc0020000000   478M PTE                                      |
+ | ..................                                                                    |
+ | ---[ vmemmap end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+
+``cat /sys/kernel/debug/kernel_page_tables`` output::
+
+ 0xfff0000001c00000-0xfff0000080000000     2020M PTE  RW NX SHD AF   UXN    MEM/NORMAL-TAGGED
+ 0xfff0000080000000-0xfff0000800000000       30G PMD
+ 0xfff0000800000000-0xfff0000800700000        7M PTE  RW NX SHD AF   UXN    MEM/NORMAL-TAGGED
+ 0xfff0000800700000-0xfff0000800710000       64K PTE  ro NX SHD AF   UXN    MEM/NORMAL-TAGGED
+ 0xfff0000800710000-0xfff0000880000000  2089920K PTE  RW NX SHD AF   UXN    MEM/NORMAL-TAGGED
+ 0xfff0000880000000-0xfff0040000000000     4062G PMD
+ 0xfff0040000000000-0xffff800000000000     3964T PGD
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index 80a9238..e9fad4b 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/perf/fsl-imx-ddr.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale(NXP) IMX8 DDR performance monitor
+title: Freescale(NXP) IMX8/9 DDR performance monitor
 
 maintainers:
   - Frank Li <frank.li@nxp.com>
@@ -19,6 +19,7 @@
           - fsl,imx8mm-ddr-pmu
           - fsl,imx8mn-ddr-pmu
           - fsl,imx8mp-ddr-pmu
+          - fsl,imx93-ddr-pmu
       - items:
           - enum:
               - fsl,imx8mm-ddr-pmu
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1201d2..a600208 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -207,6 +207,7 @@
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KVM
+	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -577,7 +578,6 @@
 config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	default y
-	select ARM64_MODULE_PLTS if MODULES
 	help
 	  This option links the kernel with '--fix-cortex-a53-843419' and
 	  enables PLT support to replace certain ADRP instructions, which can
@@ -2107,26 +2107,6 @@
 	  register state capable of holding two dimensional matrix tiles to
 	  enable various matrix operations.
 
-config ARM64_MODULE_PLTS
-	bool "Use PLTs to allow module memory to spill over into vmalloc area"
-	depends on MODULES
-	select HAVE_MOD_ARCH_SPECIFIC
-	help
-	  Allocate PLTs when loading modules so that jumps and calls whose
-	  targets are too far away for their relative offsets to be encoded
-	  in the instructions themselves can be bounced via veneers in the
-	  module's PLT. This allows modules to be allocated in the generic
-	  vmalloc area after the dedicated module memory area has been
-	  exhausted.
-
-	  When running with address space randomization (KASLR), the module
-	  region itself may be too far away for ordinary relative jumps and
-	  calls, and so in that case, module PLTs are required and cannot be
-	  disabled.
-
-	  Specific errata workaround(s) might also force module PLTs to be
-	  enabled (ARM64_ERRATUM_843419).
-
 config ARM64_PSEUDO_NMI
 	bool "Support for NMI-like interrupts"
 	select ARM_GIC_V3
@@ -2167,7 +2147,6 @@
 
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
-	select ARM64_MODULE_PLTS if MODULES
 	select RELOCATABLE
 	help
 	  Randomizes the virtual address at which the kernel image is
@@ -2198,9 +2177,8 @@
 	  When this option is not set, the module region will be randomized over
 	  a limited range that contains the [_stext, _etext] interval of the
 	  core kernel, so branch relocations are almost always in range unless
-	  ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
-	  particular case of region exhaustion, modules might be able to fall
-	  back to a larger 2GB area.
+	  the region is exhausted. In this particular case of region
+	  exhaustion, modules might be able to fall back to a larger 2GB area.
 
 config CC_HAVE_STACKPROTECTOR_SYSREG
 	def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index bdf1f6b..94b4861 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -23,17 +23,17 @@
 
 #include <linux/stringify.h>
 
-#define ALTINSTR_ENTRY(feature)					              \
+#define ALTINSTR_ENTRY(cpucap)					              \
 	" .word 661b - .\n"				/* label           */ \
 	" .word 663f - .\n"				/* new instruction */ \
-	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
+	" .hword " __stringify(cpucap) "\n"		/* cpucap          */ \
 	" .byte 662b-661b\n"				/* source len      */ \
 	" .byte 664f-663f\n"				/* replacement len */
 
-#define ALTINSTR_ENTRY_CB(feature, cb)					      \
+#define ALTINSTR_ENTRY_CB(cpucap, cb)					      \
 	" .word 661b - .\n"				/* label           */ \
-	" .word " __stringify(cb) "- .\n"		/* callback */	      \
-	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
+	" .word " __stringify(cb) "- .\n"		/* callback        */ \
+	" .hword " __stringify(cpucap) "\n"		/* cpucap          */ \
 	" .byte 662b-661b\n"				/* source len      */ \
 	" .byte 664f-663f\n"				/* replacement len */
 
@@ -53,13 +53,13 @@
  *
  * Alternatives with callbacks do not generate replacement instructions.
  */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)	\
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled)	\
 	".if "__stringify(cfg_enabled)" == 1\n"				\
 	"661:\n\t"							\
 	oldinstr "\n"							\
 	"662:\n"							\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(feature)						\
+	ALTINSTR_ENTRY(cpucap)						\
 	".popsection\n"							\
 	".subsection 1\n"						\
 	"663:\n\t"							\
@@ -70,31 +70,31 @@
 	".previous\n"							\
 	".endif\n"
 
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)	\
+#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb)	\
 	".if "__stringify(cfg_enabled)" == 1\n"				\
 	"661:\n\t"							\
 	oldinstr "\n"							\
 	"662:\n"							\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY_CB(feature, cb)					\
+	ALTINSTR_ENTRY_CB(cpucap, cb)					\
 	".popsection\n"							\
 	"663:\n\t"							\
 	"664:\n\t"							\
 	".endif\n"
 
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
-	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...)	\
+	__ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg))
 
-#define ALTERNATIVE_CB(oldinstr, feature, cb) \
-	__ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb)
+#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \
+	__ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb)
 #else
 
 #include <asm/assembler.h>
 
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len
 	.word \orig_offset - .
 	.word \alt_offset - .
-	.hword (\feature)
+	.hword (\cpucap)
 	.byte \orig_len
 	.byte \alt_len
 .endm
@@ -210,9 +210,9 @@ alternative_endif
 #endif  /*  __ASSEMBLY__  */
 
 /*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
  *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO));
  * N.B. If CONFIG_FOO is specified, but not selected, the whole block
  *      will be omitted, including oldinstr.
  */
@@ -224,15 +224,15 @@ alternative_endif
 #include <linux/types.h>
 
 static __always_inline bool
-alternative_has_feature_likely(const unsigned long feature)
+alternative_has_cap_likely(const unsigned long cpucap)
 {
-	compiletime_assert(feature < ARM64_NCAPS,
-			   "feature must be < ARM64_NCAPS");
+	compiletime_assert(cpucap < ARM64_NCAPS,
+			   "cpucap must be < ARM64_NCAPS");
 
 	asm_volatile_goto(
-	ALTERNATIVE_CB("b	%l[l_no]", %[feature], alt_cb_patch_nops)
+	ALTERNATIVE_CB("b	%l[l_no]", %[cpucap], alt_cb_patch_nops)
 	:
-	: [feature] "i" (feature)
+	: [cpucap] "i" (cpucap)
 	:
 	: l_no);
 
@@ -242,15 +242,15 @@ alternative_has_feature_likely(const unsigned long feature)
 }
 
 static __always_inline bool
-alternative_has_feature_unlikely(const unsigned long feature)
+alternative_has_cap_unlikely(const unsigned long cpucap)
 {
-	compiletime_assert(feature < ARM64_NCAPS,
-			   "feature must be < ARM64_NCAPS");
+	compiletime_assert(cpucap < ARM64_NCAPS,
+			   "cpucap must be < ARM64_NCAPS");
 
 	asm_volatile_goto(
-	ALTERNATIVE("nop", "b	%l[l_yes]", %[feature])
+	ALTERNATIVE("nop", "b	%l[l_yes]", %[cpucap])
 	:
-	: [feature] "i" (feature)
+	: [cpucap] "i" (cpucap)
 	:
 	: l_yes);
 
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a38b92e..00d97b8 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -13,7 +13,7 @@
 struct alt_instr {
 	s32 orig_offset;	/* offset to original instruction */
 	s32 alt_offset;		/* offset to replacement instruction */
-	u16 cpufeature;		/* cpufeature bit set for replacement */
+	u16 cpucap;		/* cpucap bit set for replacement */
 	u8  orig_len;		/* size of original instruction(s) */
 	u8  alt_len;		/* size of new instruction(s), <= orig_len */
 };
@@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
 
 void __init apply_boot_alternatives(void);
 void __init apply_alternatives_all(void);
-bool alternative_is_applied(u16 cpufeature);
+bool alternative_is_applied(u16 cpucap);
 
 #ifdef CONFIG_MODULES
 void apply_alternatives_module(void *start, size_t length);
@@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length);
 static inline void apply_alternatives_module(void *start, size_t length) { }
 #endif
 
+void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+		       __le32 *updptr, int nr_inst);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 2f5f3da..b0abc64f 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -129,4 +129,6 @@ static inline bool __init __early_cpu_has_rndr(void)
 	return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
 }
 
+u64 kaslr_early_init(void *fdt);
+
 #endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 75b211c..5b6efe8 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -18,7 +18,6 @@
 	bic	\tmp1, \tmp1, #TTBR_ASID_MASK
 	sub	\tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET	// reserved_pg_dir
 	msr	ttbr0_el1, \tmp1			// set reserved TTBR0_EL1
-	isb
 	add	\tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
 	msr	ttbr1_el1, \tmp1		// set reserved ASID
 	isb
@@ -31,7 +30,6 @@
 	extr    \tmp2, \tmp2, \tmp1, #48
 	ror     \tmp2, \tmp2, #16
 	msr	ttbr1_el1, \tmp2		// set the active ASID
-	isb
 	msr	ttbr0_el1, \tmp1		// set the non-PAN TTBR0_EL1
 	isb
 	.endm
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 74575c3..ae904a1 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -96,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread)
 	return test_ti_thread_flag(thread, TIF_32BIT);
 }
 
+long compat_arm_syscall(struct pt_regs *regs, int scno);
+
 #else /* !CONFIG_COMPAT */
 
 static inline int is_compat_thread(struct thread_info *thread)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 6bf013f..7a95c32 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -107,7 +107,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
  * CPU capabilities:
  *
  * We use arm64_cpu_capabilities to represent system features, errata work
- * arounds (both used internally by kernel and tracked in cpu_hwcaps) and
+ * arounds (both used internally by kernel and tracked in system_cpucaps) and
  * ELF HWCAPs (which are exposed to user).
  *
  * To support systems with heterogeneous CPUs, we need to make sure that we
@@ -419,12 +419,12 @@ static __always_inline bool is_hyp_code(void)
 	return is_vhe_hyp_code() || is_nvhe_hyp_code();
 }
 
-extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
 
-extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
+extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
 
 #define for_each_available_cap(cap)		\
-	for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+	for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS)
 
 bool this_cpu_has_cap(unsigned int cap);
 void cpu_set_feature(unsigned int num);
@@ -437,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void);
 
 static __always_inline bool system_capabilities_finalized(void)
 {
-	return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM);
+	return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
 }
 
 /*
@@ -449,7 +449,7 @@ static __always_inline bool cpus_have_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	return arch_test_bit(num, cpu_hwcaps);
+	return arch_test_bit(num, system_cpucaps);
 }
 
 /*
@@ -464,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	return alternative_has_feature_unlikely(num);
+	return alternative_has_cap_unlikely(num);
 }
 
 /*
@@ -504,16 +504,6 @@ static __always_inline bool cpus_have_const_cap(int num)
 		return cpus_have_cap(num);
 }
 
-static inline void cpus_set_cap(unsigned int num)
-{
-	if (num >= ARM64_NCAPS) {
-		pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
-			num, ARM64_NCAPS);
-	} else {
-		__set_bit(num, cpu_hwcaps);
-	}
-}
-
 static inline int __attribute_const__
 cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
 {
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index f86b157..ef46f2d 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -166,4 +166,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size)
 	dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
 }
 
+efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
+
 #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 037724b..96440c7 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -22,6 +22,15 @@
 	isb
 .endm
 
+.macro __init_el2_hcrx
+	mrs	x0, id_aa64mmfr1_el1
+	ubfx	x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+	cbz	x0, .Lskip_hcrx_\@
+	mov_q	x0, HCRX_HOST_FLAGS
+	msr_s	SYS_HCRX_EL2, x0
+.Lskip_hcrx_\@:
+.endm
+
 /*
  * Allow Non-secure EL1 and EL0 to access physical timer and counter.
  * This is not necessary for VHE, since the host kernel runs in EL2,
@@ -69,7 +78,7 @@
 	cbz	x0, .Lskip_trace_\@		// Skip if TraceBuffer is not present
 
 	mrs_s	x0, SYS_TRBIDR_EL1
-	and	x0, x0, TRBIDR_PROG
+	and	x0, x0, TRBIDR_EL1_P
 	cbnz	x0, .Lskip_trace_\@		// If TRBE is available at EL2
 
 	mov	x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
@@ -184,6 +193,7 @@
  */
 .macro init_el2_state
 	__init_el2_sctlr
+	__init_el2_hcrx
 	__init_el2_timers
 	__init_el2_debug
 	__init_el2_lor
@@ -284,14 +294,6 @@
 	cbz     x1, .Lskip_sme_\@
 
 	msr_s	SYS_SMPRIMAP_EL2, xzr		// Make all priorities equal
-
-	mrs	x1, id_aa64mmfr1_el1		// HCRX_EL2 present?
-	ubfx	x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
-	cbz	x1, .Lskip_sme_\@
-
-	mrs_s	x1, SYS_HCRX_EL2
-	orr	x1, x1, #HCRX_EL2_SMPME_MASK	// Enable priority mapping
-	msr_s	SYS_HCRX_EL2, x1
 .Lskip_sme_\@:
 .endm
 
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 8487aec..ae35939 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -47,7 +47,7 @@
 #define ESR_ELx_EC_DABT_LOW	(0x24)
 #define ESR_ELx_EC_DABT_CUR	(0x25)
 #define ESR_ELx_EC_SP_ALIGN	(0x26)
-/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_MOPS		(0x27)
 #define ESR_ELx_EC_FP_EXC32	(0x28)
 /* Unallocated EC: 0x29 - 0x2B */
 #define ESR_ELx_EC_FP_EXC64	(0x2C)
@@ -75,8 +75,11 @@
 
 #define ESR_ELx_IL_SHIFT	(25)
 #define ESR_ELx_IL		(UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
+#define ESR_ELx_ISS_MASK	(GENMASK(24, 0))
 #define ESR_ELx_ISS(esr)	((esr) & ESR_ELx_ISS_MASK)
+#define ESR_ELx_ISS2_SHIFT	(32)
+#define ESR_ELx_ISS2_MASK	(GENMASK_ULL(55, 32))
+#define ESR_ELx_ISS2(esr)	(((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT)
 
 /* ISS field definitions shared by different classes */
 #define ESR_ELx_WNR_SHIFT	(6)
@@ -140,6 +143,20 @@
 #define ESR_ELx_CM_SHIFT	(8)
 #define ESR_ELx_CM 		(UL(1) << ESR_ELx_CM_SHIFT)
 
+/* ISS2 field definitions for Data Aborts */
+#define ESR_ELx_TnD_SHIFT	(10)
+#define ESR_ELx_TnD 		(UL(1) << ESR_ELx_TnD_SHIFT)
+#define ESR_ELx_TagAccess_SHIFT	(9)
+#define ESR_ELx_TagAccess	(UL(1) << ESR_ELx_TagAccess_SHIFT)
+#define ESR_ELx_GCS_SHIFT	(8)
+#define ESR_ELx_GCS 		(UL(1) << ESR_ELx_GCS_SHIFT)
+#define ESR_ELx_Overlay_SHIFT	(6)
+#define ESR_ELx_Overlay		(UL(1) << ESR_ELx_Overlay_SHIFT)
+#define ESR_ELx_DirtyBit_SHIFT	(5)
+#define ESR_ELx_DirtyBit	(UL(1) << ESR_ELx_DirtyBit_SHIFT)
+#define ESR_ELx_Xs_SHIFT	(0)
+#define ESR_ELx_Xs_MASK		(GENMASK_ULL(4, 0))
+
 /* ISS field definitions for exceptions taken in to Hyp */
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
@@ -356,6 +373,15 @@
 #define ESR_ELx_SME_ISS_ZA_DISABLED	3
 #define ESR_ELx_SME_ISS_ZT_DISABLED	4
 
+/* ISS field definitions for MOPS exceptions */
+#define ESR_ELx_MOPS_ISS_MEM_INST	(UL(1) << 24)
+#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE	(UL(1) << 18)
+#define ESR_ELx_MOPS_ISS_WRONG_OPTION	(UL(1) << 17)
+#define ESR_ELx_MOPS_ISS_OPTION_A	(UL(1) << 16)
+#define ESR_ELx_MOPS_ISS_DESTREG(esr)	(((esr) & (UL(0x1f) << 10)) >> 10)
+#define ESR_ELx_MOPS_ISS_SRCREG(esr)	(((esr) & (UL(0x1f) << 5)) >> 5)
+#define ESR_ELx_MOPS_ISS_SIZEREG(esr)	(((esr) & (UL(0x1f) << 0)) >> 0)
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index e73af70..ad688e1 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,16 +8,11 @@
 #define __ASM_EXCEPTION_H
 
 #include <asm/esr.h>
-#include <asm/kprobes.h>
 #include <asm/ptrace.h>
 
 #include <linux/interrupt.h>
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #define __exception_irq_entry	__irq_entry
-#else
-#define __exception_irq_entry	__kprobes
-#endif
 
 static inline unsigned long disr_to_esr(u64 disr)
 {
@@ -77,6 +72,7 @@ void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
 void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el0_mops(struct pt_regs *regs, unsigned long esr);
 void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index fa4c6ff..8405532 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -154,4 +154,12 @@ static inline int get_num_wrps(void)
 						ID_AA64DFR0_EL1_WRPs_SHIFT);
 }
 
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
+#else
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
+{
+}
+#endif
+
 #endif	/* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 5d45f19..692b1ec 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -137,6 +137,7 @@
 #define KERNEL_HWCAP_SME_BI32I32	__khwcap2_feature(SME_BI32I32)
 #define KERNEL_HWCAP_SME_B16B16		__khwcap2_feature(SME_B16B16)
 #define KERNEL_HWCAP_SME_F16F16		__khwcap2_feature(SME_F16F16)
+#define KERNEL_HWCAP_MOPS		__khwcap2_feature(MOPS)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index e0f5f6b..1f31ec1 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -24,7 +24,7 @@
 static __always_inline bool __irqflags_uses_pmr(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
-	       alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+	       alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
 }
 
 static __always_inline void __daif_local_irq_enable(void)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index baef29f..d2d4f4c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -9,6 +9,7 @@
 
 #include <asm/esr.h>
 #include <asm/memory.h>
+#include <asm/sysreg.h>
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
@@ -92,6 +93,9 @@
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
+#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME)
+#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn)
+
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1		((1U << 31) | (1 << 23))
 #define TCR_EL2_TBI		(1 << 20)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 43c3bc0..86042af 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -267,6 +267,24 @@ extern u64 __kvm_get_mdcr_el2(void);
 	__kvm_at_err;							\
 } )
 
+void __noreturn hyp_panic(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+asmlinkage void __noreturn hyp_panic(void);
+asmlinkage void __noreturn hyp_panic_bad_stack(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+struct kvm_cpu_context;
+void handle_trap(struct kvm_cpu_context *host_ctxt);
+asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+void __noreturn __pkvm_init_finalise(void);
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+void kvm_patch_vector_branch(struct alt_instr *alt,
+	__le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+	__le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_final_ctr_el0(struct alt_instr *alt,
+	__le32 *origptr, __le32 *updptr, int nr_inst);
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
+	u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
 
 #else /* __ASSEMBLY__ */
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7e7e19e..e759e6b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1031,7 +1031,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
 
 #define kvm_vcpu_os_lock_enabled(vcpu)		\
-	(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
+	(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index f99d748..cbbcdc3 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -18,7 +18,7 @@
 
 static __always_inline bool system_uses_lse_atomics(void)
 {
-	return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS);
+	return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
 }
 
 #define __lse_ll_sc_body(op, ...)					\
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c735afd..6e0e572 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE		(SZ_128M)
+#define MODULES_VSIZE		(SZ_2G)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
@@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void)
 	return kimage_vaddr - KIMAGE_VADDR;
 }
 
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_init(void);
 static inline bool kaslr_enabled(void)
 {
-	/*
-	 * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
-	 * placement of the image rather than from the seed, so a displacement
-	 * of less than MIN_KIMG_ALIGN means that no seed was provided.
-	 */
-	return kaslr_offset() >= MIN_KIMG_ALIGN;
+	extern bool __kaslr_is_enabled;
+	return __kaslr_is_enabled;
 }
+#else
+static inline void kaslr_init(void) { }
+static inline bool kaslr_enabled(void) { return false; }
+#endif
 
 /*
  * Allow all memory at the discovery stage. We will clip it later.
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 5691169..a6fb325 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -39,11 +39,16 @@ static inline void contextidr_thread_switch(struct task_struct *next)
 /*
  * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
  */
-static inline void cpu_set_reserved_ttbr0(void)
+static inline void cpu_set_reserved_ttbr0_nosync(void)
 {
 	unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 
 	write_sysreg(ttbr, ttbr0_el1);
+}
+
+static inline void cpu_set_reserved_ttbr0(void)
+{
+	cpu_set_reserved_ttbr0_nosync();
 	isb();
 }
 
@@ -52,7 +57,6 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
 static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
 {
 	BUG_ON(pgd == swapper_pg_dir);
-	cpu_set_reserved_ttbr0();
 	cpu_do_switch_mm(virt_to_phys(pgd),mm);
 }
 
@@ -164,7 +168,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 		 * up (i.e. cpufeature framework is not up yet) and
 		 * latter only when we enable CNP via cpufeature's
 		 * enable() callback.
-		 * Also we rely on the cpu_hwcap bit being set before
+		 * Also we rely on the system_cpucaps bit being set before
 		 * calling the enable() function.
 		 */
 		ttbr1 |= TTBR_CNP_BIT;
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 18734fe..bfa6638 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -7,7 +7,6 @@
 
 #include <asm-generic/module.h>
 
-#ifdef CONFIG_ARM64_MODULE_PLTS
 struct mod_plt_sec {
 	int			plt_shndx;
 	int			plt_num_entries;
@@ -21,7 +20,6 @@ struct mod_arch_specific {
 	/* for CONFIG_DYNAMIC_FTRACE */
 	struct plt_entry	*ftrace_trampolines;
 };
-#endif
 
 u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
 			  void *loc, const Elf64_Rela *rela,
@@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
 u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 				void *loc, u64 val);
 
-#ifdef CONFIG_RANDOMIZE_BASE
-extern u64 module_alloc_base;
-#else
-#define module_alloc_base	((u64)_etext - MODULES_VSIZE)
-#endif
-
 struct plt_entry {
 	/*
 	 * A program that conforms to the AArch64 Procedure Call Standard
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index dbba4b7..b9ae834 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,9 +1,7 @@
 SECTIONS {
-#ifdef CONFIG_ARM64_MODULE_PLTS
 	.plt 0 : { BYTE(0) }
 	.init.plt 0 : { BYTE(0) }
 	.text.ftrace_trampoline 0 : { BYTE(0) }
-#endif
 
 #ifdef CONFIG_KASAN_SW_TAGS
 	/*
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 13df982..3fdae5f 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -73,6 +73,7 @@ static inline void dynamic_scs_init(void) {}
 #endif
 
 int scs_patch(const u8 eh_frame[], int size);
+asmlinkage void scs_patch_vmlinux(void);
 
 #endif /* __ASSEMBLY __ */
 
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index db7b371..9cc5014 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -100,5 +100,21 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int sco
 u8 spectre_bhb_loop_affected(int scope);
 void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+					   __le32 *updptr, int nr_inst);
+void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr,
+				       __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+					      __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr,
+					     __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+				 __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_wa3(struct alt_instr *alt,
+			   __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+				__le32 *origptr, __le32 *updptr, int nr_inst);
+
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index d30217c..17f6875 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -38,6 +38,7 @@
 	asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused)
 
 #define COND_SYSCALL_COMPAT(name) 							\
+	asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs);		\
 	asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs)	\
 	{										\
 		return sys_ni_syscall();						\
@@ -53,6 +54,7 @@
 	ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO);			\
 	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long __arm64_sys##name(const struct pt_regs *regs);		\
 	asmlinkage long __arm64_sys##name(const struct pt_regs *regs)		\
 	{									\
 		return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__));	\
@@ -73,11 +75,13 @@
 	asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
 
 #define COND_SYSCALL(name)							\
+	asmlinkage long __arm64_sys_##name(const struct pt_regs *regs);		\
 	asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs)	\
 	{									\
 		return sys_ni_syscall();					\
 	}
 
+asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
 #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
 
 #endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e72d9aa..0c07b03 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -134,25 +134,17 @@
 #define SYS_SVCR_SMSTART_SM_EL0		sys_reg(0, 3, 4, 3, 3)
 #define SYS_SVCR_SMSTOP_SMZA_EL0	sys_reg(0, 3, 4, 6, 3)
 
-#define SYS_OSDTRRX_EL1			sys_reg(2, 0, 0, 0, 2)
-#define SYS_MDCCINT_EL1			sys_reg(2, 0, 0, 2, 0)
-#define SYS_MDSCR_EL1			sys_reg(2, 0, 0, 2, 2)
-#define SYS_OSDTRTX_EL1			sys_reg(2, 0, 0, 3, 2)
-#define SYS_OSECCR_EL1			sys_reg(2, 0, 0, 6, 2)
 #define SYS_DBGBVRn_EL1(n)		sys_reg(2, 0, 0, n, 4)
 #define SYS_DBGBCRn_EL1(n)		sys_reg(2, 0, 0, n, 5)
 #define SYS_DBGWVRn_EL1(n)		sys_reg(2, 0, 0, n, 6)
 #define SYS_DBGWCRn_EL1(n)		sys_reg(2, 0, 0, n, 7)
 #define SYS_MDRAR_EL1			sys_reg(2, 0, 1, 0, 0)
 
-#define SYS_OSLAR_EL1			sys_reg(2, 0, 1, 0, 4)
-#define SYS_OSLAR_OSLK			BIT(0)
-
 #define SYS_OSLSR_EL1			sys_reg(2, 0, 1, 1, 4)
-#define SYS_OSLSR_OSLM_MASK		(BIT(3) | BIT(0))
-#define SYS_OSLSR_OSLM_NI		0
-#define SYS_OSLSR_OSLM_IMPLEMENTED	BIT(3)
-#define SYS_OSLSR_OSLK			BIT(1)
+#define OSLSR_EL1_OSLM_MASK		(BIT(3) | BIT(0))
+#define OSLSR_EL1_OSLM_NI		0
+#define OSLSR_EL1_OSLM_IMPLEMENTED	BIT(3)
+#define OSLSR_EL1_OSLK			BIT(1)
 
 #define SYS_OSDLR_EL1			sys_reg(2, 0, 1, 3, 4)
 #define SYS_DBGPRCR_EL1			sys_reg(2, 0, 1, 4, 4)
@@ -235,54 +227,8 @@
 
 /*** End of Statistical Profiling Extension ***/
 
-/*
- * TRBE Registers
- */
-#define SYS_TRBLIMITR_EL1		sys_reg(3, 0, 9, 11, 0)
-#define SYS_TRBPTR_EL1			sys_reg(3, 0, 9, 11, 1)
-#define SYS_TRBBASER_EL1		sys_reg(3, 0, 9, 11, 2)
-#define SYS_TRBSR_EL1			sys_reg(3, 0, 9, 11, 3)
-#define SYS_TRBMAR_EL1			sys_reg(3, 0, 9, 11, 4)
-#define SYS_TRBTRG_EL1			sys_reg(3, 0, 9, 11, 6)
-#define SYS_TRBIDR_EL1			sys_reg(3, 0, 9, 11, 7)
-
-#define TRBLIMITR_LIMIT_MASK		GENMASK_ULL(51, 0)
-#define TRBLIMITR_LIMIT_SHIFT		12
-#define TRBLIMITR_NVM			BIT(5)
-#define TRBLIMITR_TRIG_MODE_MASK	GENMASK(1, 0)
-#define TRBLIMITR_TRIG_MODE_SHIFT	3
-#define TRBLIMITR_FILL_MODE_MASK	GENMASK(1, 0)
-#define TRBLIMITR_FILL_MODE_SHIFT	1
-#define TRBLIMITR_ENABLE		BIT(0)
-#define TRBPTR_PTR_MASK			GENMASK_ULL(63, 0)
-#define TRBPTR_PTR_SHIFT		0
-#define TRBBASER_BASE_MASK		GENMASK_ULL(51, 0)
-#define TRBBASER_BASE_SHIFT		12
-#define TRBSR_EC_MASK			GENMASK(5, 0)
-#define TRBSR_EC_SHIFT			26
-#define TRBSR_IRQ			BIT(22)
-#define TRBSR_TRG			BIT(21)
-#define TRBSR_WRAP			BIT(20)
-#define TRBSR_ABORT			BIT(18)
-#define TRBSR_STOP			BIT(17)
-#define TRBSR_MSS_MASK			GENMASK(15, 0)
-#define TRBSR_MSS_SHIFT			0
-#define TRBSR_BSC_MASK			GENMASK(5, 0)
-#define TRBSR_BSC_SHIFT			0
-#define TRBSR_FSC_MASK			GENMASK(5, 0)
-#define TRBSR_FSC_SHIFT			0
-#define TRBMAR_SHARE_MASK		GENMASK(1, 0)
-#define TRBMAR_SHARE_SHIFT		8
-#define TRBMAR_OUTER_MASK		GENMASK(3, 0)
-#define TRBMAR_OUTER_SHIFT		4
-#define TRBMAR_INNER_MASK		GENMASK(3, 0)
-#define TRBMAR_INNER_SHIFT		0
-#define TRBTRG_TRG_MASK			GENMASK(31, 0)
-#define TRBTRG_TRG_SHIFT		0
-#define TRBIDR_FLAG			BIT(5)
-#define TRBIDR_PROG			BIT(4)
-#define TRBIDR_ALIGN_MASK		GENMASK(3, 0)
-#define TRBIDR_ALIGN_SHIFT		0
+#define TRBSR_EL1_BSC_MASK		GENMASK(5, 0)
+#define TRBSR_EL1_BSC_SHIFT		0
 
 #define SYS_PMINTENSET_EL1		sys_reg(3, 0, 9, 14, 1)
 #define SYS_PMINTENCLR_EL1		sys_reg(3, 0, 9, 14, 2)
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 1f361e2..d66dfb3 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -29,6 +29,8 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *s
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
 void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
 
+int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+
 /*
  * Move regs->pc to next instruction and do necessary setup before it
  * is executed.
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 05f4fc2..14be500 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void)
 	ttbr &= ~TTBR_ASID_MASK;
 	/* reserved_pg_dir placed before swapper_pg_dir */
 	write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
-	isb();
 	/* Set reserved ASID */
 	write_sysreg(ttbr, ttbr1_el1);
 	isb();
@@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void)
 	ttbr1 &= ~TTBR_ASID_MASK;		/* safety measure */
 	ttbr1 |= ttbr0 & TTBR_ASID_MASK;
 	write_sysreg(ttbr1, ttbr1_el1);
-	isb();
 
 	/* Restore user page table */
 	write_sysreg(ttbr0, ttbr0_el1);
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 69a4fb7..a2cac43 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -102,5 +102,6 @@
 #define HWCAP2_SME_BI32I32	(1UL << 40)
 #define HWCAP2_SME_B16B16	(1UL << 41)
 #define HWCAP2_SME_F16F16	(1UL << 42)
+#define HWCAP2_MOPS		(1UL << 43)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7c2bb4e..3864a64 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -42,8 +42,7 @@
 obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS)	+= compat_alignment.o
 obj-$(CONFIG_KUSER_HELPERS)		+= kuser32.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o entry-ftrace.o
-obj-$(CONFIG_MODULES)			+= module.o
-obj-$(CONFIG_ARM64_MODULE_PLTS)		+= module-plts.o
+obj-$(CONFIG_MODULES)			+= module.o module-plts.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_CPU_PM)			+= sleep.o suspend.o
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d32d4ed..8ff6610 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,8 +24,8 @@
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
-#define ALT_CAP(a)		((a)->cpufeature & ~ARM64_CB_BIT)
-#define ALT_HAS_CB(a)		((a)->cpufeature & ARM64_CB_BIT)
+#define ALT_CAP(a)		((a)->cpucap & ~ARM64_CB_BIT)
+#define ALT_HAS_CB(a)		((a)->cpucap & ARM64_CB_BIT)
 
 /* Volatile, as we may be patching the guts of READ_ONCE() */
 static volatile int all_alternatives_applied;
@@ -37,12 +37,12 @@ struct alt_region {
 	struct alt_instr *end;
 };
 
-bool alternative_is_applied(u16 cpufeature)
+bool alternative_is_applied(u16 cpucap)
 {
-	if (WARN_ON(cpufeature >= ARM64_NCAPS))
+	if (WARN_ON(cpucap >= ARM64_NCAPS))
 		return false;
 
-	return test_bit(cpufeature, applied_alternatives);
+	return test_bit(cpucap, applied_alternatives);
 }
 
 /*
@@ -121,11 +121,11 @@ static noinstr void patch_alternative(struct alt_instr *alt,
  * accidentally call into the cache.S code, which is patched by us at
  * runtime.
  */
-static void clean_dcache_range_nopatch(u64 start, u64 end)
+static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
 {
 	u64 cur, d_size, ctr_el0;
 
-	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+	ctr_el0 = arm64_ftr_reg_ctrel0.sys_val;
 	d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
 							   CTR_EL0_DminLine_SHIFT);
 	cur = start & ~(d_size - 1);
@@ -141,7 +141,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
 
 static void __apply_alternatives(const struct alt_region *region,
 				 bool is_module,
-				 unsigned long *feature_mask)
+				 unsigned long *cpucap_mask)
 {
 	struct alt_instr *alt;
 	__le32 *origptr, *updptr;
@@ -151,7 +151,7 @@ static void __apply_alternatives(const struct alt_region *region,
 		int nr_inst;
 		int cap = ALT_CAP(alt);
 
-		if (!test_bit(cap, feature_mask))
+		if (!test_bit(cap, cpucap_mask))
 			continue;
 
 		if (!cpus_have_cap(cap))
@@ -188,11 +188,10 @@ static void __apply_alternatives(const struct alt_region *region,
 		icache_inval_all_pou();
 		isb();
 
-		/* Ignore ARM64_CB bit from feature mask */
 		bitmap_or(applied_alternatives, applied_alternatives,
-			  feature_mask, ARM64_NCAPS);
+			  cpucap_mask, ARM64_NCAPS);
 		bitmap_and(applied_alternatives, applied_alternatives,
-			   cpu_hwcaps, ARM64_NCAPS);
+			   system_cpucaps, ARM64_NCAPS);
 	}
 }
 
@@ -239,7 +238,7 @@ static int __init __apply_alternatives_multi_stop(void *unused)
 	} else {
 		DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);
 
-		bitmap_complement(remaining_capabilities, boot_capabilities,
+		bitmap_complement(remaining_capabilities, boot_cpucaps,
 				  ARM64_NCAPS);
 
 		BUG_ON(all_alternatives_applied);
@@ -274,7 +273,7 @@ void __init apply_boot_alternatives(void)
 	pr_info("applying boot alternatives\n");
 
 	__apply_alternatives(&kernel_alternatives, false,
-			     &boot_capabilities[0]);
+			     &boot_cpucaps[0]);
 }
 
 #ifdef CONFIG_MODULES
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 7d7128c..05660f0d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -105,11 +105,11 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
 unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-EXPORT_SYMBOL(cpu_hwcaps);
-static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS];
+DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
+EXPORT_SYMBOL(system_cpucaps);
+static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS];
 
-DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
+DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
 
 bool arm64_use_ng_mappings = false;
 EXPORT_SYMBOL(arm64_use_ng_mappings);
@@ -137,7 +137,7 @@ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
 void dump_cpu_features(void)
 {
 	/* file-wide pr_fmt adds "CPU features: " prefix */
-	pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
+	pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
 }
 
 #define ARM64_CPUID_FIELDS(reg, field, min_value)			\
@@ -223,6 +223,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
 		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -364,6 +365,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0),
@@ -954,24 +956,24 @@ extern const struct arm64_cpu_capabilities arm64_errata[];
 static const struct arm64_cpu_capabilities arm64_features[];
 
 static void __init
-init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
+init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
 {
 	for (; caps->matches; caps++) {
 		if (WARN(caps->capability >= ARM64_NCAPS,
 			"Invalid capability %d\n", caps->capability))
 			continue;
-		if (WARN(cpu_hwcaps_ptrs[caps->capability],
+		if (WARN(cpucap_ptrs[caps->capability],
 			"Duplicate entry for capability %d\n",
 			caps->capability))
 			continue;
-		cpu_hwcaps_ptrs[caps->capability] = caps;
+		cpucap_ptrs[caps->capability] = caps;
 	}
 }
 
-static void __init init_cpu_hwcaps_indirect_list(void)
+static void __init init_cpucap_indirect_list(void)
 {
-	init_cpu_hwcaps_indirect_list_from_array(arm64_features);
-	init_cpu_hwcaps_indirect_list_from_array(arm64_errata);
+	init_cpucap_indirect_list_from_array(arm64_features);
+	init_cpucap_indirect_list_from_array(arm64_errata);
 }
 
 static void __init setup_boot_cpu_capabilities(void);
@@ -1049,10 +1051,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
 
 	/*
-	 * Initialize the indirect array of CPU hwcaps capabilities pointers
-	 * before we handle the boot CPU below.
+	 * Initialize the indirect array of CPU capabilities pointers before we
+	 * handle the boot CPU below.
 	 */
-	init_cpu_hwcaps_indirect_list();
+	init_cpucap_indirect_list();
 
 	/*
 	 * Detect and enable early CPU capabilities based on the boot CPU,
@@ -2048,9 +2050,9 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
 static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
 				     int scope)
 {
-	bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
-	bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
-	bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
+	bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+	bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+	bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
 
 	return apa || apa3 || api;
 }
@@ -2186,6 +2188,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused)
 	set_pstate_dit(1);
 }
 
+static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
+{
+	sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
+}
+
 /* Internal helper functions to match cpu capability type */
 static bool
 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2235,11 +2242,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_ECV_CNTPOFF,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
-		.sys_reg = SYS_ID_AA64MMFR0_EL1,
-		.field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
-		.field_width = 4,
-		.sign = FTR_UNSIGNED,
-		.min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF,
+		ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF)
 	},
 #ifdef CONFIG_ARM64_PAN
 	{
@@ -2309,6 +2312,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = is_kvm_protected_mode,
 	},
+	{
+		.desc = "HCRX_EL2 register",
+		.capability = ARM64_HAS_HCX,
+		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+		.matches = has_cpuid_feature,
+		ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP)
+	},
 #endif
 	{
 		.desc = "Kernel page table isolation (KPTI)",
@@ -2641,6 +2651,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_dit,
 		ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP)
 	},
+	{
+		.desc = "Memory Copy and Memory Set instructions",
+		.capability = ARM64_HAS_MOPS,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_mops,
+		ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP)
+	},
 	{},
 };
 
@@ -2769,6 +2787,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+	HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS),
 #ifdef CONFIG_ARM64_SME
 	HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
@@ -2895,7 +2914,7 @@ static void update_cpu_capabilities(u16 scope_mask)
 
 	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
 	for (i = 0; i < ARM64_NCAPS; i++) {
-		caps = cpu_hwcaps_ptrs[i];
+		caps = cpucap_ptrs[i];
 		if (!caps || !(caps->type & scope_mask) ||
 		    cpus_have_cap(caps->capability) ||
 		    !caps->matches(caps, cpucap_default_scope(caps)))
@@ -2903,10 +2922,11 @@ static void update_cpu_capabilities(u16 scope_mask)
 
 		if (caps->desc)
 			pr_info("detected: %s\n", caps->desc);
-		cpus_set_cap(caps->capability);
+
+		__set_bit(caps->capability, system_cpucaps);
 
 		if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
-			set_bit(caps->capability, boot_capabilities);
+			set_bit(caps->capability, boot_cpucaps);
 	}
 }
 
@@ -2920,7 +2940,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused)
 	u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU;
 
 	for_each_available_cap(i) {
-		const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i];
+		const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i];
 
 		if (WARN_ON(!cap))
 			continue;
@@ -2950,7 +2970,7 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
 	for (i = 0; i < ARM64_NCAPS; i++) {
 		unsigned int num;
 
-		caps = cpu_hwcaps_ptrs[i];
+		caps = cpucap_ptrs[i];
 		if (!caps || !(caps->type & scope_mask))
 			continue;
 		num = caps->capability;
@@ -2995,7 +3015,7 @@ static void verify_local_cpu_caps(u16 scope_mask)
 	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
 
 	for (i = 0; i < ARM64_NCAPS; i++) {
-		caps = cpu_hwcaps_ptrs[i];
+		caps = cpucap_ptrs[i];
 		if (!caps || !(caps->type & scope_mask))
 			continue;
 
@@ -3194,7 +3214,7 @@ static void __init setup_boot_cpu_capabilities(void)
 bool this_cpu_has_cap(unsigned int n)
 {
 	if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
-		const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+		const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
 
 		if (cap)
 			return cap->matches(cap, SCOPE_LOCAL_CPU);
@@ -3207,13 +3227,13 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap);
 /*
  * This helper function is used in a narrow window when,
  * - The system wide safe registers are set with all the SMP CPUs and,
- * - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
+ * - The SYSTEM_FEATURE system_cpucaps may not have been set.
  * In all other cases cpus_have_{const_}cap() should be used.
  */
 static bool __maybe_unused __system_matches_cap(unsigned int n)
 {
 	if (n < ARM64_NCAPS) {
-		const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+		const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
 
 		if (cap)
 			return cap->matches(cap, SCOPE_SYSTEM);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 42e19ff..d1f6859 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -13,7 +13,7 @@
 #include <linux/of_device.h>
 #include <linux/psci.h>
 
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_ACPI_PROCESSOR_IDLE
 
 #include <acpi/processor.h>
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index eb4378c..076a124 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -125,6 +125,7 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_SME_BI32I32]	= "smebi32i32",
 	[KERNEL_HWCAP_SME_B16B16]	= "smeb16b16",
 	[KERNEL_HWCAP_SME_F16F16]	= "smef16f16",
+	[KERNEL_HWCAP_MOPS]		= "mops",
 };
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3af3c01..6b2e0c3 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
 {
 	unsigned long flags;
 
@@ -135,11 +135,13 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
 	flags = read_thread_flags();
 	if (unlikely(flags & _TIF_WORK_MASK))
 		do_notify_resume(regs, flags);
+
+	lockdep_sys_exit();
 }
 
 static __always_inline void exit_to_user_mode(struct pt_regs *regs)
 {
-	prepare_exit_to_user_mode(regs);
+	exit_to_user_mode_prepare(regs);
 	mte_check_tfsr_exit();
 	__exit_to_user_mode();
 }
@@ -611,6 +613,14 @@ static void noinstr el0_bti(struct pt_regs *regs)
 	exit_to_user_mode(regs);
 }
 
+static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+	enter_from_user_mode(regs);
+	local_daif_restore(DAIF_PROCCTX);
+	do_el0_mops(regs, esr);
+	exit_to_user_mode(regs);
+}
+
 static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_user_mode(regs);
@@ -688,6 +698,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
 	case ESR_ELx_EC_BTI:
 		el0_bti(regs);
 		break;
+	case ESR_ELx_EC_MOPS:
+		el0_mops(regs, esr);
+		break;
 	case ESR_ELx_EC_BREAKPT_LOW:
 	case ESR_ELx_EC_SOFTSTP_LOW:
 	case ESR_ELx_EC_WATCHPT_LOW:
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ab2a6e3..a40e5e5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -101,12 +101,11 @@
 .org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
 
-	.macro tramp_alias, dst, sym, tmp
-	mov_q	\dst, TRAMP_VALIAS
-	adr_l	\tmp, \sym
-	add	\dst, \dst, \tmp
-	adr_l	\tmp, .entry.tramp.text
-	sub	\dst, \dst, \tmp
+	.macro	tramp_alias, dst, sym
+	.set	.Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
+	movz	\dst, :abs_g2_s:.Lalias\@
+	movk	\dst, :abs_g1_nc:.Lalias\@
+	movk	\dst, :abs_g0_nc:.Lalias\@
 	.endm
 
 	/*
@@ -435,13 +434,14 @@
 	eret
 alternative_else_nop_endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	bne	4f
 	msr	far_el1, x29
-	tramp_alias	x30, tramp_exit_native, x29
-	br	x30
-4:
-	tramp_alias	x30, tramp_exit_compat, x29
-	br	x30
+
+	ldr_this_cpu	x30, this_cpu_vector, x29
+	tramp_alias	x29, tramp_exit
+	msr		vbar_el1, x30		// install vector table
+	ldr		lr, [sp, #S_LR]		// restore x30
+	add		sp, sp, #PT_REGS_SIZE	// restore sp
+	br		x29
 #endif
 	.else
 	ldr	lr, [sp, #S_LR]
@@ -732,22 +732,6 @@
 .org 1b + 128	// Did we overflow the ventry slot?
 	.endm
 
-	.macro tramp_exit, regsize = 64
-	tramp_data_read_var	x30, this_cpu_vector
-	get_this_cpu_offset x29
-	ldr	x30, [x30, x29]
-
-	msr	vbar_el1, x30
-	ldr	lr, [sp, #S_LR]
-	tramp_unmap_kernel	x29
-	.if	\regsize == 64
-	mrs	x29, far_el1
-	.endif
-	add	sp, sp, #PT_REGS_SIZE		// restore sp
-	eret
-	sb
-	.endm
-
 	.macro	generate_tramp_vector,	kpti, bhb
 .Lvector_start\@:
 	.space	0x400
@@ -768,7 +752,7 @@
  */
 	.pushsection ".entry.tramp.text", "ax"
 	.align	11
-SYM_CODE_START_NOALIGN(tramp_vectors)
+SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors)
 #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
 	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_LOOP
 	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_FW
@@ -777,13 +761,12 @@
 	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_NONE
 SYM_CODE_END(tramp_vectors)
 
-SYM_CODE_START(tramp_exit_native)
-	tramp_exit
-SYM_CODE_END(tramp_exit_native)
-
-SYM_CODE_START(tramp_exit_compat)
-	tramp_exit	32
-SYM_CODE_END(tramp_exit_compat)
+SYM_CODE_START_LOCAL(tramp_exit)
+	tramp_unmap_kernel	x29
+	mrs		x29, far_el1		// restore x29
+	eret
+	sb
+SYM_CODE_END(tramp_exit)
 	.popsection				// .entry.tramp.text
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
@@ -1077,7 +1060,7 @@
 alternative_else_nop_endif
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	tramp_alias	dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
+	tramp_alias	dst=x5, sym=__sdei_asm_exit_trampoline
 	br	x5
 #endif
 SYM_CODE_END(__sdei_asm_handler)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2fbafa5..7a1aeb9 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1649,6 +1649,7 @@ void fpsimd_flush_thread(void)
 
 		fpsimd_flush_thread_vl(ARM64_VEC_SME);
 		current->thread.svcr = 0;
+		sme_smstop();
 	}
 
 	current->thread.fp_type = FP_STATE_FPSIMD;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 432626c..a650f5e 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 static struct plt_entry *get_ftrace_plt(struct module *mod)
 {
-#ifdef CONFIG_ARM64_MODULE_PLTS
+#ifdef CONFIG_MODULES
 	struct plt_entry *plt = mod->arch.ftrace_trampolines;
 
 	return &plt[FTRACE_PLT_IDX];
@@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
 	 * must use a PLT to reach it. We can only place PLTs for modules, and
 	 * only when module PLT support is built-in.
 	 */
-	if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+	if (!IS_ENABLED(CONFIG_MODULES))
 		return false;
 
 	/*
@@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	 *
 	 * Note: 'mod' is only set at module load time.
 	 */
-	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) &&
-	    IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
+	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod)
 		return aarch64_insn_patch_text_nosync((void *)pc, new);
-	}
 
 	if (!ftrace_find_callable_addr(rec, mod, &addr))
 		return -EINVAL;
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 788597a..02870beb 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -99,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn)
 
 void notrace save_processor_state(void)
 {
-	WARN_ON(num_online_cpus() != 1);
 }
 
 void notrace restore_processor_state(void)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b29a311..db2a186 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -973,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu)
 	return 0;
 }
 
-#ifdef CONFIG_CPU_PM
-extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
-#else
-static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
-{
-}
-#endif
-
 /*
  * One-time initialisation.
  */
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 370ab84..8439248 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -123,6 +123,7 @@ static const struct ftr_set_desc isar2 __initconst = {
 	.fields		= {
 		FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
 		FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+		FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
 		{}
 	},
 };
@@ -174,6 +175,7 @@ static const struct {
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
 	  "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
 	  "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0"	   },
+	{ "arm64.nomops",		"id_aa64isar2.mops=0" },
 	{ "arm64.nomte",		"id_aa64pfr1.mte=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f2..17f96a1 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -4,90 +4,35 @@
  */
 
 #include <linux/cache.h>
-#include <linux/crc32.h>
 #include <linux/init.h>
-#include <linux/libfdt.h>
-#include <linux/mm_types.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/pgtable.h>
-#include <linux/random.h>
+#include <linux/printk.h>
 
-#include <asm/fixmap.h>
-#include <asm/kernel-pgtable.h>
+#include <asm/cpufeature.h>
 #include <asm/memory.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
 
-u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;
 
 struct arm64_ftr_override kaslr_feature_override __initdata;
 
-static int __init kaslr_init(void)
+bool __ro_after_init __kaslr_is_enabled = false;
+
+void __init kaslr_init(void)
 {
-	u64 module_range;
-	u32 seed;
-
-	/*
-	 * Set a reasonable default for module_alloc_base in case
-	 * we end up running with module randomization disabled.
-	 */
-	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-
 	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
 		pr_info("KASLR disabled on command line\n");
-		return 0;
+		return;
 	}
 
-	if (!kaslr_enabled()) {
+	/*
+	 * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
+	 * placement of the image rather than from the seed, so a displacement
+	 * of less than MIN_KIMG_ALIGN means that no seed was provided.
+	 */
+	if (kaslr_offset() < MIN_KIMG_ALIGN) {
 		pr_warn("KASLR disabled due to lack of seed\n");
-		return 0;
+		return;
 	}
 
 	pr_info("KASLR enabled\n");
-
-	/*
-	 * KASAN without KASAN_VMALLOC does not expect the module region to
-	 * intersect the vmalloc region, since shadow memory is allocated for
-	 * each module at load time, whereas the vmalloc region will already be
-	 * shadowed by KASAN zero pages.
-	 */
-	BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	              IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
-		     !IS_ENABLED(CONFIG_KASAN_VMALLOC));
-
-	seed = get_random_u32();
-
-	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
-		/*
-		 * Randomize the module region over a 2 GB window covering the
-		 * kernel. This reduces the risk of modules leaking information
-		 * about the address of the kernel itself, but results in
-		 * branches between modules and the core kernel that are
-		 * resolved via PLTs. (Branches between modules will be
-		 * resolved normally.)
-		 */
-		module_range = SZ_2G - (u64)(_end - _stext);
-		module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
-	} else {
-		/*
-		 * Randomize the module region by setting module_alloc_base to
-		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
-		 * _stext) . This guarantees that the resulting region still
-		 * covers [_stext, _etext], and that all relative branches can
-		 * be resolved without veneers unless this region is exhausted
-		 * and we fall back to a larger 2GB window in module_alloc()
-		 * when ARM64_MODULE_PLTS is enabled.
-		 */
-		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
-	}
-
-	/* use the lower 21 bits to randomize the base of the module region */
-	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
-	module_alloc_base &= PAGE_MASK;
-
-	return 0;
+	__kaslr_is_enabled = true;
 }
-subsys_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 543493b..ad02058 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -7,6 +7,7 @@
 #include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/moduleloader.h>
 #include <linux/sort.h>
 
 static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975..dd85129 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -7,6 +7,8 @@
  * Author: Will Deacon <will.deacon@arm.com>
  */
 
+#define pr_fmt(fmt) "Modules: " fmt
+
 #include <linux/bitops.h>
 #include <linux/elf.h>
 #include <linux/ftrace.h>
@@ -15,52 +17,131 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/moduleloader.h>
+#include <linux/random.h>
 #include <linux/scs.h>
 #include <linux/vmalloc.h>
+
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/scs.h>
 #include <asm/sections.h>
 
+static u64 module_direct_base __ro_after_init = 0;
+static u64 module_plt_base __ro_after_init = 0;
+
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+{
+	u64 max_pgoff, pgoff;
+
+	if ((end - start) >= size)
+		return 0;
+
+	max_pgoff = (size - (end - start)) / PAGE_SIZE;
+	pgoff = get_random_u32_inclusive(0, max_pgoff);
+
+	return start - pgoff * PAGE_SIZE;
+}
+
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+static int __init module_init_limits(void)
+{
+	u64 kernel_end = (u64)_end;
+	u64 kernel_start = (u64)_text;
+	u64 kernel_size = kernel_end - kernel_start;
+
+	/*
+	 * The default modules region is placed immediately below the kernel
+	 * image, and is large enough to use the full 2G relocation range.
+	 */
+	BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+	BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+	if (!kaslr_enabled()) {
+		if (kernel_size < SZ_128M)
+			module_direct_base = kernel_end - SZ_128M;
+		if (kernel_size < SZ_2G)
+			module_plt_base = kernel_end - SZ_2G;
+	} else {
+		u64 min = kernel_start;
+		u64 max = kernel_end;
+
+		if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+			pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+		} else {
+			module_direct_base = random_bounding_box(SZ_128M, min, max);
+			if (module_direct_base) {
+				min = module_direct_base;
+				max = module_direct_base + SZ_128M;
+			}
+		}
+
+		module_plt_base = random_bounding_box(SZ_2G, min, max);
+	}
+
+	pr_info("%llu pages in range for non-PLT usage",
+		module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+	pr_info("%llu pages in range for PLT usage",
+		module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+	return 0;
+}
+subsys_initcall(module_init_limits);
+
 void *module_alloc(unsigned long size)
 {
-	u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
-	gfp_t gfp_mask = GFP_KERNEL;
-	void *p;
+	void *p = NULL;
 
-	/* Silence the initial allocation */
-	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
-		gfp_mask |= __GFP_NOWARN;
+	/*
+	 * Where possible, prefer to allocate within direct branch range of the
+	 * kernel such that no PLTs are necessary.
+	 */
+	if (module_direct_base) {
+		p = __vmalloc_node_range(size, MODULE_ALIGN,
+					 module_direct_base,
+					 module_direct_base + SZ_128M,
+					 GFP_KERNEL | __GFP_NOWARN,
+					 PAGE_KERNEL, 0, NUMA_NO_NODE,
+					 __builtin_return_address(0));
+	}
 
-	if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	    IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-		/* don't exceed the static module region - see below */
-		module_alloc_end = MODULES_END;
+	if (!p && module_plt_base) {
+		p = __vmalloc_node_range(size, MODULE_ALIGN,
+					 module_plt_base,
+					 module_plt_base + SZ_2G,
+					 GFP_KERNEL | __GFP_NOWARN,
+					 PAGE_KERNEL, 0, NUMA_NO_NODE,
+					 __builtin_return_address(0));
+	}
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
-				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
-				NUMA_NO_NODE, __builtin_return_address(0));
+	if (!p) {
+		pr_warn_ratelimited("%s: unable to allocate memory\n",
+				    __func__);
+	}
 
-	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-	    (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
-	     (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-	      !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
-		/*
-		 * KASAN without KASAN_VMALLOC can only deal with module
-		 * allocations being served from the reserved module region,
-		 * since the remainder of the vmalloc region is already
-		 * backed by zero shadow pages, and punching holes into it
-		 * is non-trivial. Since the module region is not randomized
-		 * when KASAN is enabled without KASAN_VMALLOC, it is even
-		 * less likely that the module region gets exhausted, so we
-		 * can simply omit this fallback in that case.
-		 */
-		p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
-				module_alloc_base + SZ_2G, GFP_KERNEL,
-				PAGE_KERNEL, 0, NUMA_NO_NODE,
-				__builtin_return_address(0));
-
-	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
+	if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
 		vfree(p);
 		return NULL;
 	}
@@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
 					     AARCH64_INSN_IMM_26);
-
-			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-			    ovf == -ERANGE) {
+			if (ovf == -ERANGE) {
 				val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
 				if (!val)
 					return -ENOEXEC;
@@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
 				  const Elf_Shdr *sechdrs,
 				  struct module *mod)
 {
-#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+#if defined(CONFIG_DYNAMIC_FTRACE)
 	const Elf_Shdr *s;
 	struct plt_entry *plts;
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b8ec7b3..417a8a8 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
+	kaslr_init();
+
 	/*
 	 * If know now we are going to need KPTI then use non-global
 	 * mappings from the start, avoiding the cost of rewriting
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 2cfc810..e304f7e 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/elf.h>
+#include <asm/exception.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
@@ -398,7 +399,7 @@ static int restore_tpidr2_context(struct user_ctxs *user)
 
 	__get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
 	if (!err)
-		current->thread.tpidr2_el0 = tpidr2_el0;
+		write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0);
 
 	return err;
 }
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index da84cf8..5a668d7 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -147,11 +147,9 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	 * exit regardless, as the old entry assembly did.
 	 */
 	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
-		local_daif_mask();
 		flags = read_thread_flags();
 		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
 			return;
-		local_daif_restore(DAIF_PROCCTX);
 	}
 
 trace_exit:
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4bb1b8f..794a2dd 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -514,6 +514,63 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
 	die("Oops - FPAC", regs, esr);
 }
 
+void do_el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+	bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+	bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+	int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+	int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+	int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+	unsigned long dst, src, size;
+
+	dst = pt_regs_read_reg(regs, dstreg);
+	src = pt_regs_read_reg(regs, srcreg);
+	size = pt_regs_read_reg(regs, sizereg);
+
+	/*
+	 * Put the registers back in the original format suitable for a
+	 * prologue instruction, using the generic return routine from the
+	 * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+	 */
+	if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+		/* SET* instruction */
+		if (option_a ^ wrong_option) {
+			/* Format is from Option A; forward set */
+			pt_regs_write_reg(regs, dstreg, dst + size);
+			pt_regs_write_reg(regs, sizereg, -size);
+		}
+	} else {
+		/* CPY* instruction */
+		if (!(option_a ^ wrong_option)) {
+			/* Format is from Option B */
+			if (regs->pstate & PSR_N_BIT) {
+				/* Backward copy */
+				pt_regs_write_reg(regs, dstreg, dst - size);
+				pt_regs_write_reg(regs, srcreg, src - size);
+			}
+		} else {
+			/* Format is from Option A */
+			if (size & BIT(63)) {
+				/* Forward copy */
+				pt_regs_write_reg(regs, dstreg, dst + size);
+				pt_regs_write_reg(regs, srcreg, src + size);
+				pt_regs_write_reg(regs, sizereg, -size);
+			}
+		}
+	}
+
+	if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+		regs->pc -= 8;
+	else
+		regs->pc -= 4;
+
+	/*
+	 * If single stepping then finish the step before executing the
+	 * prologue instruction.
+	 */
+	user_fastforward_single_step(current);
+}
+
 #define __user_cache_maint(insn, address, res)			\
 	if (address >= TASK_SIZE_MAX) {				\
 		res = -EFAULT;					\
@@ -824,6 +881,7 @@ static const char *esr_class_str[] = {
 	[ESR_ELx_EC_DABT_LOW]		= "DABT (lower EL)",
 	[ESR_ELx_EC_DABT_CUR]		= "DABT (current EL)",
 	[ESR_ELx_EC_SP_ALIGN]		= "SP Alignment",
+	[ESR_ELx_EC_MOPS]		= "MOPS",
 	[ESR_ELx_EC_FP_EXC32]		= "FP (AArch32)",
 	[ESR_ELx_EC_FP_EXC64]		= "FP (AArch64)",
 	[ESR_ELx_EC_SERROR]		= "SError",
@@ -947,7 +1005,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
 }
 
 /* GENERIC_BUG traps */
-
+#ifdef CONFIG_GENERIC_BUG
 int is_valid_bugaddr(unsigned long addr)
 {
 	/*
@@ -959,6 +1017,7 @@ int is_valid_bugaddr(unsigned long addr)
 	 */
 	return 1;
 }
+#endif
 
 static int bug_handler(struct pt_regs *regs, unsigned long esr)
 {
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 55f80fb..8725291c 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -333,7 +333,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
 
 	/* Check if we have TRBE implemented and available at the host */
 	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
-	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
+	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
 		vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 }
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e78a08a..eb123aa 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -130,6 +130,9 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
 
 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+	if (cpus_have_final_cap(ARM64_HAS_HCX))
+		write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -144,6 +147,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
 		vcpu->arch.hcr_el2 &= ~HCR_VSE;
 		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
 	}
+
+	if (cpus_have_final_cap(ARM64_HAS_HCX))
+		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index d756b93..4558c02 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -56,7 +56,7 @@ static void __debug_save_trace(u64 *trfcr_el1)
 	*trfcr_el1 = 0;
 
 	/* Check if the TRBE is enabled */
-	if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE))
+	if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
 		return;
 	/*
 	 * Prohibit trace generation while we are in guest.
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 71b1209..34262ff 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -388,9 +388,9 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
 		return read_from_write_only(vcpu, p, r);
 
 	/* Forward the OSLK bit to OSLSR */
-	oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK;
-	if (p->regval & SYS_OSLAR_OSLK)
-		oslsr |= SYS_OSLSR_OSLK;
+	oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK;
+	if (p->regval & OSLAR_EL1_OSLK)
+		oslsr |= OSLSR_EL1_OSLK;
 
 	__vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
 	return true;
@@ -414,7 +414,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	 * The only modifiable bit is the OSLK bit. Refuse the write if
 	 * userspace attempts to change any other bit in the register.
 	 */
-	if ((val ^ rd->val) & ~SYS_OSLSR_OSLK)
+	if ((val ^ rd->val) & ~OSLSR_EL1_OSLK)
 		return -EINVAL;
 
 	__vcpu_sys_reg(vcpu, rd->reg) = val;
@@ -1252,6 +1252,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
 				 ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
 		if (!cpus_have_final_cap(ARM64_HAS_WFXT))
 			val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS);
 		break;
 	case SYS_ID_AA64DFR0_EL1:
 		/* Limit debug to ARMv8.0 */
@@ -1781,7 +1782,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 },
 	{ SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1,
-		SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
+		OSLSR_EL1_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
 	{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index 96b1719..f9a53b7 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <asm/neon-intrinsics.h>
 
-void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
 	const unsigned long * __restrict p2)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
@@ -37,7 +37,7 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
 	const unsigned long * __restrict p2,
 	const unsigned long * __restrict p3)
 {
@@ -73,7 +73,7 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
 	const unsigned long * __restrict p2,
 	const unsigned long * __restrict p3,
 	const unsigned long * __restrict p4)
@@ -118,7 +118,7 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
 	const unsigned long * __restrict p2,
 	const unsigned long * __restrict p3,
 	const unsigned long * __restrict p4,
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e1e0dca..1881975 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -364,8 +364,8 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
 	ttbr1 &= ~TTBR_ASID_MASK;
 	ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
 
+	cpu_set_reserved_ttbr0_nosync();
 	write_sysreg(ttbr1, ttbr1_el1);
-	isb();
 	write_sysreg(ttbr0, ttbr0_el1);
 	isb();
 	post_ttbr_update_workaround();
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index cb21ccd..7b2537c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -66,6 +66,8 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr
 
 static void data_abort_decode(unsigned long esr)
 {
+	unsigned long iss2 = ESR_ELx_ISS2(esr);
+
 	pr_alert("Data abort info:\n");
 
 	if (esr & ESR_ELx_ISV) {
@@ -78,12 +80,21 @@ static void data_abort_decode(unsigned long esr)
 			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
 			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
 	} else {
-		pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
+		pr_alert("  ISV = 0, ISS = 0x%08lx, ISS2 = 0x%08lx\n",
+			 esr & ESR_ELx_ISS_MASK, iss2);
 	}
 
-	pr_alert("  CM = %lu, WnR = %lu\n",
+	pr_alert("  CM = %lu, WnR = %lu, TnD = %lu, TagAccess = %lu\n",
 		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
-		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
+		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT,
+		 (iss2 & ESR_ELx_TnD) >> ESR_ELx_TnD_SHIFT,
+		 (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT);
+
+	pr_alert("  GCS = %ld, Overlay = %lu, DirtyBit = %lu, Xs = %llu\n",
+		 (iss2 & ESR_ELx_GCS) >> ESR_ELx_GCS_SHIFT,
+		 (iss2 & ESR_ELx_Overlay) >> ESR_ELx_Overlay_SHIFT,
+		 (iss2 & ESR_ELx_DirtyBit) >> ESR_ELx_DirtyBit_SHIFT,
+		 (iss2 & ESR_ELx_Xs_MASK) >> ESR_ELx_Xs_SHIFT);
 }
 
 static void mem_abort_decode(unsigned long esr)
@@ -886,9 +897,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_sp_pc_abort);
 
-int __init early_brk64(unsigned long addr, unsigned long esr,
-		       struct pt_regs *regs);
-
 /*
  * __refdata because early_brk64 is __init, but the reference to it is
  * clobbered at arch_initcall time.
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 5f9379b..4e64760 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -8,6 +8,7 @@
 
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/libnvdimm.h>
 #include <linux/pagemap.h>
 
 #include <asm/cacheflush.h>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 66e70ca..c28c2c8 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -69,6 +69,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
 
 #define CRASH_ADDR_LOW_MAX		arm64_dma_phys_limit
 #define CRASH_ADDR_HIGH_MAX		(PHYS_MASK + 1)
+#define CRASH_HIGH_SEARCH_BASE		SZ_4G
 
 #define DEFAULT_CRASH_KERNEL_LOW_SIZE	(128UL << 20)
 
@@ -101,12 +102,13 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
  */
 static void __init reserve_crashkernel(void)
 {
-	unsigned long long crash_base, crash_size;
-	unsigned long long crash_low_size = 0;
+	unsigned long long crash_low_size = 0, search_base = 0;
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
+	unsigned long long crash_base, crash_size;
 	char *cmdline = boot_command_line;
-	int ret;
 	bool fixed_base = false;
+	bool high = false;
+	int ret;
 
 	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
 		return;
@@ -129,7 +131,9 @@ static void __init reserve_crashkernel(void)
 		else if (ret)
 			return;
 
+		search_base = CRASH_HIGH_SEARCH_BASE;
 		crash_max = CRASH_ADDR_HIGH_MAX;
+		high = true;
 	} else if (ret || !crash_size) {
 		/* The specified value is invalid */
 		return;
@@ -140,31 +144,51 @@ static void __init reserve_crashkernel(void)
 	/* User specifies base address explicitly. */
 	if (crash_base) {
 		fixed_base = true;
+		search_base = crash_base;
 		crash_max = crash_base + crash_size;
 	}
 
 retry:
 	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
-					       crash_base, crash_max);
+					       search_base, crash_max);
 	if (!crash_base) {
 		/*
-		 * If the first attempt was for low memory, fall back to
-		 * high memory, the minimum required low memory will be
-		 * reserved later.
+		 * For crashkernel=size[KMG]@offset[KMG], print out failure
+		 * message if can't reserve the specified region.
 		 */
-		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
+		if (fixed_base) {
+			pr_warn("crashkernel reservation failed - memory is in use.\n");
+			return;
+		}
+
+		/*
+		 * For crashkernel=size[KMG], if the first attempt was for
+		 * low memory, fall back to high memory, the minimum required
+		 * low memory will be reserved later.
+		 */
+		if (!high && crash_max == CRASH_ADDR_LOW_MAX) {
 			crash_max = CRASH_ADDR_HIGH_MAX;
+			search_base = CRASH_ADDR_LOW_MAX;
 			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 			goto retry;
 		}
 
+		/*
+		 * For crashkernel=size[KMG],high, if the first attempt was
+		 * for high memory, fall back to low memory.
+		 */
+		if (high && crash_max == CRASH_ADDR_HIGH_MAX) {
+			crash_max = CRASH_ADDR_LOW_MAX;
+			search_base = 0;
+			goto retry;
+		}
 		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 			crash_size);
 		return;
 	}
 
-	if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) &&
-	     crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+	if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size &&
+	     reserve_crashkernel_low(crash_low_size)) {
 		memblock_phys_free(crash_base, crash_size);
 		return;
 	}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index e969e68..f17d066 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start,
 static void __init kasan_init_shadow(void)
 {
 	u64 kimg_shadow_start, kimg_shadow_end;
-	u64 mod_shadow_start, mod_shadow_end;
+	u64 mod_shadow_start;
 	u64 vmalloc_shadow_end;
 	phys_addr_t pa_start, pa_end;
 	u64 i;
@@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void)
 	kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));
 
 	mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
-	mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
 
 	vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);
 
@@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void)
 	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
 				   (void *)mod_shadow_start);
 
-	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
-		BUILD_BUG_ON(VMALLOC_START != MODULES_END);
-		kasan_populate_early_shadow((void *)vmalloc_shadow_end,
-					    (void *)KASAN_SHADOW_END);
-	} else {
-		kasan_populate_early_shadow((void *)kimg_shadow_end,
-					    (void *)KASAN_SHADOW_END);
-		if (kimg_shadow_start > mod_shadow_end)
-			kasan_populate_early_shadow((void *)mod_shadow_end,
-						    (void *)kimg_shadow_start);
-	}
+	BUILD_BUG_ON(VMALLOC_START != MODULES_END);
+	kasan_populate_early_shadow((void *)vmalloc_shadow_end,
+				    (void *)KASAN_SHADOW_END);
 
 	for_each_mem_range(i, &pa_start, &pa_end) {
 		void *start = (void *)__phys_to_virt(pa_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index af6bc84..95d3608 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -451,7 +451,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
 void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 				   phys_addr_t size, pgprot_t prot)
 {
-	if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+	if (virt < PAGE_OFFSET) {
 		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
@@ -478,7 +478,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
 				phys_addr_t size, pgprot_t prot)
 {
-	if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+	if (virt < PAGE_OFFSET) {
 		pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
@@ -663,12 +663,17 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
 	vm_area_add_early(vma);
 }
 
+static pgprot_t kernel_exec_prot(void)
+{
+	return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __init map_entry_trampoline(void)
 {
 	int i;
 
-	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+	pgprot_t prot = kernel_exec_prot();
 	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
 
 	/* The trampoline is always mapped and can therefore be global */
@@ -723,7 +728,7 @@ static void __init map_kernel(pgd_t *pgdp)
 	 * mapping to install SW breakpoints. Allow this (only) when
 	 * explicitly requested with rodata=off.
 	 */
-	pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+	pgprot_t text_prot = kernel_exec_prot();
 
 	/*
 	 * If we have a CPU that supports BTI and a kernel built for
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 40ba954..debc460 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -32,8 +32,10 @@
 HAS_GIC_CPUIF_SYSREGS
 HAS_GIC_PRIO_MASKING
 HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCX
 HAS_LDAPR
 HAS_LSE_ATOMICS
+HAS_MOPS
 HAS_NESTED_VIRT
 HAS_NO_FPSIMD
 HAS_NO_HW_PREFETCH
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
index 00c9e72..8525980 100755
--- a/arch/arm64/tools/gen-cpucaps.awk
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -24,12 +24,12 @@
 }
 
 /^[vA-Z0-9_]+$/ {
-	printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+	printf("#define ARM64_%-40s\t%d\n", $0, cap_num++)
 	next
 }
 
 END {
-	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+	printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num)
 	print ""
 	print "#endif /* __ASM_CPUCAPS_H */"
 }
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index c9a0d1fa..c585725 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -48,6 +48,61 @@
 # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
 # item ACCDATA) though it may be more taseful to do something else.
 
+Sysreg	OSDTRRX_EL1	2	0	0	0	2
+Res0	63:32
+Field	31:0	DTRRX
+EndSysreg
+
+Sysreg	MDCCINT_EL1	2	0	0	2	0
+Res0	63:31
+Field	30	RX
+Field	29	TX
+Res0	28:0
+EndSysreg
+
+Sysreg	MDSCR_EL1	2	0	0	2	2
+Res0	63:36
+Field	35	EHBWE
+Field	34	EnSPM
+Field	33	TTA
+Field	32	EMBWE
+Field	31	TFO
+Field	30	RXfull
+Field	29	TXfull
+Res0	28
+Field	27	RXO
+Field	26	TXU
+Res0	25:24
+Field	23:22	INTdis
+Field	21	TDA
+Res0	20
+Field	19	SC2
+Res0	18:16
+Field	15	MDE
+Field	14	HDE
+Field	13	KDE
+Field	12	TDCC
+Res0	11:7
+Field	6	ERR
+Res0	5:1
+Field	0	SS
+EndSysreg
+
+Sysreg	OSDTRTX_EL1	2	0	0	3	2
+Res0	63:32
+Field	31:0	DTRTX
+EndSysreg
+
+Sysreg	OSECCR_EL1	2	0	0	6	2
+Res0	63:32
+Field	31:0	EDECCR
+EndSysreg
+
+Sysreg	OSLAR_EL1	2	0	1	0	4
+Res0	63:1
+Field	0	OSLK
+EndSysreg
+
 Sysreg ID_PFR0_EL1	3	0	0	1	0
 Res0	63:32
 UnsignedEnum	31:28	RAS
@@ -2200,3 +2255,80 @@
 Res0	63:24
 Field	23:0	INTID
 EndSysreg
+
+Sysreg	TRBLIMITR_EL1	3	0	9	11	0
+Field	63:12	LIMIT
+Res0	11:7
+Field	6	XE
+Field	5	nVM
+Enum	4:3	TM
+	0b00	STOP
+	0b01	IRQ
+	0b11	IGNR
+EndEnum
+Enum	2:1	FM
+	0b00	FILL
+	0b01	WRAP
+	0b11	CBUF
+EndEnum
+Field	0	E
+EndSysreg
+
+Sysreg	TRBPTR_EL1	3	0	9	11	1
+Field	63:0	PTR
+EndSysreg
+
+Sysreg	TRBBASER_EL1	3	0	9	11	2
+Field	63:12	BASE
+Res0	11:0
+EndSysreg
+
+Sysreg	TRBSR_EL1	3	0	9	11	3
+Res0	63:56
+Field	55:32	MSS2
+Field	31:26	EC
+Res0	25:24
+Field	23	DAT
+Field	22	IRQ
+Field	21	TRG
+Field	20	WRAP
+Res0	19
+Field	18	EA
+Field	17	S
+Res0	16
+Field	15:0	MSS
+EndSysreg
+
+Sysreg	TRBMAR_EL1	3	0	9	11	4
+Res0	63:12
+Enum	11:10	PAS
+	0b00	SECURE
+	0b01	NON_SECURE
+	0b10	ROOT
+	0b11	REALM
+EndEnum
+Enum	9:8	SH
+	0b00	NON_SHAREABLE
+	0b10	OUTER_SHAREABLE
+	0b11	INNER_SHAREABLE
+EndEnum
+Field	7:0	Attr
+EndSysreg
+
+Sysreg	TRBTRG_EL1	3	0	9	11	6
+Res0	63:32
+Field	31:0	TRG
+EndSysreg
+
+Sysreg	TRBIDR_EL1	3	0	9	11	7
+Res0	63:12
+Enum	11:8	EA
+	0b0000	NON_DESC
+	0b0001	IGNORE
+	0b0010	SERROR
+EndEnum
+Res0	7:6
+Field	5	F
+Field	4	P
+Field	3:0	Align
+EndSysreg
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index e21a9e8..f81fe24 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -3,4 +3,4 @@
 obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
 obj-$(CONFIG_ACPI_APMT) 	+= apmt.o
-obj-y				+= dma.o
+obj-y				+= dma.o init.o
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c
index f605302..8b3c7d4 100644
--- a/drivers/acpi/arm64/agdi.c
+++ b/drivers/acpi/arm64/agdi.c
@@ -9,11 +9,11 @@
 #define pr_fmt(fmt) "ACPI: AGDI: " fmt
 
 #include <linux/acpi.h>
-#include <linux/acpi_agdi.h>
 #include <linux/arm_sdei.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include "init.h"
 
 struct agdi_data {
 	int sdei_event;
diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c
index 8cab69f..bb010f6 100644
--- a/drivers/acpi/arm64/apmt.c
+++ b/drivers/acpi/arm64/apmt.c
@@ -10,10 +10,10 @@
 #define pr_fmt(fmt)	"ACPI: APMT: " fmt
 
 #include <linux/acpi.h>
-#include <linux/acpi_apmt.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include "init.h"
 
 #define DEV_NAME "arm-cs-arch-pmu"
 
@@ -35,11 +35,13 @@ static int __init apmt_init_resources(struct resource *res,
 
 	num_res++;
 
-	res[num_res].start = node->base_address1;
-	res[num_res].end = node->base_address1 + SZ_4K - 1;
-	res[num_res].flags = IORESOURCE_MEM;
+	if (node->flags & ACPI_APMT_FLAGS_DUAL_PAGE) {
+		res[num_res].start = node->base_address1;
+		res[num_res].end = node->base_address1 + SZ_4K - 1;
+		res[num_res].flags = IORESOURCE_MEM;
 
-	num_res++;
+		num_res++;
+	}
 
 	if (node->ovflw_irq != 0) {
 		trigger = (node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE);
diff --git a/drivers/acpi/arm64/init.c b/drivers/acpi/arm64/init.c
new file mode 100644
index 0000000..d3ce53d
--- /dev/null
+++ b/drivers/acpi/arm64/init.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include "init.h"
+
+void __init acpi_arm_init(void)
+{
+	if (IS_ENABLED(CONFIG_ACPI_AGDI))
+		acpi_agdi_init();
+	if (IS_ENABLED(CONFIG_ACPI_APMT))
+		acpi_apmt_init();
+	if (IS_ENABLED(CONFIG_ACPI_IORT))
+		acpi_iort_init();
+}
diff --git a/drivers/acpi/arm64/init.h b/drivers/acpi/arm64/init.h
new file mode 100644
index 0000000..a1715a2a
--- /dev/null
+++ b/drivers/acpi/arm64/init.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/init.h>
+
+void __init acpi_agdi_init(void);
+void __init acpi_apmt_init(void);
+void __init acpi_iort_init(void);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 38fb849..3631230 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/dma-map-ops.h>
+#include "init.h"
 
 #define IORT_TYPE_MASK(type)	(1 << (type))
 #define IORT_MSI_TYPE		(1 << ACPI_IORT_NODE_ITS_GROUP)
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d161ff7..7a1eaf8 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -26,9 +26,6 @@
 #include <asm/mpspec.h>
 #include <linux/dmi.h>
 #endif
-#include <linux/acpi_agdi.h>
-#include <linux/acpi_apmt.h>
-#include <linux/acpi_iort.h>
 #include <linux/acpi_viot.h>
 #include <linux/pci.h>
 #include <acpi/apei.h>
@@ -1408,7 +1405,7 @@ static int __init acpi_init(void)
 	acpi_init_ffh();
 
 	pci_mmcfg_late_init();
-	acpi_iort_init();
+	acpi_arm_init();
 	acpi_viot_early_init();
 	acpi_hest_init();
 	acpi_ghes_init();
@@ -1420,8 +1417,6 @@ static int __init acpi_init(void)
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
 	acpi_viot_init();
-	acpi_agdi_init();
-	acpi_apmt_init();
 	return 0;
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 1fc4fd7..1bab91c 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -218,7 +218,7 @@ static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
 	 * Enable the TRBE without clearing LIMITPTR which
 	 * might be required for fetching the buffer limits.
 	 */
-	trblimitr |= TRBLIMITR_ENABLE;
+	trblimitr |= TRBLIMITR_EL1_E;
 	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
 
 	/* Synchronize the TRBE enable event */
@@ -236,7 +236,7 @@ static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
 	 * Disable the TRBE without clearing LIMITPTR which
 	 * might be required for fetching the buffer limits.
 	 */
-	trblimitr &= ~TRBLIMITR_ENABLE;
+	trblimitr &= ~TRBLIMITR_EL1_E;
 	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
 
 	if (trbe_needs_drain_after_disable(cpudata))
@@ -582,12 +582,12 @@ static void clr_trbe_status(void)
 	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
 
 	WARN_ON(is_trbe_enabled());
-	trbsr &= ~TRBSR_IRQ;
-	trbsr &= ~TRBSR_TRG;
-	trbsr &= ~TRBSR_WRAP;
-	trbsr &= ~(TRBSR_EC_MASK << TRBSR_EC_SHIFT);
-	trbsr &= ~(TRBSR_BSC_MASK << TRBSR_BSC_SHIFT);
-	trbsr &= ~TRBSR_STOP;
+	trbsr &= ~TRBSR_EL1_IRQ;
+	trbsr &= ~TRBSR_EL1_TRG;
+	trbsr &= ~TRBSR_EL1_WRAP;
+	trbsr &= ~TRBSR_EL1_EC_MASK;
+	trbsr &= ~TRBSR_EL1_BSC_MASK;
+	trbsr &= ~TRBSR_EL1_S;
 	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
 }
 
@@ -596,13 +596,13 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
 	unsigned long addr = buf->trbe_limit;
 
-	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_LIMIT_SHIFT)));
+	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT)));
 	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
 
-	trblimitr &= ~TRBLIMITR_NVM;
-	trblimitr &= ~(TRBLIMITR_FILL_MODE_MASK << TRBLIMITR_FILL_MODE_SHIFT);
-	trblimitr &= ~(TRBLIMITR_TRIG_MODE_MASK << TRBLIMITR_TRIG_MODE_SHIFT);
-	trblimitr &= ~(TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT);
+	trblimitr &= ~TRBLIMITR_EL1_nVM;
+	trblimitr &= ~TRBLIMITR_EL1_FM_MASK;
+	trblimitr &= ~TRBLIMITR_EL1_TM_MASK;
+	trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK;
 
 	/*
 	 * Fill trace buffer mode is used here while configuring the
@@ -613,14 +613,15 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
 	 * trace data in the interrupt handler, before reconfiguring
 	 * the TRBE.
 	 */
-	trblimitr |= (TRBE_FILL_MODE_FILL & TRBLIMITR_FILL_MODE_MASK) << TRBLIMITR_FILL_MODE_SHIFT;
+	trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) &
+		     TRBLIMITR_EL1_FM_MASK;
 
 	/*
 	 * Trigger mode is not used here while configuring the TRBE for
 	 * the trace capture. Hence just keep this in the ignore mode.
 	 */
-	trblimitr |= (TRBE_TRIG_MODE_IGNORE & TRBLIMITR_TRIG_MODE_MASK) <<
-		      TRBLIMITR_TRIG_MODE_SHIFT;
+	trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) &
+		     TRBLIMITR_EL1_TM_MASK;
 	trblimitr |= (addr & PAGE_MASK);
 	set_trbe_enabled(buf->cpudata, trblimitr);
 }
diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h
index 98ff1b1..77cbb5c 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.h
+++ b/drivers/hwtracing/coresight/coresight-trbe.h
@@ -30,7 +30,7 @@ static inline bool is_trbe_enabled(void)
 {
 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
 
-	return trblimitr & TRBLIMITR_ENABLE;
+	return trblimitr & TRBLIMITR_EL1_E;
 }
 
 #define TRBE_EC_OTHERS		0
@@ -39,7 +39,7 @@ static inline bool is_trbe_enabled(void)
 
 static inline int get_trbe_ec(u64 trbsr)
 {
-	return (trbsr >> TRBSR_EC_SHIFT) & TRBSR_EC_MASK;
+	return (trbsr & TRBSR_EL1_EC_MASK) >> TRBSR_EL1_EC_SHIFT;
 }
 
 #define TRBE_BSC_NOT_STOPPED 0
@@ -48,63 +48,55 @@ static inline int get_trbe_ec(u64 trbsr)
 
 static inline int get_trbe_bsc(u64 trbsr)
 {
-	return (trbsr >> TRBSR_BSC_SHIFT) & TRBSR_BSC_MASK;
+	return (trbsr & TRBSR_EL1_BSC_MASK) >> TRBSR_EL1_BSC_SHIFT;
 }
 
 static inline void clr_trbe_irq(void)
 {
 	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
 
-	trbsr &= ~TRBSR_IRQ;
+	trbsr &= ~TRBSR_EL1_IRQ;
 	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
 }
 
 static inline bool is_trbe_irq(u64 trbsr)
 {
-	return trbsr & TRBSR_IRQ;
+	return trbsr & TRBSR_EL1_IRQ;
 }
 
 static inline bool is_trbe_trg(u64 trbsr)
 {
-	return trbsr & TRBSR_TRG;
+	return trbsr & TRBSR_EL1_TRG;
 }
 
 static inline bool is_trbe_wrap(u64 trbsr)
 {
-	return trbsr & TRBSR_WRAP;
+	return trbsr & TRBSR_EL1_WRAP;
 }
 
 static inline bool is_trbe_abort(u64 trbsr)
 {
-	return trbsr & TRBSR_ABORT;
+	return trbsr & TRBSR_EL1_EA;
 }
 
 static inline bool is_trbe_running(u64 trbsr)
 {
-	return !(trbsr & TRBSR_STOP);
+	return !(trbsr & TRBSR_EL1_S);
 }
 
-#define TRBE_TRIG_MODE_STOP		0
-#define TRBE_TRIG_MODE_IRQ		1
-#define TRBE_TRIG_MODE_IGNORE		3
-
-#define TRBE_FILL_MODE_FILL		0
-#define TRBE_FILL_MODE_WRAP		1
-#define TRBE_FILL_MODE_CIRCULAR_BUFFER	3
-
 static inline bool get_trbe_flag_update(u64 trbidr)
 {
-	return trbidr & TRBIDR_FLAG;
+	return trbidr & TRBIDR_EL1_F;
 }
 
 static inline bool is_trbe_programmable(u64 trbidr)
 {
-	return !(trbidr & TRBIDR_PROG);
+	return !(trbidr & TRBIDR_EL1_P);
 }
 
 static inline int get_trbe_address_align(u64 trbidr)
 {
-	return (trbidr >> TRBIDR_ALIGN_SHIFT) & TRBIDR_ALIGN_MASK;
+	return (trbidr & TRBIDR_EL1_Align_MASK) >> TRBIDR_EL1_Align_SHIFT;
 }
 
 static inline unsigned long get_trbe_write_pointer(void)
@@ -121,7 +113,7 @@ static inline void set_trbe_write_pointer(unsigned long addr)
 static inline unsigned long get_trbe_limit_pointer(void)
 {
 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
-	unsigned long addr = trblimitr & (TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT);
+	unsigned long addr = trblimitr & TRBLIMITR_EL1_LIMIT_MASK;
 
 	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
 	return addr;
@@ -130,7 +122,7 @@ static inline unsigned long get_trbe_limit_pointer(void)
 static inline unsigned long get_trbe_base_pointer(void)
 {
 	u64 trbbaser = read_sysreg_s(SYS_TRBBASER_EL1);
-	unsigned long addr = trbbaser & (TRBBASER_BASE_MASK << TRBBASER_BASE_SHIFT);
+	unsigned long addr = trbbaser & TRBBASER_EL1_BASE_MASK;
 
 	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
 	return addr;
@@ -139,7 +131,7 @@ static inline unsigned long get_trbe_base_pointer(void)
 static inline void set_trbe_base_pointer(unsigned long addr)
 {
 	WARN_ON(is_trbe_enabled());
-	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_BASE_SHIFT)));
+	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_EL1_BASE_SHIFT)));
 	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
 	write_sysreg_s(addr, SYS_TRBBASER_EL1);
 }
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 711f824..4c07d71 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -127,6 +127,14 @@
 	  can give information about memory throughput and other related
 	  events.
 
+config FSL_IMX9_DDR_PMU
+	tristate "Freescale i.MX9 DDR perf monitor"
+	depends on ARCH_MXC
+	 help
+	 Provides support for the DDR performance monitor in i.MX9, which
+	 can give information about memory throughput and other related
+	 events.
+
 config QCOM_L2_PMU
 	bool "Qualcomm Technologies L2-cache PMU"
 	depends on ARCH_QCOM && ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index dabc859..5cfe895 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -8,6 +8,7 @@
 obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o
 obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
 obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
+obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index 8574c6e..cd2de44 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -493,6 +493,17 @@ static int m1_pmu_map_event(struct perf_event *event)
 	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
 }
 
+static int m2_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Same deal as the above, except that M2 has 64bit counters.
+	 * Which, as far as we're concerned, actually means 63 bits.
+	 * Yes, this is getting awkward.
+	 */
+	event->hw.flags |= ARMPMU_EVT_63BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
 static void m1_pmu_reset(void *info)
 {
 	int i;
@@ -525,7 +536,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
-static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags)
 {
 	cpu_pmu->handle_irq	  = m1_pmu_handle_irq;
 	cpu_pmu->enable		  = m1_pmu_enable_event;
@@ -536,7 +547,14 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->clear_event_idx  = m1_pmu_clear_event_idx;
 	cpu_pmu->start		  = m1_pmu_start;
 	cpu_pmu->stop		  = m1_pmu_stop;
-	cpu_pmu->map_event	  = m1_pmu_map_event;
+
+	if (flags & ARMPMU_EVT_47BIT)
+		cpu_pmu->map_event = m1_pmu_map_event;
+	else if (flags & ARMPMU_EVT_63BIT)
+		cpu_pmu->map_event = m2_pmu_map_event;
+	else
+		return WARN_ON(-EINVAL);
+
 	cpu_pmu->reset		  = m1_pmu_reset;
 	cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
 
@@ -550,25 +568,25 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu)
 static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->name = "apple_icestorm_pmu";
-	return m1_pmu_init(cpu_pmu);
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
 }
 
 static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->name = "apple_firestorm_pmu";
-	return m1_pmu_init(cpu_pmu);
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
 }
 
 static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->name = "apple_avalanche_pmu";
-	return m1_pmu_init(cpu_pmu);
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
 }
 
 static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->name = "apple_blizzard_pmu";
-	return m1_pmu_init(cpu_pmu);
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
 }
 
 static const struct of_device_id m1_pmu_of_device_ids[] = {
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 03b1309..998259f 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -645,7 +645,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
 	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
 	DECLARE_BITMAP(mask, HW_CNTRS_MAX);
 
-	bitmap_zero(mask, cci_pmu->num_cntrs);
+	bitmap_zero(mask, HW_CNTRS_MAX);
 	for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
 		struct perf_event *event = cci_hw->events[i];
 
@@ -656,7 +656,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
 		if (event->hw.state & PERF_HES_STOPPED)
 			continue;
 		if (event->hw.state & PERF_HES_ARCH) {
-			set_bit(i, mask);
+			__set_bit(i, mask);
 			event->hw.state &= ~PERF_HES_ARCH;
 		}
 	}
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 47d359f..b8c1587 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -44,8 +44,11 @@
 #define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
 
 /* The CFG node has various info besides the discovery tree */
-#define CMN_CFGM_PERIPH_ID_2		0x0010
-#define CMN_CFGM_PID2_REVISION		GENMASK(7, 4)
+#define CMN_CFGM_PERIPH_ID_01		0x0008
+#define CMN_CFGM_PID0_PART_0		GENMASK_ULL(7, 0)
+#define CMN_CFGM_PID1_PART_1		GENMASK_ULL(35, 32)
+#define CMN_CFGM_PERIPH_ID_23		0x0010
+#define CMN_CFGM_PID2_REVISION		GENMASK_ULL(7, 4)
 
 #define CMN_CFGM_INFO_GLOBAL		0x900
 #define CMN_INFO_MULTIPLE_DTM_EN	BIT_ULL(63)
@@ -186,6 +189,7 @@
 #define CMN_WP_DOWN			2
 
 
+/* Internal values for encoding event support */
 enum cmn_model {
 	CMN600 = 1,
 	CMN650 = 2,
@@ -197,26 +201,34 @@ enum cmn_model {
 	CMN_650ON = CMN650 | CMN700,
 };
 
+/* Actual part numbers and revision IDs defined by the hardware */
+enum cmn_part {
+	PART_CMN600 = 0x434,
+	PART_CMN650 = 0x436,
+	PART_CMN700 = 0x43c,
+	PART_CI700 = 0x43a,
+};
+
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
 enum cmn_revision {
-	CMN600_R1P0,
-	CMN600_R1P1,
-	CMN600_R1P2,
-	CMN600_R1P3,
-	CMN600_R2P0,
-	CMN600_R3P0,
-	CMN600_R3P1,
-	CMN650_R0P0 = 0,
-	CMN650_R1P0,
-	CMN650_R1P1,
-	CMN650_R2P0,
-	CMN650_R1P2,
-	CMN700_R0P0 = 0,
-	CMN700_R1P0,
-	CMN700_R2P0,
-	CI700_R0P0 = 0,
-	CI700_R1P0,
-	CI700_R2P0,
+	REV_CMN600_R1P0,
+	REV_CMN600_R1P1,
+	REV_CMN600_R1P2,
+	REV_CMN600_R1P3,
+	REV_CMN600_R2P0,
+	REV_CMN600_R3P0,
+	REV_CMN600_R3P1,
+	REV_CMN650_R0P0 = 0,
+	REV_CMN650_R1P0,
+	REV_CMN650_R1P1,
+	REV_CMN650_R2P0,
+	REV_CMN650_R1P2,
+	REV_CMN700_R0P0 = 0,
+	REV_CMN700_R1P0,
+	REV_CMN700_R2P0,
+	REV_CI700_R0P0 = 0,
+	REV_CI700_R1P0,
+	REV_CI700_R2P0,
 };
 
 enum cmn_node_type {
@@ -306,7 +318,7 @@ struct arm_cmn {
 	unsigned int state;
 
 	enum cmn_revision rev;
-	enum cmn_model model;
+	enum cmn_part part;
 	u8 mesh_x;
 	u8 mesh_y;
 	u16 num_xps;
@@ -394,19 +406,35 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
 	return NULL;
 }
 
+static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
+{
+	switch (cmn->part) {
+	case PART_CMN600:
+		return CMN600;
+	case PART_CMN650:
+		return CMN650;
+	case PART_CMN700:
+		return CMN700;
+	case PART_CI700:
+		return CI700;
+	default:
+		return 0;
+	};
+}
+
 static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
 				       const struct arm_cmn_node *xp, int port)
 {
 	int offset = CMN_MXP__CONNECT_INFO(port);
 
 	if (port >= 2) {
-		if (cmn->model & (CMN600 | CMN650))
+		if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
 			return 0;
 		/*
 		 * CI-700 may have extra ports, but still has the
 		 * mesh_port_connect_info registers in the way.
 		 */
-		if (cmn->model == CI700)
+		if (cmn->part == PART_CI700)
 			offset += CI700_CONNECT_INFO_P2_5_OFFSET;
 	}
 
@@ -640,7 +668,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 
 	eattr = container_of(attr, typeof(*eattr), attr.attr);
 
-	if (!(eattr->model & cmn->model))
+	if (!(eattr->model & arm_cmn_model(cmn)))
 		return 0;
 
 	type = eattr->type;
@@ -658,7 +686,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 		if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
 			return 0;
 
-		if (chan == 4 && cmn->model == CMN600)
+		if (chan == 4 && cmn->part == PART_CMN600)
 			return 0;
 
 		if ((chan == 5 && cmn->rsp_vc_num < 2) ||
@@ -669,19 +697,19 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	}
 
 	/* Revision-specific differences */
-	if (cmn->model == CMN600) {
-		if (cmn->rev < CMN600_R1P3) {
+	if (cmn->part == PART_CMN600) {
+		if (cmn->rev < REV_CMN600_R1P3) {
 			if (type == CMN_TYPE_CXRA && eventid > 0x10)
 				return 0;
 		}
-		if (cmn->rev < CMN600_R1P2) {
+		if (cmn->rev < REV_CMN600_R1P2) {
 			if (type == CMN_TYPE_HNF && eventid == 0x1b)
 				return 0;
 			if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA)
 				return 0;
 		}
-	} else if (cmn->model == CMN650) {
-		if (cmn->rev < CMN650_R2P0 || cmn->rev == CMN650_R1P2) {
+	} else if (cmn->part == PART_CMN650) {
+		if (cmn->rev < REV_CMN650_R2P0 || cmn->rev == REV_CMN650_R1P2) {
 			if (type == CMN_TYPE_HNF && eventid > 0x22)
 				return 0;
 			if (type == CMN_TYPE_SBSX && eventid == 0x17)
@@ -689,8 +717,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 			if (type == CMN_TYPE_RNI && eventid > 0x10)
 				return 0;
 		}
-	} else if (cmn->model == CMN700) {
-		if (cmn->rev < CMN700_R2P0) {
+	} else if (cmn->part == PART_CMN700) {
+		if (cmn->rev < REV_CMN700_R2P0) {
 			if (type == CMN_TYPE_HNF && eventid > 0x2c)
 				return 0;
 			if (type == CMN_TYPE_CCHA && eventid > 0x74)
@@ -698,7 +726,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 			if (type == CMN_TYPE_CCLA && eventid > 0x27)
 				return 0;
 		}
-		if (cmn->rev < CMN700_R1P0) {
+		if (cmn->rev < REV_CMN700_R1P0) {
 			if (type == CMN_TYPE_HNF && eventid > 0x2b)
 				return 0;
 		}
@@ -1171,19 +1199,31 @@ static ssize_t arm_cmn_cpumask_show(struct device *dev,
 static struct device_attribute arm_cmn_cpumask_attr =
 		__ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL);
 
-static struct attribute *arm_cmn_cpumask_attrs[] = {
+static ssize_t arm_cmn_identifier_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%03x%02x\n", cmn->part, cmn->rev);
+}
+
+static struct device_attribute arm_cmn_identifier_attr =
+		__ATTR(identifier, 0444, arm_cmn_identifier_show, NULL);
+
+static struct attribute *arm_cmn_other_attrs[] = {
 	&arm_cmn_cpumask_attr.attr,
+	&arm_cmn_identifier_attr.attr,
 	NULL,
 };
 
-static const struct attribute_group arm_cmn_cpumask_attr_group = {
-	.attrs = arm_cmn_cpumask_attrs,
+static const struct attribute_group arm_cmn_other_attrs_group = {
+	.attrs = arm_cmn_other_attrs,
 };
 
 static const struct attribute_group *arm_cmn_attr_groups[] = {
 	&arm_cmn_event_attrs_group,
 	&arm_cmn_format_attrs_group,
-	&arm_cmn_cpumask_attr_group,
+	&arm_cmn_other_attrs_group,
 	NULL
 };
 
@@ -1200,7 +1240,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
 	u32 grp = CMN_EVENT_WP_GRP(event);
 	u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
 	u32 combine = CMN_EVENT_WP_COMBINE(event);
-	bool is_cmn600 = to_cmn(event->pmu)->model == CMN600;
+	bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600;
 
 	config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
@@ -1520,14 +1560,14 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
 	return ret;
 }
 
-static enum cmn_filter_select arm_cmn_filter_sel(enum cmn_model model,
+static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn,
 						 enum cmn_node_type type,
 						 unsigned int eventid)
 {
 	struct arm_cmn_event_attr *e;
-	int i;
+	enum cmn_model model = arm_cmn_model(cmn);
 
-	for (i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
+	for (int i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
 		e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr);
 		if (e->model & model && e->type == type && e->eventid == eventid)
 			return e->fsel;
@@ -1570,12 +1610,12 @@ static int arm_cmn_event_init(struct perf_event *event)
 		/* ...but the DTM may depend on which port we're watching */
 		if (cmn->multi_dtm)
 			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
-	} else if (type == CMN_TYPE_XP && cmn->model == CMN700) {
+	} else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
 		hw->wide_sel = true;
 	}
 
 	/* This is sufficiently annoying to recalculate, so cache it */
-	hw->filter_sel = arm_cmn_filter_sel(cmn->model, type, eventid);
+	hw->filter_sel = arm_cmn_filter_sel(cmn, type, eventid);
 
 	bynodeid = CMN_EVENT_BYNODEID(event);
 	nodeid = CMN_EVENT_NODEID(event);
@@ -1899,9 +1939,10 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
 	if (dtc->irq < 0)
 		return dtc->irq;
 
-	writel_relaxed(0, dtc->base + CMN_DT_PMCR);
+	writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
+	writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
+	writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
 	writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
-	writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
 
 	return 0;
 }
@@ -1961,7 +2002,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 			dn->type = CMN_TYPE_CCLA;
 	}
 
-	writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+	arm_cmn_set_state(cmn, CMN_STATE_DISABLED);
 
 	return 0;
 }
@@ -2006,6 +2047,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	void __iomem *cfg_region;
 	struct arm_cmn_node cfg, *dn;
 	struct arm_cmn_dtm *dtm;
+	enum cmn_part part;
 	u16 child_count, child_poff;
 	u32 xp_offset[CMN_MAX_XPS];
 	u64 reg;
@@ -2017,7 +2059,19 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 		return -ENODEV;
 
 	cfg_region = cmn->base + rgn_offset;
-	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2);
+
+	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
+	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
+	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+	if (cmn->part && cmn->part != part)
+		dev_warn(cmn->dev,
+			 "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n",
+			 cmn->part, part);
+	cmn->part = part;
+	if (!arm_cmn_model(cmn))
+		dev_warn(cmn->dev, "Unknown part number: 0x%x\n", part);
+
+	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
 	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
 
 	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
@@ -2081,7 +2135,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 		if (xp->id == (1 << 3))
 			cmn->mesh_x = xp->logid;
 
-		if (cmn->model == CMN600)
+		if (cmn->part == PART_CMN600)
 			xp->dtc = 0xf;
 		else
 			xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
@@ -2201,7 +2255,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	if (cmn->num_xps == 1)
 		dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n");
 
-	dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev);
+	dev_dbg(cmn->dev, "periph_id part 0x%03x revision %d\n", cmn->part, cmn->rev);
 	reg = cmn->ports_used;
 	dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n",
 		cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), &reg,
@@ -2256,17 +2310,17 @@ static int arm_cmn_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	cmn->dev = &pdev->dev;
-	cmn->model = (unsigned long)device_get_match_data(cmn->dev);
+	cmn->part = (unsigned long)device_get_match_data(cmn->dev);
 	platform_set_drvdata(pdev, cmn);
 
-	if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) {
+	if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) {
 		rootnode = arm_cmn600_acpi_probe(pdev, cmn);
 	} else {
 		rootnode = 0;
 		cmn->base = devm_platform_ioremap_resource(pdev, 0);
 		if (IS_ERR(cmn->base))
 			return PTR_ERR(cmn->base);
-		if (cmn->model == CMN600)
+		if (cmn->part == PART_CMN600)
 			rootnode = arm_cmn600_of_probe(pdev->dev.of_node);
 	}
 	if (rootnode < 0)
@@ -2335,10 +2389,10 @@ static int arm_cmn_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_OF
 static const struct of_device_id arm_cmn_of_match[] = {
-	{ .compatible = "arm,cmn-600", .data = (void *)CMN600 },
-	{ .compatible = "arm,cmn-650", .data = (void *)CMN650 },
-	{ .compatible = "arm,cmn-700", .data = (void *)CMN700 },
-	{ .compatible = "arm,ci-700", .data = (void *)CI700 },
+	{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
+	{ .compatible = "arm,cmn-650" },
+	{ .compatible = "arm,cmn-700" },
+	{ .compatible = "arm,ci-700" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
@@ -2346,9 +2400,9 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id arm_cmn_acpi_match[] = {
-	{ "ARMHC600", CMN600 },
-	{ "ARMHC650", CMN650 },
-	{ "ARMHC700", CMN700 },
+	{ "ARMHC600", PART_CMN600 },
+	{ "ARMHC650" },
+	{ "ARMHC700" },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 0b316fe..25d25de 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -4,8 +4,7 @@
 
 config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
 	tristate "ARM Coresight Architecture PMU"
-	depends on ARM64 && ACPI
-	depends on ACPI_APMT || COMPILE_TEST
+	depends on ARM64 || COMPILE_TEST
 	help
 	  Provides support for performance monitoring unit (PMU) devices
 	  based on ARM CoreSight PMU architecture. Note that this PMU
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index a3f1c41..e2b7827 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
-#include <acpi/processor.h>
 
 #include "arm_cspmu.h"
 #include "nvidia_cspmu.h"
@@ -101,10 +100,6 @@
 #define ARM_CSPMU_ACTIVE_CPU_MASK		0x0
 #define ARM_CSPMU_ASSOCIATED_CPU_MASK		0x1
 
-/* Check if field f in flags is set with value v */
-#define CHECK_APMT_FLAG(flags, f, v) \
-	((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v))
-
 /* Check and use default if implementer doesn't provide attribute callback */
 #define CHECK_DEFAULT_IMPL_OPS(ops, callback)			\
 	do {							\
@@ -122,6 +117,11 @@
 
 static unsigned long arm_cspmu_cpuhp_state;
 
+static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
+{
+	return *(struct acpi_apmt_node **)dev_get_platdata(dev);
+}
+
 /*
  * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
  * counter register. The counter register can be implemented as 32-bit or 64-bit
@@ -156,12 +156,6 @@ static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
 	return val;
 }
 
-/* Check if PMU supports 64-bit single copy atomic. */
-static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu)
-{
-	return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP);
-}
-
 /* Check if cycle counter is supported. */
 static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
 {
@@ -189,10 +183,10 @@ static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
 ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct dev_ext_attribute *eattr =
-		container_of(attr, struct dev_ext_attribute, attr);
-	return sysfs_emit(buf, "event=0x%llx\n",
-			  (unsigned long long)eattr->var);
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
+	return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
 }
 EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);
 
@@ -320,7 +314,7 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
 	static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };
 
 	dev = cspmu->dev;
-	apmt_node = cspmu->apmt_node;
+	apmt_node = arm_cspmu_apmt_node(dev);
 	pmu_type = apmt_node->type;
 
 	if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
@@ -397,8 +391,8 @@ static const struct impl_match impl_match[] = {
 static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
 {
 	int ret;
-	struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
 	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
 	const struct impl_match *match = impl_match;
 
 	/*
@@ -720,7 +714,7 @@ static u64 arm_cspmu_read_counter(struct perf_event *event)
 		offset = counter_offset(sizeof(u64), event->hw.idx);
 		counter_addr = cspmu->base1 + offset;
 
-		return supports_64bit_atomics(cspmu) ?
+		return cspmu->has_atomic_dword ?
 			       readq(counter_addr) :
 			       read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
 	}
@@ -911,24 +905,18 @@ static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
 {
 	struct acpi_apmt_node *apmt_node;
 	struct arm_cspmu *cspmu;
-	struct device *dev;
-
-	dev = &pdev->dev;
-	apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev);
-	if (!apmt_node) {
-		dev_err(dev, "failed to get APMT node\n");
-		return NULL;
-	}
+	struct device *dev = &pdev->dev;
 
 	cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
 	if (!cspmu)
 		return NULL;
 
 	cspmu->dev = dev;
-	cspmu->apmt_node = apmt_node;
-
 	platform_set_drvdata(pdev, cspmu);
 
+	apmt_node = arm_cspmu_apmt_node(dev);
+	cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
+
 	return cspmu;
 }
 
@@ -936,11 +924,9 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
 {
 	struct device *dev;
 	struct platform_device *pdev;
-	struct acpi_apmt_node *apmt_node;
 
 	dev = cspmu->dev;
 	pdev = to_platform_device(dev);
-	apmt_node = cspmu->apmt_node;
 
 	/* Base address for page 0. */
 	cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
@@ -951,7 +937,7 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
 
 	/* Base address for page 1 if supported. Otherwise point to page 0. */
 	cspmu->base1 = cspmu->base0;
-	if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) {
+	if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) {
 		cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
 		if (IS_ERR(cspmu->base1)) {
 			dev_err(dev, "ioremap failed for page-1 resource\n");
@@ -1048,19 +1034,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
 	int irq, ret;
 	struct device *dev;
 	struct platform_device *pdev;
-	struct acpi_apmt_node *apmt_node;
 
 	dev = cspmu->dev;
 	pdev = to_platform_device(dev);
-	apmt_node = cspmu->apmt_node;
 
 	/* Skip IRQ request if the PMU does not support overflow interrupt. */
-	if (apmt_node->ovflw_irq == 0)
-		return 0;
-
-	irq = platform_get_irq(pdev, 0);
+	irq = platform_get_irq_optional(pdev, 0);
 	if (irq < 0)
-		return irq;
+		return irq == -ENXIO ? 0 : irq;
 
 	ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
 			       IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev),
@@ -1075,6 +1056,9 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
 	return 0;
 }
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
+#include <acpi/processor.h>
+
 static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
 {
 	u32 acpi_uid;
@@ -1099,15 +1083,13 @@ static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
 	return -ENODEV;
 }
 
-static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
 {
-	struct device *dev;
 	struct acpi_apmt_node *apmt_node;
 	int affinity_flag;
 	int cpu;
 
-	dev = cspmu->pmu.dev;
-	apmt_node = cspmu->apmt_node;
+	apmt_node = arm_cspmu_apmt_node(cspmu->dev);
 	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
 
 	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
@@ -1129,12 +1111,23 @@ static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
 	}
 
 	if (cpumask_empty(&cspmu->associated_cpus)) {
-		dev_dbg(dev, "No cpu associated with the PMU\n");
+		dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
 		return -ENODEV;
 	}
 
 	return 0;
 }
+#else
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
+{
+	return -ENODEV;
+}
+#endif
+
+static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+{
+	return arm_cspmu_acpi_get_cpus(cspmu);
+}
 
 static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
 {
@@ -1220,6 +1213,12 @@ static int arm_cspmu_device_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct platform_device_id arm_cspmu_id[] = {
+	{DRVNAME, 0},
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, arm_cspmu_id);
+
 static struct platform_driver arm_cspmu_driver = {
 	.driver = {
 			.name = DRVNAME,
@@ -1227,12 +1226,14 @@ static struct platform_driver arm_cspmu_driver = {
 		},
 	.probe = arm_cspmu_device_probe,
 	.remove = arm_cspmu_device_remove,
+	.id_table = arm_cspmu_id,
 };
 
 static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
 {
 	cpumask_set_cpu(cpu, &cspmu->active_cpu);
-	WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
+	if (cspmu->irq)
+		WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
 }
 
 static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 51323b1..83df53d 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -8,7 +8,6 @@
 #ifndef __ARM_CSPMU_H__
 #define __ARM_CSPMU_H__
 
-#include <linux/acpi.h>
 #include <linux/bitfield.h>
 #include <linux/cpumask.h>
 #include <linux/device.h>
@@ -118,16 +117,16 @@ struct arm_cspmu_impl {
 struct arm_cspmu {
 	struct pmu pmu;
 	struct device *dev;
-	struct acpi_apmt_node *apmt_node;
 	const char *name;
 	const char *identifier;
 	void __iomem *base0;
 	void __iomem *base1;
-	int irq;
 	cpumask_t associated_cpus;
 	cpumask_t active_cpu;
 	struct hlist_node cpuhp_node;
+	int irq;
 
+	bool has_atomic_dword;
 	u32 pmcfgr;
 	u32 num_logical_ctrs;
 	u32 num_set_clr_reg;
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 5de06f9..9d0f01c 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -227,9 +227,31 @@ static const struct attribute_group dmc620_pmu_format_attr_group = {
 	.attrs	= dmc620_pmu_formats_attrs,
 };
 
+static ssize_t dmc620_pmu_cpumask_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf,
+				       cpumask_of(dmc620_pmu->irq->cpu));
+}
+
+static struct device_attribute dmc620_pmu_cpumask_attr =
+	__ATTR(cpumask, 0444, dmc620_pmu_cpumask_show, NULL);
+
+static struct attribute *dmc620_pmu_cpumask_attrs[] = {
+	&dmc620_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group dmc620_pmu_cpumask_attr_group = {
+	.attrs = dmc620_pmu_cpumask_attrs,
+};
+
 static const struct attribute_group *dmc620_pmu_attr_groups[] = {
 	&dmc620_pmu_events_attr_group,
 	&dmc620_pmu_format_attr_group,
+	&dmc620_pmu_cpumask_attr_group,
 	NULL,
 };
 
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 15bd1e3..277e29f 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
 	if (event->hw.flags & ARMPMU_EVT_64BIT)
 		return GENMASK_ULL(63, 0);
+	else if (event->hw.flags & ARMPMU_EVT_63BIT)
+		return GENMASK_ULL(62, 0);
 	else if (event->hw.flags & ARMPMU_EVT_47BIT)
 		return GENMASK_ULL(46, 0);
 	else
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
new file mode 100644
index 0000000..71d5b07
--- /dev/null
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2023 NXP
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+
+/* Performance monitor configuration */
+#define PMCFG1  			0x00
+#define PMCFG1_RD_TRANS_FILT_EN 	BIT(31)
+#define PMCFG1_WR_TRANS_FILT_EN 	BIT(30)
+#define PMCFG1_RD_BT_FILT_EN 		BIT(29)
+#define PMCFG1_ID_MASK  		GENMASK(17, 0)
+
+#define PMCFG2  			0x04
+#define PMCFG2_ID			GENMASK(17, 0)
+
+/* Global control register affects all counters and takes priority over local control registers */
+#define PMGC0		0x40
+/* Global control register bits */
+#define PMGC0_FAC	BIT(31)
+#define PMGC0_PMIE	BIT(30)
+#define PMGC0_FCECE	BIT(29)
+
+/*
+ * 64bit counter0 exclusively dedicated to counting cycles
+ * 32bit counters monitor counter-specific events in addition to counting reference events
+ */
+#define PMLCA(n)	(0x40 + 0x10 + (0x10 * n))
+#define PMLCB(n)	(0x40 + 0x14 + (0x10 * n))
+#define PMC(n)		(0x40 + 0x18 + (0x10 * n))
+/* Local control register bits */
+#define PMLCA_FC	BIT(31)
+#define PMLCA_CE	BIT(26)
+#define PMLCA_EVENT	GENMASK(22, 16)
+
+#define NUM_COUNTERS		11
+#define CYCLES_COUNTER		0
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME	"imx9_ddr"
+#define DDR_CPUHP_CB_NAME	DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+struct imx_ddr_devtype_data {
+	const char *identifier;		/* system PMU identifier for userspace */
+};
+
+struct ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct hlist_node node;
+	struct device *dev;
+	struct perf_event *events[NUM_COUNTERS];
+	int active_events;
+	enum cpuhp_state cpuhp_state;
+	const struct imx_ddr_devtype_data *devtype_data;
+	int irq;
+	int id;
+};
+
+static const struct imx_ddr_devtype_data imx93_devtype_data = {
+	.identifier = "imx93",
+};
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+	{.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
+
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
+}
+
+static struct device_attribute ddr_perf_identifier_attr =
+	__ATTR(identifier, 0444, ddr_perf_identifier_show, NULL);
+
+static struct attribute *ddr_perf_identifier_attrs[] = {
+	&ddr_perf_identifier_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ddr_perf_identifier_attr_group = {
+	.attrs = ddr_perf_identifier_attrs,
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+	&ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t ddr_pmu_event_show(struct device *dev,
+				  struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id)				\
+	(&((struct perf_pmu_events_attr[]) {				\
+		{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+		  .id = _id, }						\
+	})[0].attr.attr)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+	/* counter0 cycles event */
+	IMX9_DDR_PMU_EVENT_ATTR(cycles, 0),
+
+	/* reference events for all normal counters, need assert DEBUG19[21] bit */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ddrc1_rmw_for_ecc, 12),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_rreorder, 13),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_wreorder, 14),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_0, 15),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_1, 16),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_2, 17),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_3, 18),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_4, 19),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_5, 22),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_6, 23),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_7, 24),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_8, 25),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_9, 26),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_10, 27),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_11, 28),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_12, 31),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_13, 59),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_15, 61),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63),
+
+	/* counter1 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71),
+
+	/* counter2 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73),
+
+	/* counter3 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73),
+
+	/* counter4 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73),
+
+	/* counter5 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72),
+
+	/* counter6 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72),
+
+	/* counter7 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65),
+
+	/* counter8 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65),
+
+	/* counter9 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66),
+
+	/* counter10 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66),
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(counter, "config:8-15");
+PMU_FORMAT_ATTR(axi_id, "config1:0-17");
+PMU_FORMAT_ATTR(axi_mask, "config2:0-17");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_counter.attr,
+	&format_attr_axi_id.attr,
+	&format_attr_axi_mask.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_identifier_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	NULL,
+};
+
+static void ddr_perf_clear_counter(struct ddr_pmu *pmu, int counter)
+{
+	if (counter == CYCLES_COUNTER) {
+		writel(0, pmu->base + PMC(counter) + 0x4);
+		writel(0, pmu->base + PMC(counter));
+	} else {
+		writel(0, pmu->base + PMC(counter));
+	}
+}
+
+static u64 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+	u32 val_lower, val_upper;
+	u64 val;
+
+	if (counter != CYCLES_COUNTER) {
+		val = readl_relaxed(pmu->base + PMC(counter));
+		goto out;
+	}
+
+	/* special handling for reading 64bit cycle counter */
+	do {
+		val_upper = readl_relaxed(pmu->base + PMC(counter) + 0x4);
+		val_lower = readl_relaxed(pmu->base + PMC(counter));
+	} while (val_upper != readl_relaxed(pmu->base + PMC(counter) + 0x4));
+
+	val = val_upper;
+	val = (val << 32);
+	val |= val_lower;
+out:
+	return val;
+}
+
+static void ddr_perf_counter_global_config(struct ddr_pmu *pmu, bool enable)
+{
+	u32 ctrl;
+
+	ctrl = readl_relaxed(pmu->base + PMGC0);
+
+	if (enable) {
+		/*
+		 * The performance monitor must be reset before event counting
+		 * sequences. The performance monitor can be reset by first freezing
+		 * one or more counters and then clearing the freeze condition to
+		 * allow the counters to count according to the settings in the
+		 * performance monitor registers. Counters can be frozen individually
+		 * by setting PMLCAn[FC] bits, or simultaneously by setting PMGC0[FAC].
+		 * Simply clearing these freeze bits will then allow the performance
+		 * monitor to begin counting based on the register settings.
+		 */
+		ctrl |= PMGC0_FAC;
+		writel(ctrl, pmu->base + PMGC0);
+
+		/*
+		 * Freeze all counters disabled, interrupt enabled, and freeze
+		 * counters on condition enabled.
+		 */
+		ctrl &= ~PMGC0_FAC;
+		ctrl |= PMGC0_PMIE | PMGC0_FCECE;
+		writel(ctrl, pmu->base + PMGC0);
+	} else {
+		ctrl |= PMGC0_FAC;
+		ctrl &= ~(PMGC0_PMIE | PMGC0_FCECE);
+		writel(ctrl, pmu->base + PMGC0);
+	}
+}
+
+static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
+				    int counter, bool enable)
+{
+	u32 ctrl_a;
+
+	ctrl_a = readl_relaxed(pmu->base + PMLCA(counter));
+
+	if (enable) {
+		ctrl_a |= PMLCA_FC;
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+
+		ddr_perf_clear_counter(pmu, counter);
+
+		/* Freeze counter disabled, condition enabled, and program event.*/
+		ctrl_a &= ~PMLCA_FC;
+		ctrl_a |= PMLCA_CE;
+		ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F);
+		ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF));
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+	} else {
+		/* Freeze counter. */
+		ctrl_a |= PMLCA_FC;
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+	}
+}
+
+static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2)
+{
+	u32 pmcfg1, pmcfg2;
+	int event, counter;
+
+	event = cfg & 0x000000FF;
+	counter = (cfg & 0x0000FF00) >> 8;
+
+	pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+	if (counter == 2 && event == 73)
+		pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN;
+	else if (counter == 2 && event != 73)
+		pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN;
+
+	if (counter == 3 && event == 73)
+		pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN;
+	else if (counter == 3 && event != 73)
+		pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN;
+
+	if (counter == 4 && event == 73)
+		pmcfg1 |= PMCFG1_RD_BT_FILT_EN;
+	else if (counter == 4 && event != 73)
+		pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN;
+
+	pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF);
+	pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2);
+	writel(pmcfg1, pmu->base + PMCFG1);
+
+	pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
+	pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF);
+	pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1);
+	writel(pmcfg2, pmu->base + PMCFG2);
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u64 new_raw_count;
+
+	new_raw_count = ddr_perf_read_counter(pmu, counter);
+	local64_add(new_raw_count, &event->count);
+
+	/* clear counter's value every time */
+	ddr_perf_clear_counter(pmu, counter);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+
+	return 0;
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	ddr_perf_counter_local_config(pmu, event->attr.config, counter, true);
+	hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int cfg = event->attr.config;
+	int cfg1 = event->attr.config1;
+	int cfg2 = event->attr.config2;
+	int counter;
+
+	counter = (cfg & 0x0000FF00) >> 8;
+
+	pmu->events[counter] = event;
+	pmu->active_events++;
+	hwc->idx = counter;
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		ddr_perf_event_start(event, flags);
+
+	/* read trans, write trans, read beat */
+	ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2);
+
+	return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_counter_local_config(pmu, event->attr.config, counter, false);
+	ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	pmu->active_events--;
+	hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	ddr_perf_counter_global_config(ddr_pmu, true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	ddr_perf_counter_global_config(ddr_pmu, false);
+}
+
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+			 struct device *dev)
+{
+	*pmu = (struct ddr_pmu) {
+		.pmu = (struct pmu) {
+			.module       = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr  = perf_invalid_context,
+			.attr_groups  = attr_groups,
+			.event_init   = ddr_perf_event_init,
+			.add          = ddr_perf_event_add,
+			.del          = ddr_perf_event_del,
+			.start        = ddr_perf_event_start,
+			.stop         = ddr_perf_event_stop,
+			.read         = ddr_perf_event_update,
+			.pmu_enable   = ddr_perf_pmu_enable,
+			.pmu_disable  = ddr_perf_pmu_disable,
+		},
+		.base = base,
+		.dev = dev,
+	};
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+	struct ddr_pmu *pmu = (struct ddr_pmu *)p;
+	struct perf_event *event;
+	int i;
+
+	/*
+	 * Counters can generate an interrupt on an overflow when msb of a
+	 * counter changes from 0 to 1. For the interrupt to be signalled,
+	 * below condition mush be satisfied:
+	 * PMGC0[PMIE] = 1, PMGC0[FCECE] = 1, PMLCAn[CE] = 1
+	 * When an interrupt is signalled, PMGC0[FAC] is set by hardware and
+	 * all of the registers are frozen.
+	 * Software can clear the interrupt condition by resetting the performance
+	 * monitor and clearing the most significant bit of the counter that
+	 * generate the overflow.
+	 */
+	for (i = 0; i < NUM_COUNTERS; i++) {
+		if (!pmu->events[i])
+			continue;
+
+		event = pmu->events[i];
+
+		ddr_perf_event_update(event);
+	}
+
+	ddr_perf_counter_global_config(pmu, true);
+
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu;
+	void __iomem *base;
+	int ret, irq;
+	char *name;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	ddr_perf_init(pmu, base, &pdev->dev);
+
+	pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+
+	platform_set_drvdata(pdev, pmu);
+
+	pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
+	if (!name) {
+		ret = -ENOMEM;
+		goto format_string_err;
+	}
+
+	pmu->cpu = raw_smp_processor_id();
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME,
+				      NULL, ddr_perf_offline_cpu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to add callbacks for multi state\n");
+		goto cpuhp_state_err;
+	}
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		goto cpuhp_instance_err;
+	}
+
+	/* Request irq */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto ddr_perf_err;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, ddr_perf_irq_handler,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       DDR_CPUHP_CB_NAME, pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Request irq failed: %d", ret);
+		goto ddr_perf_err;
+	}
+
+	pmu->irq = irq;
+	ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu));
+	if (ret) {
+		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
+		goto ddr_perf_err;
+	}
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto ddr_perf_err;
+
+	return 0;
+
+ddr_perf_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+cpuhp_instance_err:
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+cpuhp_state_err:
+format_string_err:
+	ida_simple_remove(&ddr_ida, pmu->id);
+	dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret);
+	return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+
+	perf_pmu_unregister(&pmu->pmu);
+
+	ida_simple_remove(&ddr_ida, pmu->id);
+
+	return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+	.driver         = {
+		.name                = "imx9-ddr-pmu",
+		.of_match_table      = imx_ddr_pmu_dt_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = ddr_perf_probe,
+	.remove         = ddr_perf_remove,
+};
+module_platform_driver(imx_ddr_pmu_driver);
+
+MODULE_AUTHOR("Xu Yang <xu.yang_2@nxp.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DDRC PerfMon for i.MX9 SoCs");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 4d2c9ab..48dcc83 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
 			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
-			  hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
+			  hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o
 
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
 obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 6fee0b6..e10fc7c 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -683,7 +683,7 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 	pcie_pmu->on_cpu = -1;
 	/* Choose a new CPU from all online cpus. */
-	target = cpumask_first(cpu_online_mask);
+	target = cpumask_any_but(cpu_online_mask, cpu);
 	if (target >= nr_cpu_ids) {
 		pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
 		return 0;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 71b6687..d941e746 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -22,9 +22,15 @@
 #define PA_TT_CTRL			0x1c08
 #define PA_TGTID_CTRL			0x1c14
 #define PA_SRCID_CTRL			0x1c18
+
+/* H32 PA interrupt registers */
 #define PA_INT_MASK			0x1c70
 #define PA_INT_STATUS			0x1c78
 #define PA_INT_CLEAR			0x1c7c
+
+#define H60PA_INT_STATUS		0x1c70
+#define H60PA_INT_MASK			0x1c74
+
 #define PA_EVENT_TYPE0			0x1c80
 #define PA_PMU_VERSION			0x1cf0
 #define PA_EVENT_CNT0_L			0x1d00
@@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
 
+struct hisi_pa_pmu_int_regs {
+	u32 mask_offset;
+	u32 clear_offset;
+	u32 status_offset;
+};
+
 static void hisi_pa_pmu_enable_tracetag(struct perf_event *event)
 {
 	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
@@ -219,40 +231,40 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
 static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
 					   struct hw_perf_event *hwc)
 {
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
 	u32 val;
 
 	/* Write 0 to enable interrupt */
-	val = readl(pa_pmu->base + PA_INT_MASK);
+	val = readl(pa_pmu->base + regs->mask_offset);
 	val &= ~(1 << hwc->idx);
-	writel(val, pa_pmu->base + PA_INT_MASK);
+	writel(val, pa_pmu->base + regs->mask_offset);
 }
 
 static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
 					    struct hw_perf_event *hwc)
 {
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
 	u32 val;
 
 	/* Write 1 to mask interrupt */
-	val = readl(pa_pmu->base + PA_INT_MASK);
+	val = readl(pa_pmu->base + regs->mask_offset);
 	val |= 1 << hwc->idx;
-	writel(val, pa_pmu->base + PA_INT_MASK);
+	writel(val, pa_pmu->base + regs->mask_offset);
 }
 
 static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
 {
-	return readl(pa_pmu->base + PA_INT_STATUS);
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+
+	return readl(pa_pmu->base + regs->status_offset);
 }
 
 static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
 {
-	writel(1 << idx, pa_pmu->base + PA_INT_CLEAR);
-}
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
 
-static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
-	{ "HISI0273", },
-	{}
-};
-MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+	writel(1 << idx, pa_pmu->base + regs->clear_offset);
+}
 
 static int hisi_pa_pmu_init_data(struct platform_device *pdev,
 				   struct hisi_pmu *pa_pmu)
@@ -276,6 +288,10 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev,
 	pa_pmu->ccl_id = -1;
 	pa_pmu->sccl_id = -1;
 
+	pa_pmu->dev_info = device_get_match_data(&pdev->dev);
+	if (!pa_pmu->dev_info)
+		return -ENODEV;
+
 	pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pa_pmu->base)) {
 		dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n");
@@ -314,6 +330,32 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = {
 	.attrs = hisi_pa_pmu_v2_events_attr,
 };
 
+static struct attribute *hisi_pa_pmu_v3_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(tx_req,	0x0),
+	HISI_PMU_EVENT_ATTR(tx_dat,	0x1),
+	HISI_PMU_EVENT_ATTR(tx_snp,	0x2),
+	HISI_PMU_EVENT_ATTR(rx_req,	0x7),
+	HISI_PMU_EVENT_ATTR(rx_dat,	0x8),
+	HISI_PMU_EVENT_ATTR(rx_snp,	0x9),
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_v3_events_group = {
+	.name = "events",
+	.attrs = hisi_pa_pmu_v3_events_attr,
+};
+
+static struct attribute *hisi_h60pa_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_flit,	0x50),
+	HISI_PMU_EVENT_ATTR(tx_flit,	0x65),
+	NULL
+};
+
+static const struct attribute_group hisi_h60pa_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_h60pa_pmu_events_attr,
+};
+
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
 
 static struct attribute *hisi_pa_pmu_cpumask_attrs[] = {
@@ -337,6 +379,12 @@ static const struct attribute_group hisi_pa_pmu_identifier_group = {
 	.attrs = hisi_pa_pmu_identifier_attrs,
 };
 
+static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = {
+	.mask_offset = PA_INT_MASK,
+	.clear_offset = PA_INT_CLEAR,
+	.status_offset = PA_INT_STATUS,
+};
+
 static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
 	&hisi_pa_pmu_v2_format_group,
 	&hisi_pa_pmu_v2_events_group,
@@ -345,6 +393,46 @@ static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
 	NULL
 };
 
+static const struct hisi_pmu_dev_info hisi_h32pa_v2 = {
+	.name = "pa",
+	.attr_groups = hisi_pa_pmu_v2_attr_groups,
+	.private = &hisi_pa_pmu_regs,
+};
+
+static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_pa_pmu_v3_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h32pa_v3 = {
+	.name = "pa",
+	.attr_groups = hisi_pa_pmu_v3_attr_groups,
+	.private = &hisi_pa_pmu_regs,
+};
+
+static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = {
+	.mask_offset = H60PA_INT_MASK,
+	.clear_offset = H60PA_INT_STATUS, /* Clear on write */
+	.status_offset = H60PA_INT_STATUS,
+};
+
+static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_h60pa_pmu_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h60pa = {
+	.name = "h60pa",
+	.attr_groups = hisi_h60pa_pmu_attr_groups,
+	.private = &hisi_h60pa_pmu_regs,
+};
+
 static const struct hisi_uncore_ops hisi_uncore_pa_ops = {
 	.write_evtype		= hisi_pa_pmu_write_evtype,
 	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
@@ -375,7 +463,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups;
+	pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups;
 	pa_pmu->num_counters = PA_NR_COUNTERS;
 	pa_pmu->ops = &hisi_uncore_pa_ops;
 	pa_pmu->check_event = 0xB0;
@@ -400,8 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u",
-			      pa_pmu->sicl_id, pa_pmu->index_id);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u",
+			      pa_pmu->sicl_id, pa_pmu->dev_info->name,
+			      pa_pmu->index_id);
 	if (!name)
 		return -ENOMEM;
 
@@ -435,6 +524,14 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+	{ "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 },
+	{ "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 },
+	{ "HISI0274", (kernel_ulong_t)&hisi_h60pa },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
 static struct platform_driver hisi_pa_pmu_driver = {
 	.driver = {
 		.name = "hisi_pa_pmu",
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 2823f38..0403145 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -20,7 +20,6 @@
 
 #include "hisi_uncore_pmu.h"
 
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
 #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
 
 /*
@@ -226,6 +225,9 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
 	hwc->idx		= -1;
 	hwc->config_base	= event->attr.config;
 
+	if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event))
+		return -EINVAL;
+
 	/* Enforce to use the same CPU for all events in this PMU */
 	event->cpu = hisi_pmu->on_cpu;
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 07890a8..92402aa 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -43,9 +43,15 @@
 		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
 	}
 
+#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+
+#define HISI_PMU_EVTYPE_BITS		8
+#define HISI_PMU_EVTYPE_SHIFT(idx)	((idx) % 4 * HISI_PMU_EVTYPE_BITS)
+
 struct hisi_pmu;
 
 struct hisi_uncore_ops {
+	int (*check_filter)(struct perf_event *event);
 	void (*write_evtype)(struct hisi_pmu *, int, u32);
 	int (*get_event_idx)(struct perf_event *);
 	u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
@@ -62,6 +68,13 @@ struct hisi_uncore_ops {
 	void (*disable_filter)(struct perf_event *event);
 };
 
+/* Describes the HISI PMU chip features information */
+struct hisi_pmu_dev_info {
+	const char *name;
+	const struct attribute_group **attr_groups;
+	void *private;
+};
+
 struct hisi_pmu_hwevents {
 	struct perf_event *hw_events[HISI_MAX_COUNTERS];
 	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
@@ -72,6 +85,7 @@ struct hisi_pmu_hwevents {
 struct hisi_pmu {
 	struct pmu pmu;
 	const struct hisi_uncore_ops *ops;
+	const struct hisi_pmu_dev_info *dev_info;
 	struct hisi_pmu_hwevents pmu_events;
 	/* associated_cpus: All CPUs associated with the PMU */
 	cpumask_t associated_cpus;
diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
new file mode 100644
index 0000000..63da05e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC UC (unified cache) uncore Hardware event counters support
+ *
+ * Copyright (C) 2023 HiSilicon Limited
+ *
+ * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by UC PMU */
+static enum cpuhp_state hisi_uc_pmu_online;
+
+/* UC register definition */
+#define HISI_UC_INT_MASK_REG		0x0800
+#define HISI_UC_INT_STS_REG		0x0808
+#define HISI_UC_INT_CLEAR_REG		0x080c
+#define HISI_UC_TRACETAG_CTRL_REG	0x1b2c
+#define HISI_UC_TRACETAG_REQ_MSK	GENMASK(9, 7)
+#define HISI_UC_TRACETAG_MARK_EN	BIT(0)
+#define HISI_UC_TRACETAG_REQ_EN		(HISI_UC_TRACETAG_MARK_EN | BIT(2))
+#define HISI_UC_TRACETAG_SRCID_EN	BIT(3)
+#define HISI_UC_SRCID_CTRL_REG		0x1b40
+#define HISI_UC_SRCID_MSK		GENMASK(14, 1)
+#define HISI_UC_EVENT_CTRL_REG		0x1c00
+#define HISI_UC_EVENT_TRACETAG_EN	BIT(29)
+#define HISI_UC_EVENT_URING_MSK		GENMASK(28, 27)
+#define HISI_UC_EVENT_GLB_EN		BIT(26)
+#define HISI_UC_VERSION_REG		0x1cf0
+#define HISI_UC_EVTYPE_REGn(n)		(0x1d00 + (n) * 4)
+#define HISI_UC_EVTYPE_MASK		GENMASK(7, 0)
+#define HISI_UC_CNTR_REGn(n)		(0x1e00 + (n) * 8)
+
+#define HISI_UC_NR_COUNTERS		0x8
+#define HISI_UC_V2_NR_EVENTS		0xFF
+#define HISI_UC_CNTR_REG_BITS		64
+
+#define HISI_UC_RD_REQ_TRACETAG		0x4
+#define HISI_UC_URING_EVENT_MIN		0x47
+#define HISI_UC_URING_EVENT_MAX		0x59
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20);
+
+static int hisi_uc_pmu_check_filter(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+
+	if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) {
+		dev_err(uc_pmu->dev,
+			"rcid_en depends on rd_req_en being enabled!\n");
+		return -EINVAL;
+	}
+
+	if (!hisi_get_uring_channel(event))
+		return 0;
+
+	if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) ||
+	    (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX))
+		dev_warn(uc_pmu->dev,
+			 "Only events: [%#x ~ %#x] support channel filtering!",
+			 HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX);
+
+	return 0;
+}
+
+static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_rd_req_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* The request-type has been configured */
+	if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG)
+		return;
+
+	/* Set request-type for tracetag, only read request is supported! */
+	val &= ~HISI_UC_TRACETAG_REQ_MSK;
+	val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG);
+	val |= HISI_UC_TRACETAG_REQ_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_rd_req_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the request-type tracetag has been cleaned up */
+	if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0)
+		return;
+
+	/* Clear request-type */
+	val &= ~HISI_UC_TRACETAG_REQ_MSK;
+	val &= ~HISI_UC_TRACETAG_REQ_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_srcid_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the source id has been configured */
+	if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val))
+		return;
+
+	/* Enable source id tracetag */
+	val |= HISI_UC_TRACETAG_SRCID_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+	val &= ~HISI_UC_SRCID_MSK;
+	val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event));
+	writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+
+	/* Depend on request-type tracetag enabled */
+	hisi_uc_pmu_config_req_tracetag(event);
+}
+
+static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_srcid_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the source id has been cleaned up */
+	if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0)
+		return;
+
+	hisi_uc_pmu_clear_req_tracetag(event);
+
+	/* Disable source id tracetag */
+	val &= ~HISI_UC_TRACETAG_SRCID_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+	val &= ~HISI_UC_SRCID_MSK;
+	writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_uring_channel(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 uring_channel = hisi_get_uring_channel(event);
+	u32 val;
+
+	/* Do nothing if not being set or is set explicitly to zero (default) */
+	if (uring_channel == 0)
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+	/* Do nothing, the uring_channel has been configured */
+	if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val))
+		return;
+
+	val &= ~HISI_UC_EVENT_URING_MSK;
+	val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	/* Do nothing if not being set or is set explicitly to zero (default) */
+	if (hisi_get_uring_channel(event) == 0)
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+	/* Do nothing, the uring_channel has been cleaned up */
+	if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0)
+		return;
+
+	val &= ~HISI_UC_EVENT_URING_MSK;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 == 0)
+		return;
+
+	hisi_uc_pmu_config_uring_channel(event);
+	hisi_uc_pmu_config_req_tracetag(event);
+	hisi_uc_pmu_config_srcid_tracetag(event);
+}
+
+static void hisi_uc_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 == 0)
+		return;
+
+	hisi_uc_pmu_clear_srcid_tracetag(event);
+	hisi_uc_pmu_clear_req_tracetag(event);
+	hisi_uc_pmu_clear_uring_channel(event);
+}
+
+static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type)
+{
+	u32 val;
+
+	/*
+	 * Select the appropriate event select register.
+	 * There are 2 32-bit event select registers for the
+	 * 8 hardware counters, each event code is 8-bit wide.
+	 */
+	val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+	val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+	val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+	writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+}
+
+static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val |= HISI_UC_EVENT_GLB_EN;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val &= ~HISI_UC_EVENT_GLB_EN;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index */
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val |= (1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index */
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val &= ~(1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu,
+				    struct hw_perf_event *hwc)
+{
+	return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu,
+					   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+	val &= ~(1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+	val |= (1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu)
+{
+	return readl(uc_pmu->base + HISI_UC_INT_STS_REG);
+}
+
+static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx)
+{
+	writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG);
+}
+
+static int hisi_uc_pmu_init_data(struct platform_device *pdev,
+				 struct hisi_pmu *uc_pmu)
+{
+	/*
+	 * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to
+	 * identify the topology information of UC PMU devices in the chip.
+	 * They have some CCLs per SCCL and then 4 UC PMU per CCL.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &uc_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read uc sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
+				     &uc_pmu->ccl_id)) {
+		dev_err(&pdev->dev, "Can not read uc ccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
+				     &uc_pmu->sub_id)) {
+		dev_err(&pdev->dev, "Can not read sub-id!\n");
+		return -EINVAL;
+	}
+
+	uc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(uc_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n");
+		return PTR_ERR(uc_pmu->base);
+	}
+
+	uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG);
+
+	return 0;
+}
+
+static struct attribute *hisi_uc_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"),
+	HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"),
+	HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"),
+	HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"),
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_uc_pmu_format_attr,
+};
+
+static struct attribute *hisi_uc_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(sq_time,		0x00),
+	HISI_PMU_EVENT_ATTR(pq_time,		0x01),
+	HISI_PMU_EVENT_ATTR(hbm_time,		0x02),
+	HISI_PMU_EVENT_ATTR(iq_comp_time_cring,	0x03),
+	HISI_PMU_EVENT_ATTR(iq_comp_time_uring,	0x05),
+	HISI_PMU_EVENT_ATTR(cpu_rd,		0x10),
+	HISI_PMU_EVENT_ATTR(cpu_rd64,		0x17),
+	HISI_PMU_EVENT_ATTR(cpu_rs64,		0x19),
+	HISI_PMU_EVENT_ATTR(cpu_mru,		0x1a),
+	HISI_PMU_EVENT_ATTR(cycles,		0x9c),
+	HISI_PMU_EVENT_ATTR(spipe_hit,		0xb3),
+	HISI_PMU_EVENT_ATTR(hpipe_hit,		0xdb),
+	HISI_PMU_EVENT_ATTR(cring_rxdat_cnt,	0xfa),
+	HISI_PMU_EVENT_ATTR(cring_txdat_cnt,	0xfb),
+	HISI_PMU_EVENT_ATTR(uring_rxdat_cnt,	0xfc),
+	HISI_PMU_EVENT_ATTR(uring_txdat_cnt,	0xfd),
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_uc_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_uc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = {
+	.attrs = hisi_uc_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_uc_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_uc_pmu_identifier_attrs[] = {
+	&hisi_uc_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_identifier_group = {
+	.attrs = hisi_uc_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_uc_pmu_attr_groups[] = {
+	&hisi_uc_pmu_format_group,
+	&hisi_uc_pmu_events_group,
+	&hisi_uc_pmu_cpumask_attr_group,
+	&hisi_uc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = {
+	.check_filter		= hisi_uc_pmu_check_filter,
+	.write_evtype		= hisi_uc_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_uc_pmu_start_counters,
+	.stop_counters		= hisi_uc_pmu_stop_counters,
+	.enable_counter		= hisi_uc_pmu_enable_counter,
+	.disable_counter	= hisi_uc_pmu_disable_counter,
+	.enable_counter_int	= hisi_uc_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_uc_pmu_disable_counter_int,
+	.write_counter		= hisi_uc_pmu_write_counter,
+	.read_counter		= hisi_uc_pmu_read_counter,
+	.get_int_status		= hisi_uc_pmu_get_int_status,
+	.clear_int_status	= hisi_uc_pmu_clear_int_status,
+	.enable_filter		= hisi_uc_pmu_enable_filter,
+	.disable_filter		= hisi_uc_pmu_disable_filter,
+};
+
+static int hisi_uc_pmu_dev_probe(struct platform_device *pdev,
+				 struct hisi_pmu *uc_pmu)
+{
+	int ret;
+
+	ret = hisi_uc_pmu_init_data(pdev, uc_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev);
+	if (ret)
+		return ret;
+
+	uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups;
+	uc_pmu->check_event = HISI_UC_EVTYPE_MASK;
+	uc_pmu->ops = &hisi_uncore_uc_pmu_ops;
+	uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS;
+	uc_pmu->num_counters = HISI_UC_NR_COUNTERS;
+	uc_pmu->dev = &pdev->dev;
+	uc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+	cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node);
+}
+
+static void hisi_uc_pmu_unregister_pmu(void *pmu)
+{
+	perf_pmu_unregister(pmu);
+}
+
+static int hisi_uc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *uc_pmu;
+	char *name;
+	int ret;
+
+	uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL);
+	if (!uc_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, uc_pmu);
+
+	ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u",
+			      uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n");
+
+	ret = devm_add_action_or_reset(&pdev->dev,
+				       hisi_uc_pmu_remove_cpuhp_instance,
+				       &uc_pmu->node);
+	if (ret)
+		return ret;
+
+	hisi_pmu_init(uc_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&uc_pmu->pmu, name, -1);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(&pdev->dev,
+					hisi_uc_pmu_unregister_pmu,
+					&uc_pmu->pmu);
+}
+
+static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = {
+	{ "HISI0291", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match);
+
+static struct platform_driver hisi_uc_pmu_driver = {
+	.driver = {
+		.name = "hisi_uc_pmu",
+		.acpi_match_table = hisi_uc_pmu_acpi_match,
+		/*
+		 * We have not worked out a safe bind/unbind process,
+		 * Forcefully unbinding during sampling will lead to a
+		 * kernel panic, so this is not supported yet.
+		 */
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_uc_pmu_probe,
+};
+
+static int __init hisi_uc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/hisi/uc:online",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret < 0) {
+		pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+	hisi_uc_pmu_online = ret;
+
+	ret = platform_driver_register(&hisi_uc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_uc_pmu_online);
+
+	return ret;
+}
+module_init(hisi_uc_pmu_module_init);
+
+static void __exit hisi_uc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_uc_pmu_driver);
+	cpuhp_remove_multi_state(hisi_uc_pmu_online);
+}
+module_exit(hisi_uc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index aaca6db..3f9a98c 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -857,7 +857,6 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&cluster->next);
-	list_add(&cluster->next, &l2cache_pmu->clusters);
 	cluster->cluster_id = fw_cluster_id;
 
 	irq = platform_get_irq(sdev, 0);
@@ -883,6 +882,7 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
 
 	spin_lock_init(&cluster->pmu_lock);
 
+	list_add(&cluster->next, &l2cache_pmu->clusters);
 	l2cache_pmu->num_pmus++;
 
 	return 0;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7b71dd7..5ef126a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1507,6 +1507,12 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_ARM64
+void acpi_arm_init(void);
+#else
+static inline void acpi_arm_init(void) { }
+#endif
+
 #ifdef CONFIG_ACPI_PCC
 void acpi_init_pcc(void);
 #else
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h
deleted file mode 100644
index f477f0b..0000000
--- a/include/linux/acpi_agdi.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef __ACPI_AGDI_H__
-#define __ACPI_AGDI_H__
-
-#include <linux/acpi.h>
-
-#ifdef CONFIG_ACPI_AGDI
-void __init acpi_agdi_init(void);
-#else
-static inline void acpi_agdi_init(void) {}
-#endif
-#endif /* __ACPI_AGDI_H__ */
diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h
deleted file mode 100644
index 40bd634..0000000
--- a/include/linux/acpi_apmt.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * ARM CoreSight PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
- *
- */
-
-#ifndef __ACPI_APMT_H__
-#define __ACPI_APMT_H__
-
-#include <linux/acpi.h>
-
-#ifdef CONFIG_ACPI_APMT
-void acpi_apmt_init(void);
-#else
-static inline void acpi_apmt_init(void) { }
-#endif /* CONFIG_ACPI_APMT */
-
-#endif /* __ACPI_APMT_H__ */
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index b43be09..e4e7bb6 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -27,7 +27,6 @@ int iort_register_domain_token(int trans_id, phys_addr_t base,
 void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
-void acpi_iort_init(void);
 u32 iort_msi_map_id(struct device *dev, u32 id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
 					  enum irq_domain_bus_token bus_token);
@@ -43,7 +42,6 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
 void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head);
 phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
-static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_id(struct device *dev, u32 id)
 { return id; }
 static inline struct irq_domain *iort_get_device_domain(
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 525b5d6..c0e4baf 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -26,9 +26,11 @@
  */
 #define ARMPMU_EVT_64BIT		0x00001 /* Event uses a 64bit counter */
 #define ARMPMU_EVT_47BIT		0x00002 /* Event uses a 47bit counter */
+#define ARMPMU_EVT_63BIT		0x00004 /* Event uses a 63bit counter */
 
 static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
 static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT);
 
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 93333a9..d4ad813 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -39,6 +39,20 @@ static void cssc_sigill(void)
 	asm volatile(".inst 0xdac01c00" : : : "x0");
 }
 
+static void mops_sigill(void)
+{
+	char dst[1], src[1];
+	register char *dstp asm ("x0") = dst;
+	register char *srcp asm ("x1") = src;
+	register long size asm ("x2") = 1;
+
+	/* CPYP [x0]!, [x1]!, x2! */
+	asm volatile(".inst 0x1d010440"
+		     : "+r" (dstp), "+r" (srcp), "+r" (size)
+		     :
+		     : "cc", "memory");
+}
+
 static void rng_sigill(void)
 {
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -210,6 +224,14 @@ static const struct hwcap_data {
 		.sigill_fn = cssc_sigill,
 	},
 	{
+		.name = "MOPS",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_MOPS,
+		.cpuinfo = "mops",
+		.sigill_fn = mops_sigill,
+		.sigill_reliable = true,
+	},
+	{
 		.name = "RNG",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 8ab4c86..839e3a2 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -4,7 +4,7 @@
 sme_*
 ssve_*
 sve_*
-tpidr2_siginfo
+tpidr2_*
 za_*
 zt_*
 !*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
new file mode 100644
index 0000000..f9a86c0
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is restored.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+	uint64_t val;
+
+	asm volatile (
+		"mrs	%0, " SYS_TPIDR2 "\n"
+		: "=r"(val)
+		:
+		: "cc");
+
+	return val;
+}
+
+static void set_tpidr2(uint64_t val)
+{
+	asm volatile (
+		"msr	" SYS_TPIDR2 ", %0\n"
+		:
+		: "r"(val)
+		: "cc");
+}
+
+
+static uint64_t initial_tpidr2;
+
+static bool save_tpidr2(struct tdescr *td)
+{
+	initial_tpidr2 = get_tpidr2();
+	fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2);
+
+	return true;
+}
+
+static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	uint64_t my_tpidr2 = get_tpidr2();
+
+	my_tpidr2++;
+	fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2);
+	set_tpidr2(my_tpidr2);
+
+	return 0;
+}
+
+static void check_tpidr2(struct tdescr *td)
+{
+	uint64_t tpidr2 = get_tpidr2();
+
+	td->pass = tpidr2 == initial_tpidr2;
+
+	if (td->pass)
+		fprintf(stderr, "TPIDR2 restored\n");
+	else
+		fprintf(stderr, "TPIDR2 was %lx but is now %lx\n",
+			initial_tpidr2, tpidr2);
+}
+
+struct tdescr tde = {
+	.name = "TPIDR2 restore",
+	.descr = "Validate that TPIDR2 is restored from the sigframe",
+	.feats_required = FEAT_SME,
+	.timeout = 3,
+	.sig_trig = SIGUSR1,
+	.init = save_tpidr2,
+	.run = modify_tpidr2,
+	.check_result = check_tpidr2,
+};